]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSRank.h
import ceph pacific 16.2.5
[ceph.git] / ceph / src / mds / MDSRank.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef MDS_RANK_H_
16#define MDS_RANK_H_
17
11fdf7f2 18#include <string_view>
94b18763 19
f67539c2
TL
20#include <boost/asio/io_context.hpp>
21
7c673cae
FG
22#include "common/DecayCounter.h"
23#include "common/LogClient.h"
24#include "common/Timer.h"
25#include "common/TrackedOp.h"
26
9f95a23c
TL
27#include "include/common_fwd.h"
28
11fdf7f2 29#include "messages/MClientRequest.h"
7c673cae 30#include "messages/MCommand.h"
11fdf7f2 31#include "messages/MMDSMap.h"
7c673cae
FG
32
33#include "Beacon.h"
34#include "DamageTable.h"
35#include "MDSMap.h"
36#include "SessionMap.h"
37#include "MDCache.h"
7c673cae 38#include "MDLog.h"
11fdf7f2 39#include "MDSContext.h"
7c673cae 40#include "PurgeQueue.h"
91327a77 41#include "Server.h"
f67539c2 42#include "MetricsHandler.h"
7c673cae
FG
43#include "osdc/Journaler.h"
44
45// Full .h import instead of forward declaration for PerfCounter, for the
46// benefit of those including this header and using MDSRank::logger
47#include "common/perf_counters.h"
48
// Performance counter IDs for the per-rank MDS PerfCounters instance
// (MDSRank::logger).  l_mds_first/l_mds_last bound the range; the
// enumerators in between are consecutive.
enum {
  l_mds_first = 2000,
  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_forward,
  l_mds_dir_fetch,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,
  // cache / inode accounting
  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,
  // path traversal
  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,
  l_mds_load_cent,
  l_mds_dispatch_queue_len,
  // subtree export/import
  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,
  // open-ino lookups
  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,
  // recursive stats on the root inode
  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,
  // scrub activity
  l_mds_scrub_backtrace_fetch,
  l_mds_scrub_set_tag,
  l_mds_scrub_backtrace_repaired,
  l_mds_scrub_inotable_repaired,
  l_mds_scrub_dir_inodes,
  l_mds_scrub_dir_base_inodes,
  l_mds_scrub_dirfrag_rstats,
  l_mds_scrub_file_inodes,
  // client-capability message handling (server side)
  l_mdss_handle_inode_file_caps,
  l_mdss_ceph_cap_op_revoke,
  l_mdss_ceph_cap_op_grant,
  l_mdss_ceph_cap_op_trunc,
  l_mdss_ceph_cap_op_flushsnap_ack,
  l_mdss_ceph_cap_op_flush_ack,
  l_mdss_handle_client_caps,
  l_mdss_handle_client_caps_dirty,
  l_mdss_handle_client_cap_release,
  l_mdss_process_request_cap_release,
  l_mds_last,
};
107
108// memory utilization
109enum {
110 l_mdm_first = 2500,
111 l_mdm_ino,
112 l_mdm_inoa,
113 l_mdm_inos,
114 l_mdm_dir,
115 l_mdm_dira,
116 l_mdm_dirs,
117 l_mdm_dn,
118 l_mdm_dna,
119 l_mdm_dns,
120 l_mdm_cap,
121 l_mdm_capa,
122 l_mdm_caps,
123 l_mdm_rss,
124 l_mdm_heap,
7c673cae
FG
125 l_mdm_last,
126};
127
namespace ceph {
  struct heartbeat_handle_d;
}

// Forward declarations of the subsystems and helpers MDSRank points at;
// full definitions live in their own headers.
class Locker;
class MDCache;
class MDLog;
class MDBalancer;
class InoTable;
class SnapServer;
class SnapClient;
class MDSTableServer;
class MDSTableClient;
class Messenger;
class MetricAggregator;
class Objecter;
class MonClient;
class MgrClient;
class Finisher;
class ScrubStack;
class C_ExecAndReply;
150/**
151 * The public part of this class's interface is what's exposed to all
152 * the various subsystems (server, mdcache, etc), such as pointers
153 * to the other subsystems, and message-sending calls.
154 */
155class MDSRank {
7c673cae 156 public:
f64942e4
AA
157 friend class C_Flush_Journal;
158 friend class C_Drop_Cache;
11fdf7f2
TL
159 friend class C_CacheDropExecAndReply;
160 friend class C_ScrubExecAndReply;
161 friend class C_ScrubControlExecAndReply;
162
9f95a23c
TL
163 CephContext *cct;
164
165 MDSRank(
166 mds_rank_t whoami_,
f67539c2 167 std::string fs_name_,
9f95a23c
TL
168 ceph::mutex &mds_lock_,
169 LogChannelRef &clog_,
170 SafeTimer &timer_,
171 Beacon &beacon_,
172 std::unique_ptr<MDSMap> & mdsmap_,
173 Messenger *msgr,
174 MonClient *monc_,
175 MgrClient *mgrc,
176 Context *respawn_hook_,
f67539c2
TL
177 Context *suicide_hook_,
178 boost::asio::io_context& ioc);
9f95a23c 179
7c673cae 180 mds_rank_t get_nodeid() const { return whoami; }
f67539c2 181 std::string_view get_fs_name() const { return fs_name; }
b3b6e05e
TL
182 int64_t get_metadata_pool() const
183 {
184 return metadata_pool;
185 }
7c673cae 186
94b18763
FG
187 mono_time get_starttime() const {
188 return starttime;
189 }
190 chrono::duration<double> get_uptime() const {
191 mono_time now = mono_clock::now();
192 return chrono::duration<double>(now-starttime);
193 }
194
7c673cae
FG
195 bool is_daemon_stopping() const;
196
7c673cae
FG
197 MDSTableClient *get_table_client(int t);
198 MDSTableServer *get_table_server(int t);
199
7c673cae
FG
200 Session *get_session(client_t client) {
201 return sessionmap.get_session(entity_name_t::CLIENT(client.v));
202 }
9f95a23c 203 Session *get_session(const cref_t<Message> &m);
7c673cae
FG
204
205 MDSMap::DaemonState get_state() const { return state; }
206 MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); }
207
208 bool is_creating() const { return state == MDSMap::STATE_CREATING; }
209 bool is_starting() const { return state == MDSMap::STATE_STARTING; }
210 bool is_standby() const { return state == MDSMap::STATE_STANDBY; }
211 bool is_replay() const { return state == MDSMap::STATE_REPLAY; }
212 bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; }
213 bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; }
214 bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; }
215 bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; }
216 bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; }
217 bool is_active() const { return state == MDSMap::STATE_ACTIVE; }
218 bool is_stopping() const { return state == MDSMap::STATE_STOPPING; }
219 bool is_any_replay() const { return (is_replay() || is_standby_replay()); }
220 bool is_stopped() const { return mdsmap->is_stopped(whoami); }
221 bool is_cluster_degraded() const { return cluster_degraded; }
11fdf7f2 222 bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); }
7c673cae 223
eafe8130 224 bool is_cache_trimmable() const {
b3b6e05e 225 return is_standby_replay() || is_clientreplay() || is_active() || is_stopping();
eafe8130
TL
226 }
227
7c673cae 228 void handle_write_error(int err);
f67539c2 229 void handle_write_error_with_lock(int err);
7c673cae 230
c07f9fc5 231 void update_mlogger();
7c673cae 232
11fdf7f2 233 void queue_waiter(MDSContext *c) {
91327a77
AA
234 finished_queue.push_back(c);
235 progress_thread.signal();
236 }
494da23a
TL
237 void queue_waiter_front(MDSContext *c) {
238 finished_queue.push_front(c);
239 progress_thread.signal();
240 }
11fdf7f2
TL
241 void queue_waiters(MDSContext::vec& ls) {
242 MDSContext::vec v;
243 v.swap(ls);
244 std::copy(v.begin(), v.end(), std::back_inserter(finished_queue));
7c673cae
FG
245 progress_thread.signal();
246 }
11fdf7f2
TL
247 void queue_waiters_front(MDSContext::vec& ls) {
248 MDSContext::vec v;
249 v.swap(ls);
250 std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue));
91327a77
AA
251 progress_thread.signal();
252 }
7c673cae 253
7c673cae
FG
254 // Daemon lifetime functions: these guys break the abstraction
255 // and call up into the parent MDSDaemon instance. It's kind
256 // of unavoidable: if we want any depth into our calls
257 // to be able to e.g. tear down the whole process, we have to
258 // have a reference going all the way down.
259 // >>>
260 void suicide();
261 void respawn();
262 // <<<
263
264 /**
265 * Call this periodically if inside a potentially long running piece
266 * of code while holding the mds_lock
267 */
268 void heartbeat_reset();
269
270 /**
271 * Report state DAMAGED to the mon, and then pass on to respawn(). Call
272 * this when an unrecoverable error is encountered while attempting
273 * to load an MDS rank's data structures. This is *not* for use with
274 * errors affecting normal dirfrag/inode objects -- they should be handled
275 * through cleaner scrub/repair mechanisms.
276 *
277 * Callers must already hold mds_lock.
278 */
279 void damaged();
280
281 /**
282 * Wrapper around `damaged` for users who are not
283 * already holding mds_lock.
284 *
285 * Callers must not already hold mds_lock.
286 */
287 void damaged_unlocked();
288
91327a77
AA
289 double last_cleared_laggy() const {
290 return beacon.last_cleared_laggy();
291 }
292
293 double get_dispatch_queue_max_age(utime_t now) const;
7c673cae 294
9f95a23c 295 void send_message_mds(const ref_t<Message>& m, mds_rank_t mds);
f67539c2 296 void send_message_mds(const ref_t<Message>& m, const entity_addrvec_t &addr);
9f95a23c
TL
297 void forward_message_mds(const cref_t<MClientRequest>& req, mds_rank_t mds);
298 void send_message_client_counted(const ref_t<Message>& m, client_t client);
299 void send_message_client_counted(const ref_t<Message>& m, Session* session);
300 void send_message_client_counted(const ref_t<Message>& m, const ConnectionRef& connection);
301 void send_message_client(const ref_t<Message>& m, Session* session);
302 void send_message(const ref_t<Message>& m, const ConnectionRef& c);
7c673cae 303
11fdf7f2 304 void wait_for_active_peer(mds_rank_t who, MDSContext *c) {
7c673cae
FG
305 waiting_for_active_peer[who].push_back(c);
306 }
11fdf7f2
TL
307 void wait_for_cluster_recovered(MDSContext *c) {
308 ceph_assert(cluster_degraded);
7c673cae
FG
309 waiting_for_active_peer[MDS_RANK_NONE].push_back(c);
310 }
311
11fdf7f2 312 void wait_for_any_client_connection(MDSContext *c) {
28e407b8
AA
313 waiting_for_any_client_connection.push_back(c);
314 }
315 void kick_waiters_for_any_client_connection(void) {
316 finish_contexts(g_ceph_context, waiting_for_any_client_connection);
317 }
11fdf7f2 318 void wait_for_active(MDSContext *c) {
7c673cae
FG
319 waiting_for_active.push_back(c);
320 }
11fdf7f2 321 void wait_for_replay(MDSContext *c) {
7c673cae
FG
322 waiting_for_replay.push_back(c);
323 }
11fdf7f2 324 void wait_for_rejoin(MDSContext *c) {
a8e16298
TL
325 waiting_for_rejoin.push_back(c);
326 }
11fdf7f2 327 void wait_for_reconnect(MDSContext *c) {
7c673cae
FG
328 waiting_for_reconnect.push_back(c);
329 }
11fdf7f2 330 void wait_for_resolve(MDSContext *c) {
7c673cae
FG
331 waiting_for_resolve.push_back(c);
332 }
11fdf7f2 333 void wait_for_mdsmap(epoch_t e, MDSContext *c) {
7c673cae
FG
334 waiting_for_mdsmap[e].push_back(c);
335 }
11fdf7f2 336 void enqueue_replay(MDSContext *c) {
7c673cae
FG
337 replay_queue.push_back(c);
338 }
339
340 bool queue_one_replay();
11fdf7f2 341 void maybe_clientreplay_done();
7c673cae
FG
342
343 void set_osd_epoch_barrier(epoch_t e);
344 epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;}
345 epoch_t get_osd_epoch() const;
346
347 ceph_tid_t issue_tid() { return ++last_tid; }
348
11fdf7f2 349 MDSMap *get_mds_map() { return mdsmap.get(); }
7c673cae 350
28e407b8 351 uint64_t get_num_requests() const { return logger->get(l_mds_request); }
7c673cae
FG
352
353 int get_mds_slow_req_count() const { return mds_slow_req_count; }
354
355 void dump_status(Formatter *f) const;
356
11fdf7f2 357 void hit_export_target(mds_rank_t rank, double amount=-1.0);
7c673cae
FG
358 bool is_export_target(mds_rank_t rank) {
359 const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets;
360 return map_targets.count(rank);
361 }
362
f67539c2 363 bool evict_client(int64_t session_id, bool wait, bool blocklist,
11fdf7f2 364 std::ostream& ss, Context *on_killed=nullptr);
92f5a8d4
TL
365 int config_client(int64_t session_id, bool remove,
366 const std::string& option, const std::string& value,
367 std::ostream& ss);
11fdf7f2 368
9f95a23c
TL
369 // Reference to global MDS::mds_lock, so that users of MDSRank don't
370 // carry around references to the outer MDS, and we can substitute
371 // a separate lock here in future potentially.
372 ceph::mutex &mds_lock;
373
374 // Reference to global cluster log client, just to avoid initialising
375 // a separate one here.
376 LogChannelRef &clog;
377
378 // Reference to global timer utility, because MDSRank and MDSDaemon
379 // currently both use the same mds_lock, so it makes sense for them
380 // to share a timer.
381 SafeTimer &timer;
382
383 std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */
384
385 Objecter *objecter;
386
387 // sub systems
388 Server *server = nullptr;
389 MDCache *mdcache = nullptr;
390 Locker *locker = nullptr;
391 MDLog *mdlog = nullptr;
392 MDBalancer *balancer = nullptr;
393 ScrubStack *scrubstack = nullptr;
394 DamageTable damage_table;
395
396 InoTable *inotable = nullptr;
397
398 SnapServer *snapserver = nullptr;
399 SnapClient *snapclient = nullptr;
400
401 SessionMap sessionmap;
402
403 PerfCounters *logger = nullptr, *mlogger = nullptr;
404 OpTracker op_tracker;
405
406 // The last different state I held before current
407 MDSMap::DaemonState last_state = MDSMap::STATE_BOOT;
408 // The state assigned to me by the MDSMap
409 MDSMap::DaemonState state = MDSMap::STATE_BOOT;
410
411 bool cluster_degraded = false;
412
413 Finisher *finisher;
7c673cae 414 protected:
9f95a23c
TL
415 typedef enum {
416 // The MDSMap is available, configure default layouts and structures
417 MDS_BOOT_INITIAL = 0,
418 // We are ready to open some inodes
419 MDS_BOOT_OPEN_ROOT,
420 // We are ready to do a replay if needed
421 MDS_BOOT_PREPARE_LOG,
422 // Replay is complete
423 MDS_BOOT_REPLAY_DONE
424 } BootStep;
425
426 class ProgressThread : public Thread {
427 public:
428 explicit ProgressThread(MDSRank *mds_) : mds(mds_) {}
429 void * entry() override;
430 void shutdown();
431 void signal() {cond.notify_all();}
432 private:
433 MDSRank *mds;
434 ceph::condition_variable cond;
435 } progress_thread;
436
437 class C_MDS_StandbyReplayRestart;
438 class C_MDS_StandbyReplayRestartFinish;
439 // Friended to access retry_dispatch
440 friend class C_MDS_RetryMessage;
441 friend class C_MDS_BootStart;
442 friend class C_MDS_InternalBootStart;
443 friend class C_MDS_MonCommand;
444
445 const mds_rank_t whoami;
f67539c2 446 std::string fs_name;
9f95a23c
TL
447
448 ~MDSRank();
449
450 void inc_dispatch_depth() { ++dispatch_depth; }
451 void dec_dispatch_depth() { --dispatch_depth; }
452 void retry_dispatch(const cref_t<Message> &m);
f6b5b4d7
TL
453 bool is_valid_message(const cref_t<Message> &m);
454 void handle_message(const cref_t<Message> &m);
9f95a23c
TL
455 void _advance_queues();
456 bool _dispatch(const cref_t<Message> &m, bool new_msg);
457 bool is_stale_message(const cref_t<Message> &m) const;
458
459 /**
460 * Emit clog warnings for any ops reported as warnings by optracker
461 */
462 void check_ops_in_flight();
463
464 /**
465 * Share MDSMap with clients
466 */
9f95a23c
TL
467 void create_logger();
468
7c673cae 469 void dump_clientreplay_status(Formatter *f) const;
11fdf7f2
TL
470 void command_scrub_start(Formatter *f,
471 std::string_view path, std::string_view tag,
472 const vector<string>& scrubop_vec, Context *on_finish);
473 void command_tag_path(Formatter *f, std::string_view path,
474 std::string_view tag);
475 // scrub control commands
476 void command_scrub_abort(Formatter *f, Context *on_finish);
477 void command_scrub_pause(Formatter *f, Context *on_finish);
478 void command_scrub_resume(Formatter *f);
479 void command_scrub_status(Formatter *f);
480
481 void command_flush_path(Formatter *f, std::string_view path);
7c673cae
FG
482 void command_flush_journal(Formatter *f);
483 void command_get_subtrees(Formatter *f);
484 void command_export_dir(Formatter *f,
11fdf7f2 485 std::string_view path, mds_rank_t dest);
7c673cae
FG
486 bool command_dirfrag_split(
487 cmdmap_t cmdmap,
488 std::ostream &ss);
489 bool command_dirfrag_merge(
490 cmdmap_t cmdmap,
491 std::ostream &ss);
492 bool command_dirfrag_ls(
493 cmdmap_t cmdmap,
494 std::ostream &ss,
495 Formatter *f);
11fdf7f2 496 int _command_export_dir(std::string_view path, mds_rank_t dest);
7c673cae
FG
497 CDir *_command_dirfrag_get(
498 const cmdmap_t &cmdmap,
499 std::ostream &ss);
11fdf7f2
TL
500 void command_openfiles_ls(Formatter *f);
501 void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f);
502 void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
f64942e4
AA
503 void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish);
504
7c673cae
FG
505 // FIXME the state machine logic should be separable from the dispatch
506 // logic that calls it.
507 // >>>
508 void calc_recovery_set();
509 void request_state(MDSMap::DaemonState s);
510
7c673cae
FG
511 void boot_create(); // i am new mds.
512 void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay
513
514 void replay_start();
515 void creating_done();
516 void starting_done();
517 void replay_done();
518 void standby_replay_restart();
519 void _standby_replay_restart_finish(int r, uint64_t old_read_pos);
7c673cae
FG
520
521 void reopen_log();
522
523 void resolve_start();
524 void resolve_done();
525 void reconnect_start();
526 void reconnect_done();
527 void rejoin_joint_start();
528 void rejoin_start();
529 void rejoin_done();
530 void recovery_done(int oldstate);
531 void clientreplay_start();
532 void clientreplay_done();
533 void active_start();
534 void stopping_start();
535 void stopping_done();
536
537 void validate_sessions();
9f95a23c 538
7c673cae
FG
539 void handle_mds_recovery(mds_rank_t who);
540 void handle_mds_failure(mds_rank_t who);
7c673cae
FG
541
542 /* Update MDSMap export_targets for this rank. Called on ::tick(). */
11fdf7f2 543 void update_targets();
94b18763 544
11fdf7f2
TL
545 void _mon_command_finish(int r, std::string_view cmd, std::string_view outs);
546 void set_mdsmap_multimds_snaps_allowed();
9f95a23c
TL
547
548 Context *create_async_exec_context(C_ExecAndReply *ctx);
549
550 // Incarnation as seen in MDSMap at the point where a rank is
551 // assigned.
552 int incarnation = 0;
553
554 // Flag to indicate we entered shutdown: anyone seeing this to be true
555 // after taking mds_lock must drop out.
556 bool stopping = false;
557
558 // PurgeQueue is only used by StrayManager, but it is owned by MDSRank
559 // because its init/shutdown happens at the top level.
560 PurgeQueue purge_queue;
561
f67539c2
TL
562 MetricsHandler metrics_handler;
563 std::unique_ptr<MetricAggregator> metric_aggregator;
564
9f95a23c
TL
565 list<cref_t<Message>> waiting_for_nolaggy;
566 MDSContext::que finished_queue;
567 // Dispatch, retry, queues
568 int dispatch_depth = 0;
569
570 ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock
f67539c2 571 double heartbeat_grace;
9f95a23c
TL
572
573 map<mds_rank_t, version_t> peer_mdsmap_epoch;
574
575 ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename)
576
577 MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin,
578 waiting_for_reconnect, waiting_for_resolve;
579 MDSContext::vec waiting_for_any_client_connection;
580 MDSContext::que replay_queue;
581 bool replaying_requests_done = false;
582
583 map<mds_rank_t, MDSContext::vec > waiting_for_active_peer;
584 map<epoch_t, MDSContext::vec > waiting_for_mdsmap;
585
586 epoch_t osd_epoch_barrier = 0;
587
588 // Const reference to the beacon so that we can behave differently
589 // when it's laggy.
590 Beacon &beacon;
591
592 int mds_slow_req_count = 0;
593
9f95a23c
TL
594 map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */
595
596 Messenger *messenger;
597 MonClient *monc;
598 MgrClient *mgrc;
599
600 Context *respawn_hook;
601 Context *suicide_hook;
602
603 bool standby_replaying = false; // true if current replay pass is in standby-replay mode
94b18763 604private:
f91f0fd5
TL
605 bool send_status = true;
606
b3b6e05e
TL
607 // The metadata pool won't change in the whole life time of the fs,
608 // with this we can get rid of the mds_lock in many places too.
609 int64_t metadata_pool = -1;
610
9f95a23c
TL
611 // "task" string that gets displayed in ceph status
612 inline static const std::string SCRUB_STATUS_KEY = "scrub status";
11fdf7f2 613
9f95a23c
TL
614 void get_task_status(std::map<std::string, std::string> *status);
615 void schedule_update_timer_task();
616 void send_task_status();
617
f67539c2
TL
618 bool is_rank0() const {
619 return whoami == (mds_rank_t)0;
620 }
621
9f95a23c 622 mono_time starttime = mono_clock::zero();
f67539c2 623 boost::asio::io_context& ioc;
7c673cae
FG
624};
625
626/* This expects to be given a reference which it is responsible for.
627 * The finish function calls functions which
628 * will put the Message exactly once.*/
629class C_MDS_RetryMessage : public MDSInternalContext {
7c673cae 630public:
9f95a23c 631 C_MDS_RetryMessage(MDSRank *mds, const cref_t<Message> &m)
11fdf7f2 632 : MDSInternalContext(mds), m(m) {}
7c673cae 633 void finish(int r) override {
11fdf7f2
TL
634 get_mds()->retry_dispatch(m);
635 }
636protected:
9f95a23c 637 cref_t<Message> m;
11fdf7f2
TL
638};
639
640class CF_MDS_RetryMessageFactory : public MDSContextFactory {
641public:
9f95a23c 642 CF_MDS_RetryMessageFactory(MDSRank *mds, const cref_t<Message> &m)
11fdf7f2
TL
643 : mds(mds), m(m) {}
644
645 MDSContext *build() {
646 return new C_MDS_RetryMessage(mds, m);
7c673cae 647 }
11fdf7f2
TL
648private:
649 MDSRank *mds;
9f95a23c 650 cref_t<Message> m;
7c673cae
FG
651};
652
653/**
654 * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e.
655 * the service/dispatcher stuff like init/shutdown that subsystems should
656 * never touch.
657 */
92f5a8d4 658class MDSRankDispatcher : public MDSRank, public md_config_obs_t
7c673cae
FG
659{
660public:
9f95a23c
TL
661 MDSRankDispatcher(
662 mds_rank_t whoami_,
f67539c2 663 std::string fs_name,
9f95a23c
TL
664 ceph::mutex &mds_lock_,
665 LogChannelRef &clog_,
666 SafeTimer &timer_,
667 Beacon &beacon_,
668 std::unique_ptr<MDSMap> &mdsmap_,
669 Messenger *msgr,
670 MonClient *monc_,
671 MgrClient *mgrc,
672 Context *respawn_hook_,
f67539c2
TL
673 Context *suicide_hook_,
674 boost::asio::io_context& ioc);
9f95a23c 675
7c673cae
FG
676 void init();
677 void tick();
678 void shutdown();
9f95a23c
TL
679 void handle_asok_command(
680 std::string_view command,
681 const cmdmap_t& cmdmap,
682 Formatter *f,
683 const bufferlist &inbl,
684 std::function<void(int,const std::string&,bufferlist&)> on_finish);
685 void handle_mds_map(const cref_t<MMDSMap> &m, const MDSMap &oldmap);
7c673cae 686 void handle_osd_map();
7c673cae
FG
687 void update_log_config();
688
92f5a8d4
TL
689 const char** get_tracked_conf_keys() const override final;
690 void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override;
691
adb31ebb 692 void dump_sessions(const SessionFilter &filter, Formatter *f, bool cap_dump=false) const;
9f95a23c
TL
693 void evict_clients(const SessionFilter &filter,
694 std::function<void(int,const std::string&,bufferlist&)> on_finish);
7c673cae
FG
695
696 // Call into me from MDS::ms_dispatch
9f95a23c 697 bool ms_dispatch(const cref_t<Message> &m);
7c673cae
FG
698};
699
7c673cae
FG
700#endif // MDS_RANK_H_
701