]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSRank.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / mds / MDSRank.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef MDS_RANK_H_
16#define MDS_RANK_H_
17
11fdf7f2 18#include <string_view>
94b18763 19
f67539c2
TL
20#include <boost/asio/io_context.hpp>
21
7c673cae
FG
22#include "common/DecayCounter.h"
23#include "common/LogClient.h"
24#include "common/Timer.h"
a4b75251 25#include "common/fair_mutex.h"
7c673cae 26#include "common/TrackedOp.h"
a4b75251 27#include "common/ceph_mutex.h"
7c673cae 28
9f95a23c
TL
29#include "include/common_fwd.h"
30
11fdf7f2 31#include "messages/MClientRequest.h"
7c673cae 32#include "messages/MCommand.h"
11fdf7f2 33#include "messages/MMDSMap.h"
7c673cae
FG
34
35#include "Beacon.h"
36#include "DamageTable.h"
37#include "MDSMap.h"
38#include "SessionMap.h"
39#include "MDCache.h"
7c673cae 40#include "MDLog.h"
11fdf7f2 41#include "MDSContext.h"
7c673cae 42#include "PurgeQueue.h"
91327a77 43#include "Server.h"
f67539c2 44#include "MetricsHandler.h"
7c673cae
FG
45#include "osdc/Journaler.h"
46
47// Full .h import instead of forward declaration for PerfCounter, for the
48// benefit of those including this header and using MDSRank::logger
49#include "common/perf_counters.h"
50
// Indices for the counters kept in MDSRank::logger (a PerfCounters
// instance); e.g. get_num_requests() reads l_mds_request through it.
// l_mds_first / l_mds_last are the range sentinels: every real counter
// must sit strictly between them, and the order below fixes each
// counter's numeric id.
enum {
  l_mds_first = 2000,

  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_slow_reply,
  l_mds_forward,

  l_mds_dir_fetch_complete,
  l_mds_dir_fetch_keys,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,

  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,

  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,

  l_mds_load_cent,
  l_mds_dispatch_queue_len,

  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,

  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,

  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,

  l_mds_scrub_backtrace_fetch,
  l_mds_scrub_set_tag,
  l_mds_scrub_backtrace_repaired,
  l_mds_scrub_inotable_repaired,
  l_mds_scrub_dir_inodes,
  l_mds_scrub_dir_base_inodes,
  l_mds_scrub_dirfrag_rstats,
  l_mds_scrub_file_inodes,

  l_mdss_handle_inode_file_caps,
  l_mdss_ceph_cap_op_revoke,
  l_mdss_ceph_cap_op_grant,
  l_mdss_ceph_cap_op_trunc,
  l_mdss_ceph_cap_op_flushsnap_ack,
  l_mdss_ceph_cap_op_flush_ack,
  l_mdss_handle_client_caps,
  l_mdss_handle_client_caps_dirty,
  l_mdss_handle_client_cap_release,
  l_mdss_process_request_cap_release,

  l_mds_last,
};
111
// memory utilization
//
// Counter indices bracketed by the l_mdm_first / l_mdm_last sentinels.
// The order below fixes each counter's numeric id.
enum {
  l_mdm_first = 2500,

  l_mdm_ino,
  l_mdm_inoa,
  l_mdm_inos,

  l_mdm_dir,
  l_mdm_dira,
  l_mdm_dirs,

  l_mdm_dn,
  l_mdm_dna,
  l_mdm_dns,

  l_mdm_cap,
  l_mdm_capa,
  l_mdm_caps,

  l_mdm_rss,
  l_mdm_heap,

  l_mdm_last,
};
131
132namespace ceph {
133 struct heartbeat_handle_d;
134}
135
7c673cae
FG
136class Locker;
137class MDCache;
138class MDLog;
139class MDBalancer;
140class InoTable;
141class SnapServer;
142class SnapClient;
143class MDSTableServer;
144class MDSTableClient;
145class Messenger;
f67539c2 146class MetricAggregator;
7c673cae
FG
147class Objecter;
148class MonClient;
9f95a23c 149class MgrClient;
7c673cae 150class Finisher;
7c673cae 151class ScrubStack;
11fdf7f2 152class C_ExecAndReply;
7c673cae
FG
153
154/**
155 * The public part of this class's interface is what's exposed to all
156 * the various subsystems (server, mdcache, etc), such as pointers
157 * to the other subsystems, and message-sending calls.
158 */
159class MDSRank {
7c673cae 160 public:
f64942e4
AA
161 friend class C_Flush_Journal;
162 friend class C_Drop_Cache;
11fdf7f2
TL
163 friend class C_CacheDropExecAndReply;
164 friend class C_ScrubExecAndReply;
165 friend class C_ScrubControlExecAndReply;
166
9f95a23c
TL
167 CephContext *cct;
168
169 MDSRank(
170 mds_rank_t whoami_,
a4b75251 171 ceph::fair_mutex &mds_lock_,
9f95a23c 172 LogChannelRef &clog_,
a4b75251 173 CommonSafeTimer<ceph::fair_mutex> &timer_,
9f95a23c
TL
174 Beacon &beacon_,
175 std::unique_ptr<MDSMap> & mdsmap_,
176 Messenger *msgr,
177 MonClient *monc_,
178 MgrClient *mgrc,
179 Context *respawn_hook_,
f67539c2
TL
180 Context *suicide_hook_,
181 boost::asio::io_context& ioc);
9f95a23c 182
7c673cae 183 mds_rank_t get_nodeid() const { return whoami; }
b3b6e05e
TL
184 int64_t get_metadata_pool() const
185 {
186 return metadata_pool;
187 }
7c673cae 188
94b18763
FG
189 mono_time get_starttime() const {
190 return starttime;
191 }
20effc67 192 std::chrono::duration<double> get_uptime() const {
94b18763 193 mono_time now = mono_clock::now();
20effc67 194 return std::chrono::duration<double>(now-starttime);
94b18763
FG
195 }
196
7c673cae
FG
197 bool is_daemon_stopping() const;
198
7c673cae
FG
199 MDSTableClient *get_table_client(int t);
200 MDSTableServer *get_table_server(int t);
201
7c673cae
FG
202 Session *get_session(client_t client) {
203 return sessionmap.get_session(entity_name_t::CLIENT(client.v));
204 }
9f95a23c 205 Session *get_session(const cref_t<Message> &m);
7c673cae
FG
206
207 MDSMap::DaemonState get_state() const { return state; }
208 MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); }
209
210 bool is_creating() const { return state == MDSMap::STATE_CREATING; }
211 bool is_starting() const { return state == MDSMap::STATE_STARTING; }
212 bool is_standby() const { return state == MDSMap::STATE_STANDBY; }
213 bool is_replay() const { return state == MDSMap::STATE_REPLAY; }
214 bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; }
215 bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; }
216 bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; }
217 bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; }
218 bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; }
219 bool is_active() const { return state == MDSMap::STATE_ACTIVE; }
220 bool is_stopping() const { return state == MDSMap::STATE_STOPPING; }
221 bool is_any_replay() const { return (is_replay() || is_standby_replay()); }
222 bool is_stopped() const { return mdsmap->is_stopped(whoami); }
223 bool is_cluster_degraded() const { return cluster_degraded; }
11fdf7f2 224 bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); }
7c673cae 225
eafe8130 226 bool is_cache_trimmable() const {
b3b6e05e 227 return is_standby_replay() || is_clientreplay() || is_active() || is_stopping();
eafe8130
TL
228 }
229
7c673cae 230 void handle_write_error(int err);
f67539c2 231 void handle_write_error_with_lock(int err);
7c673cae 232
c07f9fc5 233 void update_mlogger();
7c673cae 234
11fdf7f2 235 void queue_waiter(MDSContext *c) {
91327a77
AA
236 finished_queue.push_back(c);
237 progress_thread.signal();
238 }
494da23a
TL
239 void queue_waiter_front(MDSContext *c) {
240 finished_queue.push_front(c);
241 progress_thread.signal();
242 }
11fdf7f2
TL
243 void queue_waiters(MDSContext::vec& ls) {
244 MDSContext::vec v;
245 v.swap(ls);
246 std::copy(v.begin(), v.end(), std::back_inserter(finished_queue));
7c673cae
FG
247 progress_thread.signal();
248 }
11fdf7f2
TL
249 void queue_waiters_front(MDSContext::vec& ls) {
250 MDSContext::vec v;
251 v.swap(ls);
252 std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue));
91327a77
AA
253 progress_thread.signal();
254 }
7c673cae 255
7c673cae
FG
256 // Daemon lifetime functions: these guys break the abstraction
257 // and call up into the parent MDSDaemon instance. It's kind
258 // of unavoidable: if we want any depth into our calls
259 // to be able to e.g. tear down the whole process, we have to
260 // have a reference going all the way down.
261 // >>>
262 void suicide();
263 void respawn();
264 // <<<
265
266 /**
267 * Call this periodically if inside a potentially long running piece
268 * of code while holding the mds_lock
269 */
270 void heartbeat_reset();
33c7a0ef
TL
271 int heartbeat_reset_grace(int count=1) {
272 return count * _heartbeat_reset_grace;
273 }
7c673cae 274
aee94f69
TL
275 /**
276 * Abort the MDS and flush any clog messages.
277 *
278 * Callers must already hold mds_lock.
279 */
280 void abort(std::string_view msg);
281
7c673cae
FG
282 /**
283 * Report state DAMAGED to the mon, and then pass on to respawn(). Call
284 * this when an unrecoverable error is encountered while attempting
285 * to load an MDS rank's data structures. This is *not* for use with
286 * errors affecting normal dirfrag/inode objects -- they should be handled
287 * through cleaner scrub/repair mechanisms.
288 *
289 * Callers must already hold mds_lock.
290 */
291 void damaged();
292
293 /**
294 * Wrapper around `damaged` for users who are not
295 * already holding mds_lock.
296 *
297 * Callers must not already hold mds_lock.
298 */
299 void damaged_unlocked();
300
91327a77
AA
301 double last_cleared_laggy() const {
302 return beacon.last_cleared_laggy();
303 }
304
305 double get_dispatch_queue_max_age(utime_t now) const;
7c673cae 306
9f95a23c 307 void send_message_mds(const ref_t<Message>& m, mds_rank_t mds);
f67539c2 308 void send_message_mds(const ref_t<Message>& m, const entity_addrvec_t &addr);
aee94f69 309 void forward_message_mds(MDRequestRef& mdr, mds_rank_t mds);
9f95a23c
TL
310 void send_message_client_counted(const ref_t<Message>& m, client_t client);
311 void send_message_client_counted(const ref_t<Message>& m, Session* session);
312 void send_message_client_counted(const ref_t<Message>& m, const ConnectionRef& connection);
313 void send_message_client(const ref_t<Message>& m, Session* session);
314 void send_message(const ref_t<Message>& m, const ConnectionRef& c);
7c673cae 315
20effc67
TL
316 void wait_for_bootstrapped_peer(mds_rank_t who, MDSContext *c) {
317 waiting_for_bootstrapping_peer[who].push_back(c);
318 }
11fdf7f2 319 void wait_for_active_peer(mds_rank_t who, MDSContext *c) {
7c673cae
FG
320 waiting_for_active_peer[who].push_back(c);
321 }
11fdf7f2
TL
322 void wait_for_cluster_recovered(MDSContext *c) {
323 ceph_assert(cluster_degraded);
7c673cae
FG
324 waiting_for_active_peer[MDS_RANK_NONE].push_back(c);
325 }
326
11fdf7f2 327 void wait_for_any_client_connection(MDSContext *c) {
28e407b8
AA
328 waiting_for_any_client_connection.push_back(c);
329 }
330 void kick_waiters_for_any_client_connection(void) {
331 finish_contexts(g_ceph_context, waiting_for_any_client_connection);
332 }
11fdf7f2 333 void wait_for_active(MDSContext *c) {
7c673cae
FG
334 waiting_for_active.push_back(c);
335 }
11fdf7f2 336 void wait_for_replay(MDSContext *c) {
7c673cae
FG
337 waiting_for_replay.push_back(c);
338 }
11fdf7f2 339 void wait_for_rejoin(MDSContext *c) {
a8e16298
TL
340 waiting_for_rejoin.push_back(c);
341 }
11fdf7f2 342 void wait_for_reconnect(MDSContext *c) {
7c673cae
FG
343 waiting_for_reconnect.push_back(c);
344 }
11fdf7f2 345 void wait_for_resolve(MDSContext *c) {
7c673cae
FG
346 waiting_for_resolve.push_back(c);
347 }
11fdf7f2 348 void wait_for_mdsmap(epoch_t e, MDSContext *c) {
7c673cae
FG
349 waiting_for_mdsmap[e].push_back(c);
350 }
11fdf7f2 351 void enqueue_replay(MDSContext *c) {
7c673cae
FG
352 replay_queue.push_back(c);
353 }
354
355 bool queue_one_replay();
11fdf7f2 356 void maybe_clientreplay_done();
7c673cae
FG
357
358 void set_osd_epoch_barrier(epoch_t e);
359 epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;}
360 epoch_t get_osd_epoch() const;
361
362 ceph_tid_t issue_tid() { return ++last_tid; }
363
11fdf7f2 364 MDSMap *get_mds_map() { return mdsmap.get(); }
7c673cae 365
28e407b8 366 uint64_t get_num_requests() const { return logger->get(l_mds_request); }
7c673cae
FG
367
368 int get_mds_slow_req_count() const { return mds_slow_req_count; }
369
370 void dump_status(Formatter *f) const;
371
11fdf7f2 372 void hit_export_target(mds_rank_t rank, double amount=-1.0);
7c673cae 373 bool is_export_target(mds_rank_t rank) {
20effc67 374 const std::set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets;
7c673cae
FG
375 return map_targets.count(rank);
376 }
377
f67539c2 378 bool evict_client(int64_t session_id, bool wait, bool blocklist,
11fdf7f2 379 std::ostream& ss, Context *on_killed=nullptr);
92f5a8d4
TL
380 int config_client(int64_t session_id, bool remove,
381 const std::string& option, const std::string& value,
382 std::ostream& ss);
1e59de90
TL
383 void schedule_inmemory_logger();
384
385 double get_inject_journal_corrupt_dentry_first() const {
386 return inject_journal_corrupt_dentry_first;
387 }
11fdf7f2 388
9f95a23c
TL
389 // Reference to global MDS::mds_lock, so that users of MDSRank don't
390 // carry around references to the outer MDS, and we can substitute
391 // a separate lock here in future potentially.
a4b75251 392 ceph::fair_mutex &mds_lock;
9f95a23c
TL
393
394 // Reference to global cluster log client, just to avoid initialising
395 // a separate one here.
396 LogChannelRef &clog;
397
398 // Reference to global timer utility, because MDSRank and MDSDaemon
399 // currently both use the same mds_lock, so it makes sense for them
400 // to share a timer.
a4b75251 401 CommonSafeTimer<ceph::fair_mutex> &timer;
9f95a23c
TL
402
403 std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */
404
405 Objecter *objecter;
406
407 // sub systems
408 Server *server = nullptr;
409 MDCache *mdcache = nullptr;
410 Locker *locker = nullptr;
411 MDLog *mdlog = nullptr;
412 MDBalancer *balancer = nullptr;
413 ScrubStack *scrubstack = nullptr;
414 DamageTable damage_table;
415
416 InoTable *inotable = nullptr;
417
418 SnapServer *snapserver = nullptr;
419 SnapClient *snapclient = nullptr;
420
421 SessionMap sessionmap;
422
423 PerfCounters *logger = nullptr, *mlogger = nullptr;
424 OpTracker op_tracker;
425
426 // The last different state I held before current
427 MDSMap::DaemonState last_state = MDSMap::STATE_BOOT;
428 // The state assigned to me by the MDSMap
2a845540 429 MDSMap::DaemonState state = MDSMap::STATE_STANDBY;
9f95a23c
TL
430
431 bool cluster_degraded = false;
432
433 Finisher *finisher;
7c673cae 434 protected:
9f95a23c
TL
435 typedef enum {
436 // The MDSMap is available, configure default layouts and structures
437 MDS_BOOT_INITIAL = 0,
438 // We are ready to open some inodes
439 MDS_BOOT_OPEN_ROOT,
440 // We are ready to do a replay if needed
441 MDS_BOOT_PREPARE_LOG,
442 // Replay is complete
443 MDS_BOOT_REPLAY_DONE
444 } BootStep;
445
446 class ProgressThread : public Thread {
447 public:
448 explicit ProgressThread(MDSRank *mds_) : mds(mds_) {}
449 void * entry() override;
450 void shutdown();
451 void signal() {cond.notify_all();}
452 private:
453 MDSRank *mds;
a4b75251 454 std::condition_variable_any cond;
9f95a23c
TL
455 } progress_thread;
456
457 class C_MDS_StandbyReplayRestart;
458 class C_MDS_StandbyReplayRestartFinish;
459 // Friended to access retry_dispatch
460 friend class C_MDS_RetryMessage;
461 friend class C_MDS_BootStart;
462 friend class C_MDS_InternalBootStart;
463 friend class C_MDS_MonCommand;
464
465 const mds_rank_t whoami;
466
467 ~MDSRank();
468
469 void inc_dispatch_depth() { ++dispatch_depth; }
470 void dec_dispatch_depth() { --dispatch_depth; }
471 void retry_dispatch(const cref_t<Message> &m);
f6b5b4d7
TL
472 bool is_valid_message(const cref_t<Message> &m);
473 void handle_message(const cref_t<Message> &m);
9f95a23c
TL
474 void _advance_queues();
475 bool _dispatch(const cref_t<Message> &m, bool new_msg);
476 bool is_stale_message(const cref_t<Message> &m) const;
477
478 /**
479 * Emit clog warnings for any ops reported as warnings by optracker
480 */
481 void check_ops_in_flight();
482
483 /**
484 * Share MDSMap with clients
485 */
9f95a23c
TL
486 void create_logger();
487
7c673cae 488 void dump_clientreplay_status(Formatter *f) const;
11fdf7f2
TL
489 void command_scrub_start(Formatter *f,
490 std::string_view path, std::string_view tag,
20effc67 491 const std::vector<std::string>& scrubop_vec, Context *on_finish);
11fdf7f2
TL
492 void command_tag_path(Formatter *f, std::string_view path,
493 std::string_view tag);
494 // scrub control commands
495 void command_scrub_abort(Formatter *f, Context *on_finish);
496 void command_scrub_pause(Formatter *f, Context *on_finish);
497 void command_scrub_resume(Formatter *f);
498 void command_scrub_status(Formatter *f);
499
500 void command_flush_path(Formatter *f, std::string_view path);
7c673cae
FG
501 void command_flush_journal(Formatter *f);
502 void command_get_subtrees(Formatter *f);
503 void command_export_dir(Formatter *f,
11fdf7f2 504 std::string_view path, mds_rank_t dest);
7c673cae
FG
505 bool command_dirfrag_split(
506 cmdmap_t cmdmap,
507 std::ostream &ss);
508 bool command_dirfrag_merge(
509 cmdmap_t cmdmap,
510 std::ostream &ss);
511 bool command_dirfrag_ls(
512 cmdmap_t cmdmap,
513 std::ostream &ss,
514 Formatter *f);
11fdf7f2 515 int _command_export_dir(std::string_view path, mds_rank_t dest);
7c673cae
FG
516 CDir *_command_dirfrag_get(
517 const cmdmap_t &cmdmap,
518 std::ostream &ss);
11fdf7f2
TL
519 void command_openfiles_ls(Formatter *f);
520 void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f);
521 void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
f64942e4
AA
522 void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish);
523
7c673cae
FG
524 // FIXME the state machine logic should be separable from the dispatch
525 // logic that calls it.
526 // >>>
527 void calc_recovery_set();
528 void request_state(MDSMap::DaemonState s);
529
7c673cae
FG
530 void boot_create(); // i am new mds.
531 void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay
532
533 void replay_start();
534 void creating_done();
535 void starting_done();
536 void replay_done();
537 void standby_replay_restart();
538 void _standby_replay_restart_finish(int r, uint64_t old_read_pos);
7c673cae
FG
539
540 void reopen_log();
541
542 void resolve_start();
543 void resolve_done();
544 void reconnect_start();
545 void reconnect_done();
546 void rejoin_joint_start();
547 void rejoin_start();
548 void rejoin_done();
549 void recovery_done(int oldstate);
550 void clientreplay_start();
551 void clientreplay_done();
552 void active_start();
553 void stopping_start();
554 void stopping_done();
555
556 void validate_sessions();
9f95a23c 557
7c673cae
FG
558 void handle_mds_recovery(mds_rank_t who);
559 void handle_mds_failure(mds_rank_t who);
7c673cae
FG
560
561 /* Update MDSMap export_targets for this rank. Called on ::tick(). */
11fdf7f2 562 void update_targets();
94b18763 563
11fdf7f2
TL
564 void _mon_command_finish(int r, std::string_view cmd, std::string_view outs);
565 void set_mdsmap_multimds_snaps_allowed();
9f95a23c
TL
566
567 Context *create_async_exec_context(C_ExecAndReply *ctx);
568
a4b75251
TL
569 // blocklist the provided addrs and set OSD epoch barrier
570 // with the provided epoch.
571 void apply_blocklist(const std::set<entity_addr_t> &addrs, epoch_t epoch);
572
1e59de90
TL
573 void reset_event_flags();
574
9f95a23c
TL
575 // Incarnation as seen in MDSMap at the point where a rank is
576 // assigned.
577 int incarnation = 0;
578
579 // Flag to indicate we entered shutdown: anyone seeing this to be true
580 // after taking mds_lock must drop out.
581 bool stopping = false;
582
583 // PurgeQueue is only used by StrayManager, but it is owned by MDSRank
584 // because its init/shutdown happens at the top level.
585 PurgeQueue purge_queue;
586
f67539c2
TL
587 MetricsHandler metrics_handler;
588 std::unique_ptr<MetricAggregator> metric_aggregator;
589
20effc67 590 std::list<cref_t<Message>> waiting_for_nolaggy;
9f95a23c
TL
591 MDSContext::que finished_queue;
592 // Dispatch, retry, queues
593 int dispatch_depth = 0;
594
595 ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock
f67539c2 596 double heartbeat_grace;
33c7a0ef 597 int _heartbeat_reset_grace;
9f95a23c 598
20effc67 599 std::map<mds_rank_t, version_t> peer_mdsmap_epoch;
9f95a23c
TL
600
601 ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename)
602
603 MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin,
604 waiting_for_reconnect, waiting_for_resolve;
605 MDSContext::vec waiting_for_any_client_connection;
606 MDSContext::que replay_queue;
607 bool replaying_requests_done = false;
608
20effc67
TL
609 std::map<mds_rank_t, MDSContext::vec> waiting_for_active_peer;
610 std::map<mds_rank_t, MDSContext::vec> waiting_for_bootstrapping_peer;
611 std::map<epoch_t, MDSContext::vec> waiting_for_mdsmap;
9f95a23c
TL
612
613 epoch_t osd_epoch_barrier = 0;
614
615 // Const reference to the beacon so that we can behave differently
616 // when it's laggy.
617 Beacon &beacon;
618
619 int mds_slow_req_count = 0;
620
20effc67 621 std::map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */
9f95a23c
TL
622
623 Messenger *messenger;
624 MonClient *monc;
625 MgrClient *mgrc;
626
627 Context *respawn_hook;
628 Context *suicide_hook;
629
630 bool standby_replaying = false; // true if current replay pass is in standby-replay mode
1e59de90
TL
631 uint64_t extraordinary_events_dump_interval = 0;
632 double inject_journal_corrupt_dentry_first = 0.0;
94b18763 633private:
f91f0fd5
TL
634 bool send_status = true;
635
b3b6e05e
TL
636 // The metadata pool won't change in the whole life time of the fs,
637 // with this we can get rid of the mds_lock in many places too.
638 int64_t metadata_pool = -1;
639
9f95a23c
TL
640 // "task" string that gets displayed in ceph status
641 inline static const std::string SCRUB_STATUS_KEY = "scrub status";
11fdf7f2 642
1e59de90
TL
643 bool client_eviction_dump = false;
644
9f95a23c
TL
645 void get_task_status(std::map<std::string, std::string> *status);
646 void schedule_update_timer_task();
647 void send_task_status();
648
1e59de90 649 void inmemory_logger();
f67539c2
TL
650 bool is_rank0() const {
651 return whoami == (mds_rank_t)0;
652 }
653
9f95a23c 654 mono_time starttime = mono_clock::zero();
f67539c2 655 boost::asio::io_context& ioc;
7c673cae
FG
656};
657
7c673cae 658class C_MDS_RetryMessage : public MDSInternalContext {
7c673cae 659public:
9f95a23c 660 C_MDS_RetryMessage(MDSRank *mds, const cref_t<Message> &m)
11fdf7f2 661 : MDSInternalContext(mds), m(m) {}
7c673cae 662 void finish(int r) override {
11fdf7f2
TL
663 get_mds()->retry_dispatch(m);
664 }
665protected:
9f95a23c 666 cref_t<Message> m;
11fdf7f2
TL
667};
668
669class CF_MDS_RetryMessageFactory : public MDSContextFactory {
670public:
9f95a23c 671 CF_MDS_RetryMessageFactory(MDSRank *mds, const cref_t<Message> &m)
11fdf7f2
TL
672 : mds(mds), m(m) {}
673
674 MDSContext *build() {
675 return new C_MDS_RetryMessage(mds, m);
7c673cae 676 }
11fdf7f2
TL
677private:
678 MDSRank *mds;
9f95a23c 679 cref_t<Message> m;
7c673cae
FG
680};
681
682/**
683 * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e.
684 * the service/dispatcher stuff like init/shutdown that subsystems should
685 * never touch.
686 */
92f5a8d4 687class MDSRankDispatcher : public MDSRank, public md_config_obs_t
7c673cae
FG
688{
689public:
9f95a23c
TL
690 MDSRankDispatcher(
691 mds_rank_t whoami_,
a4b75251 692 ceph::fair_mutex &mds_lock_,
9f95a23c 693 LogChannelRef &clog_,
a4b75251 694 CommonSafeTimer<ceph::fair_mutex> &timer_,
9f95a23c
TL
695 Beacon &beacon_,
696 std::unique_ptr<MDSMap> &mdsmap_,
697 Messenger *msgr,
698 MonClient *monc_,
699 MgrClient *mgrc,
700 Context *respawn_hook_,
f67539c2
TL
701 Context *suicide_hook_,
702 boost::asio::io_context& ioc);
9f95a23c 703
7c673cae
FG
704 void init();
705 void tick();
706 void shutdown();
9f95a23c
TL
707 void handle_asok_command(
708 std::string_view command,
709 const cmdmap_t& cmdmap,
710 Formatter *f,
711 const bufferlist &inbl,
712 std::function<void(int,const std::string&,bufferlist&)> on_finish);
713 void handle_mds_map(const cref_t<MMDSMap> &m, const MDSMap &oldmap);
7c673cae 714 void handle_osd_map();
7c673cae
FG
715 void update_log_config();
716
92f5a8d4
TL
717 const char** get_tracked_conf_keys() const override final;
718 void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override;
719
adb31ebb 720 void dump_sessions(const SessionFilter &filter, Formatter *f, bool cap_dump=false) const;
9f95a23c
TL
721 void evict_clients(const SessionFilter &filter,
722 std::function<void(int,const std::string&,bufferlist&)> on_finish);
7c673cae
FG
723
724 // Call into me from MDS::ms_dispatch
9f95a23c 725 bool ms_dispatch(const cref_t<Message> &m);
7c673cae
FG
726};
727
7c673cae 728#endif // MDS_RANK_H_