]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef MDS_RANK_H_ | |
16 | #define MDS_RANK_H_ | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
f67539c2 TL |
20 | #include <boost/asio/io_context.hpp> |
21 | ||
7c673cae FG |
22 | #include "common/DecayCounter.h" |
23 | #include "common/LogClient.h" | |
24 | #include "common/Timer.h" | |
a4b75251 | 25 | #include "common/fair_mutex.h" |
7c673cae | 26 | #include "common/TrackedOp.h" |
a4b75251 | 27 | #include "common/ceph_mutex.h" |
7c673cae | 28 | |
9f95a23c TL |
29 | #include "include/common_fwd.h" |
30 | ||
11fdf7f2 | 31 | #include "messages/MClientRequest.h" |
7c673cae | 32 | #include "messages/MCommand.h" |
11fdf7f2 | 33 | #include "messages/MMDSMap.h" |
7c673cae FG |
34 | |
35 | #include "Beacon.h" | |
36 | #include "DamageTable.h" | |
37 | #include "MDSMap.h" | |
38 | #include "SessionMap.h" | |
39 | #include "MDCache.h" | |
7c673cae | 40 | #include "MDLog.h" |
11fdf7f2 | 41 | #include "MDSContext.h" |
7c673cae | 42 | #include "PurgeQueue.h" |
91327a77 | 43 | #include "Server.h" |
f67539c2 | 44 | #include "MetricsHandler.h" |
7c673cae FG |
45 | #include "osdc/Journaler.h" |
46 | ||
47 | // Full .h import instead of forward declaration for PerfCounter, for the | |
48 | // benefit of those including this header and using MDSRank::logger | |
49 | #include "common/perf_counters.h" | |
50 | ||
// Keys for the general MDS perf counters. MDSRank::logger is queried with
// these (e.g. logger->get(l_mds_request)).
//
// l_mds_first and l_mds_last are range sentinels, not counters. Values are
// assigned sequentially from l_mds_first, so inserting or reordering entries
// renumbers everything after the change.
enum {
  l_mds_first = 2000,
  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_slow_reply,
  l_mds_forward,
  l_mds_dir_fetch_complete,
  l_mds_dir_fetch_keys,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,
  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,
  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,
  l_mds_load_cent,
  l_mds_dispatch_queue_len,
  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,
  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,
  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,
  l_mds_scrub_backtrace_fetch,
  l_mds_scrub_set_tag,
  l_mds_scrub_backtrace_repaired,
  l_mds_scrub_inotable_repaired,
  l_mds_scrub_dir_inodes,
  l_mds_scrub_dir_base_inodes,
  l_mds_scrub_dirfrag_rstats,
  l_mds_scrub_file_inodes,
  l_mdss_handle_inode_file_caps,
  l_mdss_ceph_cap_op_revoke,
  l_mdss_ceph_cap_op_grant,
  l_mdss_ceph_cap_op_trunc,
  l_mdss_ceph_cap_op_flushsnap_ack,
  l_mdss_ceph_cap_op_flush_ack,
  l_mdss_handle_client_caps,
  l_mdss_handle_client_caps_dirty,
  l_mdss_handle_client_cap_release,
  l_mdss_process_request_cap_release,
  l_mds_last,
};
111 | ||
// Keys for the memory-utilization perf counters.
// NOTE(review): presumably these back MDSRank::mlogger / update_mlogger() --
// confirm against MDSRank.cc.
//
// l_mdm_first and l_mdm_last are range sentinels; values are assigned
// sequentially from l_mdm_first.
enum {
  l_mdm_first = 2500,
  l_mdm_ino,
  l_mdm_inoa,
  l_mdm_inos,
  l_mdm_dir,
  l_mdm_dira,
  l_mdm_dirs,
  l_mdm_dn,
  l_mdm_dna,
  l_mdm_dns,
  l_mdm_cap,
  l_mdm_capa,
  l_mdm_caps,
  l_mdm_rss,
  l_mdm_heap,
  l_mdm_last,
};
131 | ||
// Forward declarations for types this header only names through pointers,
// references or smart pointers, so their full definitions are not required
// here.
namespace ceph {
  struct heartbeat_handle_d;
}

class Locker;
class MDCache;
class MDLog;
class MDBalancer;
class InoTable;
class SnapServer;
class SnapClient;
class MDSTableServer;
class MDSTableClient;
class Messenger;
class MetricAggregator;
class Objecter;
class MonClient;
class MgrClient;
class Finisher;
class ScrubStack;
class C_ExecAndReply;
7c673cae FG |
153 | |
154 | /** | |
155 | * The public part of this class's interface is what's exposed to all | |
156 | * the various subsystems (server, mdcache, etc), such as pointers | |
157 | * to the other subsystems, and message-sending calls. | |
158 | */ | |
159 | class MDSRank { | |
7c673cae | 160 | public: |
f64942e4 AA |
161 | friend class C_Flush_Journal; |
162 | friend class C_Drop_Cache; | |
11fdf7f2 TL |
163 | friend class C_CacheDropExecAndReply; |
164 | friend class C_ScrubExecAndReply; | |
165 | friend class C_ScrubControlExecAndReply; | |
166 | ||
9f95a23c TL |
167 | CephContext *cct; |
168 | ||
169 | MDSRank( | |
170 | mds_rank_t whoami_, | |
a4b75251 | 171 | ceph::fair_mutex &mds_lock_, |
9f95a23c | 172 | LogChannelRef &clog_, |
a4b75251 | 173 | CommonSafeTimer<ceph::fair_mutex> &timer_, |
9f95a23c TL |
174 | Beacon &beacon_, |
175 | std::unique_ptr<MDSMap> & mdsmap_, | |
176 | Messenger *msgr, | |
177 | MonClient *monc_, | |
178 | MgrClient *mgrc, | |
179 | Context *respawn_hook_, | |
f67539c2 TL |
180 | Context *suicide_hook_, |
181 | boost::asio::io_context& ioc); | |
9f95a23c | 182 | |
7c673cae | 183 | mds_rank_t get_nodeid() const { return whoami; } |
b3b6e05e TL |
184 | int64_t get_metadata_pool() const |
185 | { | |
186 | return metadata_pool; | |
187 | } | |
7c673cae | 188 | |
94b18763 FG |
189 | mono_time get_starttime() const { |
190 | return starttime; | |
191 | } | |
20effc67 | 192 | std::chrono::duration<double> get_uptime() const { |
94b18763 | 193 | mono_time now = mono_clock::now(); |
20effc67 | 194 | return std::chrono::duration<double>(now-starttime); |
94b18763 FG |
195 | } |
196 | ||
7c673cae FG |
197 | bool is_daemon_stopping() const; |
198 | ||
7c673cae FG |
199 | MDSTableClient *get_table_client(int t); |
200 | MDSTableServer *get_table_server(int t); | |
201 | ||
7c673cae FG |
202 | Session *get_session(client_t client) { |
203 | return sessionmap.get_session(entity_name_t::CLIENT(client.v)); | |
204 | } | |
9f95a23c | 205 | Session *get_session(const cref_t<Message> &m); |
7c673cae FG |
206 | |
207 | MDSMap::DaemonState get_state() const { return state; } | |
208 | MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); } | |
209 | ||
210 | bool is_creating() const { return state == MDSMap::STATE_CREATING; } | |
211 | bool is_starting() const { return state == MDSMap::STATE_STARTING; } | |
212 | bool is_standby() const { return state == MDSMap::STATE_STANDBY; } | |
213 | bool is_replay() const { return state == MDSMap::STATE_REPLAY; } | |
214 | bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; } | |
215 | bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; } | |
216 | bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; } | |
217 | bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; } | |
218 | bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; } | |
219 | bool is_active() const { return state == MDSMap::STATE_ACTIVE; } | |
220 | bool is_stopping() const { return state == MDSMap::STATE_STOPPING; } | |
221 | bool is_any_replay() const { return (is_replay() || is_standby_replay()); } | |
222 | bool is_stopped() const { return mdsmap->is_stopped(whoami); } | |
223 | bool is_cluster_degraded() const { return cluster_degraded; } | |
11fdf7f2 | 224 | bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); } |
7c673cae | 225 | |
eafe8130 | 226 | bool is_cache_trimmable() const { |
b3b6e05e | 227 | return is_standby_replay() || is_clientreplay() || is_active() || is_stopping(); |
eafe8130 TL |
228 | } |
229 | ||
7c673cae | 230 | void handle_write_error(int err); |
f67539c2 | 231 | void handle_write_error_with_lock(int err); |
7c673cae | 232 | |
c07f9fc5 | 233 | void update_mlogger(); |
7c673cae | 234 | |
11fdf7f2 | 235 | void queue_waiter(MDSContext *c) { |
91327a77 AA |
236 | finished_queue.push_back(c); |
237 | progress_thread.signal(); | |
238 | } | |
494da23a TL |
239 | void queue_waiter_front(MDSContext *c) { |
240 | finished_queue.push_front(c); | |
241 | progress_thread.signal(); | |
242 | } | |
11fdf7f2 TL |
243 | void queue_waiters(MDSContext::vec& ls) { |
244 | MDSContext::vec v; | |
245 | v.swap(ls); | |
246 | std::copy(v.begin(), v.end(), std::back_inserter(finished_queue)); | |
7c673cae FG |
247 | progress_thread.signal(); |
248 | } | |
11fdf7f2 TL |
249 | void queue_waiters_front(MDSContext::vec& ls) { |
250 | MDSContext::vec v; | |
251 | v.swap(ls); | |
252 | std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue)); | |
91327a77 AA |
253 | progress_thread.signal(); |
254 | } | |
7c673cae | 255 | |
7c673cae FG |
256 | // Daemon lifetime functions: these guys break the abstraction |
257 | // and call up into the parent MDSDaemon instance. It's kind | |
258 | // of unavoidable: if we want any depth into our calls | |
259 | // to be able to e.g. tear down the whole process, we have to | |
260 | // have a reference going all the way down. | |
261 | // >>> | |
262 | void suicide(); | |
263 | void respawn(); | |
264 | // <<< | |
265 | ||
266 | /** | |
267 | * Call this periodically if inside a potentially long running piece | |
268 | * of code while holding the mds_lock | |
269 | */ | |
270 | void heartbeat_reset(); | |
33c7a0ef TL |
271 | int heartbeat_reset_grace(int count=1) { |
272 | return count * _heartbeat_reset_grace; | |
273 | } | |
7c673cae | 274 | |
aee94f69 TL |
275 | /** |
276 | * Abort the MDS and flush any clog messages. | |
277 | * | |
278 | * Callers must already hold mds_lock. | |
279 | */ | |
280 | void abort(std::string_view msg); | |
281 | ||
7c673cae FG |
282 | /** |
283 | * Report state DAMAGED to the mon, and then pass on to respawn(). Call | |
284 | * this when an unrecoverable error is encountered while attempting | |
285 | * to load an MDS rank's data structures. This is *not* for use with | |
286 | * errors affecting normal dirfrag/inode objects -- they should be handled | |
287 | * through cleaner scrub/repair mechanisms. | |
288 | * | |
289 | * Callers must already hold mds_lock. | |
290 | */ | |
291 | void damaged(); | |
292 | ||
293 | /** | |
294 | * Wrapper around `damaged` for users who are not | |
295 | * already holding mds_lock. | |
296 | * | |
297 | * Callers must not already hold mds_lock. | |
298 | */ | |
299 | void damaged_unlocked(); | |
300 | ||
91327a77 AA |
301 | double last_cleared_laggy() const { |
302 | return beacon.last_cleared_laggy(); | |
303 | } | |
304 | ||
305 | double get_dispatch_queue_max_age(utime_t now) const; | |
7c673cae | 306 | |
9f95a23c | 307 | void send_message_mds(const ref_t<Message>& m, mds_rank_t mds); |
f67539c2 | 308 | void send_message_mds(const ref_t<Message>& m, const entity_addrvec_t &addr); |
aee94f69 | 309 | void forward_message_mds(MDRequestRef& mdr, mds_rank_t mds); |
9f95a23c TL |
310 | void send_message_client_counted(const ref_t<Message>& m, client_t client); |
311 | void send_message_client_counted(const ref_t<Message>& m, Session* session); | |
312 | void send_message_client_counted(const ref_t<Message>& m, const ConnectionRef& connection); | |
313 | void send_message_client(const ref_t<Message>& m, Session* session); | |
314 | void send_message(const ref_t<Message>& m, const ConnectionRef& c); | |
7c673cae | 315 | |
20effc67 TL |
316 | void wait_for_bootstrapped_peer(mds_rank_t who, MDSContext *c) { |
317 | waiting_for_bootstrapping_peer[who].push_back(c); | |
318 | } | |
11fdf7f2 | 319 | void wait_for_active_peer(mds_rank_t who, MDSContext *c) { |
7c673cae FG |
320 | waiting_for_active_peer[who].push_back(c); |
321 | } | |
11fdf7f2 TL |
322 | void wait_for_cluster_recovered(MDSContext *c) { |
323 | ceph_assert(cluster_degraded); | |
7c673cae FG |
324 | waiting_for_active_peer[MDS_RANK_NONE].push_back(c); |
325 | } | |
326 | ||
11fdf7f2 | 327 | void wait_for_any_client_connection(MDSContext *c) { |
28e407b8 AA |
328 | waiting_for_any_client_connection.push_back(c); |
329 | } | |
330 | void kick_waiters_for_any_client_connection(void) { | |
331 | finish_contexts(g_ceph_context, waiting_for_any_client_connection); | |
332 | } | |
11fdf7f2 | 333 | void wait_for_active(MDSContext *c) { |
7c673cae FG |
334 | waiting_for_active.push_back(c); |
335 | } | |
11fdf7f2 | 336 | void wait_for_replay(MDSContext *c) { |
7c673cae FG |
337 | waiting_for_replay.push_back(c); |
338 | } | |
11fdf7f2 | 339 | void wait_for_rejoin(MDSContext *c) { |
a8e16298 TL |
340 | waiting_for_rejoin.push_back(c); |
341 | } | |
11fdf7f2 | 342 | void wait_for_reconnect(MDSContext *c) { |
7c673cae FG |
343 | waiting_for_reconnect.push_back(c); |
344 | } | |
11fdf7f2 | 345 | void wait_for_resolve(MDSContext *c) { |
7c673cae FG |
346 | waiting_for_resolve.push_back(c); |
347 | } | |
11fdf7f2 | 348 | void wait_for_mdsmap(epoch_t e, MDSContext *c) { |
7c673cae FG |
349 | waiting_for_mdsmap[e].push_back(c); |
350 | } | |
11fdf7f2 | 351 | void enqueue_replay(MDSContext *c) { |
7c673cae FG |
352 | replay_queue.push_back(c); |
353 | } | |
354 | ||
355 | bool queue_one_replay(); | |
11fdf7f2 | 356 | void maybe_clientreplay_done(); |
7c673cae FG |
357 | |
358 | void set_osd_epoch_barrier(epoch_t e); | |
359 | epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;} | |
360 | epoch_t get_osd_epoch() const; | |
361 | ||
362 | ceph_tid_t issue_tid() { return ++last_tid; } | |
363 | ||
11fdf7f2 | 364 | MDSMap *get_mds_map() { return mdsmap.get(); } |
7c673cae | 365 | |
28e407b8 | 366 | uint64_t get_num_requests() const { return logger->get(l_mds_request); } |
7c673cae FG |
367 | |
368 | int get_mds_slow_req_count() const { return mds_slow_req_count; } | |
369 | ||
370 | void dump_status(Formatter *f) const; | |
371 | ||
11fdf7f2 | 372 | void hit_export_target(mds_rank_t rank, double amount=-1.0); |
7c673cae | 373 | bool is_export_target(mds_rank_t rank) { |
20effc67 | 374 | const std::set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; |
7c673cae FG |
375 | return map_targets.count(rank); |
376 | } | |
377 | ||
f67539c2 | 378 | bool evict_client(int64_t session_id, bool wait, bool blocklist, |
11fdf7f2 | 379 | std::ostream& ss, Context *on_killed=nullptr); |
92f5a8d4 TL |
380 | int config_client(int64_t session_id, bool remove, |
381 | const std::string& option, const std::string& value, | |
382 | std::ostream& ss); | |
1e59de90 TL |
383 | void schedule_inmemory_logger(); |
384 | ||
385 | double get_inject_journal_corrupt_dentry_first() const { | |
386 | return inject_journal_corrupt_dentry_first; | |
387 | } | |
11fdf7f2 | 388 | |
9f95a23c TL |
389 | // Reference to global MDS::mds_lock, so that users of MDSRank don't |
390 | // carry around references to the outer MDS, and we can substitute | |
391 | // a separate lock here in future potentially. | |
a4b75251 | 392 | ceph::fair_mutex &mds_lock; |
9f95a23c TL |
393 | |
394 | // Reference to global cluster log client, just to avoid initialising | |
395 | // a separate one here. | |
396 | LogChannelRef &clog; | |
397 | ||
398 | // Reference to global timer utility, because MDSRank and MDSDaemon | |
399 | // currently both use the same mds_lock, so it makes sense for them | |
400 | // to share a timer. | |
a4b75251 | 401 | CommonSafeTimer<ceph::fair_mutex> &timer; |
9f95a23c TL |
402 | |
403 | std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */ | |
404 | ||
405 | Objecter *objecter; | |
406 | ||
407 | // sub systems | |
408 | Server *server = nullptr; | |
409 | MDCache *mdcache = nullptr; | |
410 | Locker *locker = nullptr; | |
411 | MDLog *mdlog = nullptr; | |
412 | MDBalancer *balancer = nullptr; | |
413 | ScrubStack *scrubstack = nullptr; | |
414 | DamageTable damage_table; | |
415 | ||
416 | InoTable *inotable = nullptr; | |
417 | ||
418 | SnapServer *snapserver = nullptr; | |
419 | SnapClient *snapclient = nullptr; | |
420 | ||
421 | SessionMap sessionmap; | |
422 | ||
423 | PerfCounters *logger = nullptr, *mlogger = nullptr; | |
424 | OpTracker op_tracker; | |
425 | ||
426 | // The last different state I held before current | |
427 | MDSMap::DaemonState last_state = MDSMap::STATE_BOOT; | |
428 | // The state assigned to me by the MDSMap | |
2a845540 | 429 | MDSMap::DaemonState state = MDSMap::STATE_STANDBY; |
9f95a23c TL |
430 | |
431 | bool cluster_degraded = false; | |
432 | ||
433 | Finisher *finisher; | |
7c673cae | 434 | protected: |
9f95a23c TL |
435 | typedef enum { |
436 | // The MDSMap is available, configure default layouts and structures | |
437 | MDS_BOOT_INITIAL = 0, | |
438 | // We are ready to open some inodes | |
439 | MDS_BOOT_OPEN_ROOT, | |
440 | // We are ready to do a replay if needed | |
441 | MDS_BOOT_PREPARE_LOG, | |
442 | // Replay is complete | |
443 | MDS_BOOT_REPLAY_DONE | |
444 | } BootStep; | |
445 | ||
446 | class ProgressThread : public Thread { | |
447 | public: | |
448 | explicit ProgressThread(MDSRank *mds_) : mds(mds_) {} | |
449 | void * entry() override; | |
450 | void shutdown(); | |
451 | void signal() {cond.notify_all();} | |
452 | private: | |
453 | MDSRank *mds; | |
a4b75251 | 454 | std::condition_variable_any cond; |
9f95a23c TL |
455 | } progress_thread; |
456 | ||
457 | class C_MDS_StandbyReplayRestart; | |
458 | class C_MDS_StandbyReplayRestartFinish; | |
459 | // Friended to access retry_dispatch | |
460 | friend class C_MDS_RetryMessage; | |
461 | friend class C_MDS_BootStart; | |
462 | friend class C_MDS_InternalBootStart; | |
463 | friend class C_MDS_MonCommand; | |
464 | ||
465 | const mds_rank_t whoami; | |
466 | ||
467 | ~MDSRank(); | |
468 | ||
469 | void inc_dispatch_depth() { ++dispatch_depth; } | |
470 | void dec_dispatch_depth() { --dispatch_depth; } | |
471 | void retry_dispatch(const cref_t<Message> &m); | |
f6b5b4d7 TL |
472 | bool is_valid_message(const cref_t<Message> &m); |
473 | void handle_message(const cref_t<Message> &m); | |
9f95a23c TL |
474 | void _advance_queues(); |
475 | bool _dispatch(const cref_t<Message> &m, bool new_msg); | |
476 | bool is_stale_message(const cref_t<Message> &m) const; | |
477 | ||
478 | /** | |
479 | * Emit clog warnings for any ops reported as warnings by optracker | |
480 | */ | |
481 | void check_ops_in_flight(); | |
482 | ||
483 | /** | |
484 | * Share MDSMap with clients | |
485 | */ | |
9f95a23c TL |
486 | void create_logger(); |
487 | ||
7c673cae | 488 | void dump_clientreplay_status(Formatter *f) const; |
11fdf7f2 TL |
489 | void command_scrub_start(Formatter *f, |
490 | std::string_view path, std::string_view tag, | |
20effc67 | 491 | const std::vector<std::string>& scrubop_vec, Context *on_finish); |
11fdf7f2 TL |
492 | void command_tag_path(Formatter *f, std::string_view path, |
493 | std::string_view tag); | |
494 | // scrub control commands | |
495 | void command_scrub_abort(Formatter *f, Context *on_finish); | |
496 | void command_scrub_pause(Formatter *f, Context *on_finish); | |
497 | void command_scrub_resume(Formatter *f); | |
498 | void command_scrub_status(Formatter *f); | |
499 | ||
500 | void command_flush_path(Formatter *f, std::string_view path); | |
7c673cae FG |
501 | void command_flush_journal(Formatter *f); |
502 | void command_get_subtrees(Formatter *f); | |
503 | void command_export_dir(Formatter *f, | |
11fdf7f2 | 504 | std::string_view path, mds_rank_t dest); |
7c673cae FG |
505 | bool command_dirfrag_split( |
506 | cmdmap_t cmdmap, | |
507 | std::ostream &ss); | |
508 | bool command_dirfrag_merge( | |
509 | cmdmap_t cmdmap, | |
510 | std::ostream &ss); | |
511 | bool command_dirfrag_ls( | |
512 | cmdmap_t cmdmap, | |
513 | std::ostream &ss, | |
514 | Formatter *f); | |
11fdf7f2 | 515 | int _command_export_dir(std::string_view path, mds_rank_t dest); |
7c673cae FG |
516 | CDir *_command_dirfrag_get( |
517 | const cmdmap_t &cmdmap, | |
518 | std::ostream &ss); | |
11fdf7f2 TL |
519 | void command_openfiles_ls(Formatter *f); |
520 | void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f); | |
521 | void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); | |
f64942e4 AA |
522 | void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish); |
523 | ||
7c673cae FG |
524 | // FIXME the state machine logic should be separable from the dispatch |
525 | // logic that calls it. | |
526 | // >>> | |
527 | void calc_recovery_set(); | |
528 | void request_state(MDSMap::DaemonState s); | |
529 | ||
7c673cae FG |
530 | void boot_create(); // i am new mds. |
531 | void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay | |
532 | ||
533 | void replay_start(); | |
534 | void creating_done(); | |
535 | void starting_done(); | |
536 | void replay_done(); | |
537 | void standby_replay_restart(); | |
538 | void _standby_replay_restart_finish(int r, uint64_t old_read_pos); | |
7c673cae FG |
539 | |
540 | void reopen_log(); | |
541 | ||
542 | void resolve_start(); | |
543 | void resolve_done(); | |
544 | void reconnect_start(); | |
545 | void reconnect_done(); | |
546 | void rejoin_joint_start(); | |
547 | void rejoin_start(); | |
548 | void rejoin_done(); | |
549 | void recovery_done(int oldstate); | |
550 | void clientreplay_start(); | |
551 | void clientreplay_done(); | |
552 | void active_start(); | |
553 | void stopping_start(); | |
554 | void stopping_done(); | |
555 | ||
556 | void validate_sessions(); | |
9f95a23c | 557 | |
7c673cae FG |
558 | void handle_mds_recovery(mds_rank_t who); |
559 | void handle_mds_failure(mds_rank_t who); | |
7c673cae FG |
560 | |
561 | /* Update MDSMap export_targets for this rank. Called on ::tick(). */ | |
11fdf7f2 | 562 | void update_targets(); |
94b18763 | 563 | |
11fdf7f2 TL |
564 | void _mon_command_finish(int r, std::string_view cmd, std::string_view outs); |
565 | void set_mdsmap_multimds_snaps_allowed(); | |
9f95a23c TL |
566 | |
567 | Context *create_async_exec_context(C_ExecAndReply *ctx); | |
568 | ||
a4b75251 TL |
569 | // blocklist the provided addrs and set OSD epoch barrier |
570 | // with the provided epoch. | |
571 | void apply_blocklist(const std::set<entity_addr_t> &addrs, epoch_t epoch); | |
572 | ||
1e59de90 TL |
573 | void reset_event_flags(); |
574 | ||
9f95a23c TL |
575 | // Incarnation as seen in MDSMap at the point where a rank is |
576 | // assigned. | |
577 | int incarnation = 0; | |
578 | ||
579 | // Flag to indicate we entered shutdown: anyone seeing this to be true | |
580 | // after taking mds_lock must drop out. | |
581 | bool stopping = false; | |
582 | ||
583 | // PurgeQueue is only used by StrayManager, but it is owned by MDSRank | |
584 | // because its init/shutdown happens at the top level. | |
585 | PurgeQueue purge_queue; | |
586 | ||
f67539c2 TL |
587 | MetricsHandler metrics_handler; |
588 | std::unique_ptr<MetricAggregator> metric_aggregator; | |
589 | ||
20effc67 | 590 | std::list<cref_t<Message>> waiting_for_nolaggy; |
9f95a23c TL |
591 | MDSContext::que finished_queue; |
592 | // Dispatch, retry, queues | |
593 | int dispatch_depth = 0; | |
594 | ||
595 | ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock | |
f67539c2 | 596 | double heartbeat_grace; |
33c7a0ef | 597 | int _heartbeat_reset_grace; |
9f95a23c | 598 | |
20effc67 | 599 | std::map<mds_rank_t, version_t> peer_mdsmap_epoch; |
9f95a23c TL |
600 | |
601 | ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename) | |
602 | ||
603 | MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin, | |
604 | waiting_for_reconnect, waiting_for_resolve; | |
605 | MDSContext::vec waiting_for_any_client_connection; | |
606 | MDSContext::que replay_queue; | |
607 | bool replaying_requests_done = false; | |
608 | ||
20effc67 TL |
609 | std::map<mds_rank_t, MDSContext::vec> waiting_for_active_peer; |
610 | std::map<mds_rank_t, MDSContext::vec> waiting_for_bootstrapping_peer; | |
611 | std::map<epoch_t, MDSContext::vec> waiting_for_mdsmap; | |
9f95a23c TL |
612 | |
613 | epoch_t osd_epoch_barrier = 0; | |
614 | ||
615 | // Const reference to the beacon so that we can behave differently | |
616 | // when it's laggy. | |
617 | Beacon &beacon; | |
618 | ||
619 | int mds_slow_req_count = 0; | |
620 | ||
20effc67 | 621 | std::map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */ |
9f95a23c TL |
622 | |
623 | Messenger *messenger; | |
624 | MonClient *monc; | |
625 | MgrClient *mgrc; | |
626 | ||
627 | Context *respawn_hook; | |
628 | Context *suicide_hook; | |
629 | ||
630 | bool standby_replaying = false; // true if current replay pass is in standby-replay mode | |
1e59de90 TL |
631 | uint64_t extraordinary_events_dump_interval = 0; |
632 | double inject_journal_corrupt_dentry_first = 0.0; | |
94b18763 | 633 | private: |
f91f0fd5 TL |
634 | bool send_status = true; |
635 | ||
b3b6e05e TL |
636 | // The metadata pool won't change in the whole life time of the fs, |
637 | // with this we can get rid of the mds_lock in many places too. | |
638 | int64_t metadata_pool = -1; | |
639 | ||
9f95a23c TL |
640 | // "task" string that gets displayed in ceph status |
641 | inline static const std::string SCRUB_STATUS_KEY = "scrub status"; | |
11fdf7f2 | 642 | |
1e59de90 TL |
643 | bool client_eviction_dump = false; |
644 | ||
9f95a23c TL |
645 | void get_task_status(std::map<std::string, std::string> *status); |
646 | void schedule_update_timer_task(); | |
647 | void send_task_status(); | |
648 | ||
1e59de90 | 649 | void inmemory_logger(); |
f67539c2 TL |
650 | bool is_rank0() const { |
651 | return whoami == (mds_rank_t)0; | |
652 | } | |
653 | ||
9f95a23c | 654 | mono_time starttime = mono_clock::zero(); |
f67539c2 | 655 | boost::asio::io_context& ioc; |
7c673cae FG |
656 | }; |
657 | ||
7c673cae | 658 | class C_MDS_RetryMessage : public MDSInternalContext { |
7c673cae | 659 | public: |
9f95a23c | 660 | C_MDS_RetryMessage(MDSRank *mds, const cref_t<Message> &m) |
11fdf7f2 | 661 | : MDSInternalContext(mds), m(m) {} |
7c673cae | 662 | void finish(int r) override { |
11fdf7f2 TL |
663 | get_mds()->retry_dispatch(m); |
664 | } | |
665 | protected: | |
9f95a23c | 666 | cref_t<Message> m; |
11fdf7f2 TL |
667 | }; |
668 | ||
669 | class CF_MDS_RetryMessageFactory : public MDSContextFactory { | |
670 | public: | |
9f95a23c | 671 | CF_MDS_RetryMessageFactory(MDSRank *mds, const cref_t<Message> &m) |
11fdf7f2 TL |
672 | : mds(mds), m(m) {} |
673 | ||
674 | MDSContext *build() { | |
675 | return new C_MDS_RetryMessage(mds, m); | |
7c673cae | 676 | } |
11fdf7f2 TL |
677 | private: |
678 | MDSRank *mds; | |
9f95a23c | 679 | cref_t<Message> m; |
7c673cae FG |
680 | }; |
681 | ||
682 | /** | |
683 | * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e. | |
684 | * the service/dispatcher stuff like init/shutdown that subsystems should | |
685 | * never touch. | |
686 | */ | |
92f5a8d4 | 687 | class MDSRankDispatcher : public MDSRank, public md_config_obs_t |
7c673cae FG |
688 | { |
689 | public: | |
9f95a23c TL |
690 | MDSRankDispatcher( |
691 | mds_rank_t whoami_, | |
a4b75251 | 692 | ceph::fair_mutex &mds_lock_, |
9f95a23c | 693 | LogChannelRef &clog_, |
a4b75251 | 694 | CommonSafeTimer<ceph::fair_mutex> &timer_, |
9f95a23c TL |
695 | Beacon &beacon_, |
696 | std::unique_ptr<MDSMap> &mdsmap_, | |
697 | Messenger *msgr, | |
698 | MonClient *monc_, | |
699 | MgrClient *mgrc, | |
700 | Context *respawn_hook_, | |
f67539c2 TL |
701 | Context *suicide_hook_, |
702 | boost::asio::io_context& ioc); | |
9f95a23c | 703 | |
7c673cae FG |
704 | void init(); |
705 | void tick(); | |
706 | void shutdown(); | |
9f95a23c TL |
707 | void handle_asok_command( |
708 | std::string_view command, | |
709 | const cmdmap_t& cmdmap, | |
710 | Formatter *f, | |
711 | const bufferlist &inbl, | |
712 | std::function<void(int,const std::string&,bufferlist&)> on_finish); | |
713 | void handle_mds_map(const cref_t<MMDSMap> &m, const MDSMap &oldmap); | |
7c673cae | 714 | void handle_osd_map(); |
7c673cae FG |
715 | void update_log_config(); |
716 | ||
92f5a8d4 TL |
717 | const char** get_tracked_conf_keys() const override final; |
718 | void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override; | |
719 | ||
adb31ebb | 720 | void dump_sessions(const SessionFilter &filter, Formatter *f, bool cap_dump=false) const; |
9f95a23c TL |
721 | void evict_clients(const SessionFilter &filter, |
722 | std::function<void(int,const std::string&,bufferlist&)> on_finish); | |
7c673cae FG |
723 | |
724 | // Call into me from MDS::ms_dispatch | |
9f95a23c | 725 | bool ms_dispatch(const cref_t<Message> &m); |
7c673cae FG |
726 | }; |
727 | ||
7c673cae | 728 | #endif // MDS_RANK_H_ |