]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef MDS_RANK_H_ | |
16 | #define MDS_RANK_H_ | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
f67539c2 TL |
20 | #include <boost/asio/io_context.hpp> |
21 | ||
7c673cae FG |
22 | #include "common/DecayCounter.h" |
23 | #include "common/LogClient.h" | |
24 | #include "common/Timer.h" | |
25 | #include "common/TrackedOp.h" | |
26 | ||
9f95a23c TL |
27 | #include "include/common_fwd.h" |
28 | ||
11fdf7f2 | 29 | #include "messages/MClientRequest.h" |
7c673cae | 30 | #include "messages/MCommand.h" |
11fdf7f2 | 31 | #include "messages/MMDSMap.h" |
7c673cae FG |
32 | |
33 | #include "Beacon.h" | |
34 | #include "DamageTable.h" | |
35 | #include "MDSMap.h" | |
36 | #include "SessionMap.h" | |
37 | #include "MDCache.h" | |
7c673cae | 38 | #include "MDLog.h" |
11fdf7f2 | 39 | #include "MDSContext.h" |
7c673cae | 40 | #include "PurgeQueue.h" |
91327a77 | 41 | #include "Server.h" |
f67539c2 | 42 | #include "MetricsHandler.h" |
7c673cae FG |
43 | #include "osdc/Journaler.h" |
44 | ||
45 | // Full .h import instead of forward declaration for PerfCounter, for the | |
46 | // benefit of those including this header and using MDSRank::logger | |
47 | #include "common/perf_counters.h" | |
48 | ||
// Perf counter indices for the main MDS counter set. They are used as
// keys into MDSRank::logger (e.g. logger->get(l_mds_request)).
//
// Values are assigned implicitly, counting up from l_mds_first (2000), so
// inserting or reordering entries changes the numeric value of every
// enumerator that follows. l_mds_first / l_mds_last are sentinels that
// bracket the range rather than counters themselves
// (NOTE(review): presumably passed as bounds when the counters are built --
// confirm against the create_logger() definition).
enum {
  l_mds_first = 2000,
  // request handling
  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_forward,
  // dirfrag operations
  l_mds_dir_fetch,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,
  // inode / cap accounting
  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,
  // path traversal
  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,
  l_mds_load_cent,
  l_mds_dispatch_queue_len,
  // subtree export / import
  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,
  // open-by-inode
  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,
  // recursive stats of the root
  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,
  // scrub
  l_mds_scrub_backtrace_fetch,
  l_mds_scrub_set_tag,
  l_mds_scrub_backtrace_repaired,
  l_mds_scrub_inotable_repaired,
  l_mds_scrub_dir_inodes,
  l_mds_scrub_dir_base_inodes,
  l_mds_scrub_dirfrag_rstats,
  l_mds_scrub_file_inodes,
  // "l_mdss_" entries share this index space
  // (NOTE(review): the prefix suggests a separate subsystem -- confirm
  // where these are incremented).
  l_mdss_handle_inode_file_caps,
  l_mdss_ceph_cap_op_revoke,
  l_mdss_ceph_cap_op_grant,
  l_mdss_ceph_cap_op_trunc,
  l_mdss_ceph_cap_op_flushsnap_ack,
  l_mdss_ceph_cap_op_flush_ack,
  l_mdss_handle_client_caps,
  l_mdss_handle_client_caps_dirty,
  l_mdss_handle_client_cap_release,
  l_mdss_process_request_cap_release,
  l_mds_last,
};
107 | ||
// memory utilization
//
// Perf counter indices for the memory-utilization counter set. Values are
// assigned implicitly, counting up from l_mdm_first (2500); l_mdm_first and
// l_mdm_last are range sentinels, not counters.
// NOTE(review): presumably these index MDSRank::mlogger (see
// update_mlogger()) -- confirm against the implementation.
enum {
  l_mdm_first = 2500,
  // inodes
  l_mdm_ino,
  l_mdm_inoa,
  l_mdm_inos,
  // dirfrags
  l_mdm_dir,
  l_mdm_dira,
  l_mdm_dirs,
  // dentries
  l_mdm_dn,
  l_mdm_dna,
  l_mdm_dns,
  // capabilities
  l_mdm_cap,
  l_mdm_capa,
  l_mdm_caps,
  // process memory
  l_mdm_rss,
  l_mdm_heap,
  l_mdm_last,
};
127 | ||
128 | namespace ceph { | |
129 | struct heartbeat_handle_d; | |
130 | } | |
131 | ||
7c673cae FG |
132 | class Locker; |
133 | class MDCache; | |
134 | class MDLog; | |
135 | class MDBalancer; | |
136 | class InoTable; | |
137 | class SnapServer; | |
138 | class SnapClient; | |
139 | class MDSTableServer; | |
140 | class MDSTableClient; | |
141 | class Messenger; | |
f67539c2 | 142 | class MetricAggregator; |
7c673cae FG |
143 | class Objecter; |
144 | class MonClient; | |
9f95a23c | 145 | class MgrClient; |
7c673cae | 146 | class Finisher; |
7c673cae | 147 | class ScrubStack; |
11fdf7f2 | 148 | class C_ExecAndReply; |
7c673cae FG |
149 | |
150 | /** | |
151 | * The public part of this class's interface is what's exposed to all | |
152 | * the various subsystems (server, mdcache, etc), such as pointers | |
153 | * to the other subsystems, and message-sending calls. | |
154 | */ | |
155 | class MDSRank { | |
7c673cae | 156 | public: |
f64942e4 AA |
157 | friend class C_Flush_Journal; |
158 | friend class C_Drop_Cache; | |
11fdf7f2 TL |
159 | friend class C_CacheDropExecAndReply; |
160 | friend class C_ScrubExecAndReply; | |
161 | friend class C_ScrubControlExecAndReply; | |
162 | ||
9f95a23c TL |
163 | CephContext *cct; |
164 | ||
165 | MDSRank( | |
166 | mds_rank_t whoami_, | |
f67539c2 | 167 | std::string fs_name_, |
9f95a23c TL |
168 | ceph::mutex &mds_lock_, |
169 | LogChannelRef &clog_, | |
170 | SafeTimer &timer_, | |
171 | Beacon &beacon_, | |
172 | std::unique_ptr<MDSMap> & mdsmap_, | |
173 | Messenger *msgr, | |
174 | MonClient *monc_, | |
175 | MgrClient *mgrc, | |
176 | Context *respawn_hook_, | |
f67539c2 TL |
177 | Context *suicide_hook_, |
178 | boost::asio::io_context& ioc); | |
9f95a23c | 179 | |
7c673cae | 180 | mds_rank_t get_nodeid() const { return whoami; } |
f67539c2 | 181 | std::string_view get_fs_name() const { return fs_name; } |
b3b6e05e TL |
182 | int64_t get_metadata_pool() const |
183 | { | |
184 | return metadata_pool; | |
185 | } | |
7c673cae | 186 | |
94b18763 FG |
187 | mono_time get_starttime() const { |
188 | return starttime; | |
189 | } | |
190 | chrono::duration<double> get_uptime() const { | |
191 | mono_time now = mono_clock::now(); | |
192 | return chrono::duration<double>(now-starttime); | |
193 | } | |
194 | ||
7c673cae FG |
195 | bool is_daemon_stopping() const; |
196 | ||
7c673cae FG |
197 | MDSTableClient *get_table_client(int t); |
198 | MDSTableServer *get_table_server(int t); | |
199 | ||
7c673cae FG |
200 | Session *get_session(client_t client) { |
201 | return sessionmap.get_session(entity_name_t::CLIENT(client.v)); | |
202 | } | |
9f95a23c | 203 | Session *get_session(const cref_t<Message> &m); |
7c673cae FG |
204 | |
205 | MDSMap::DaemonState get_state() const { return state; } | |
206 | MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); } | |
207 | ||
208 | bool is_creating() const { return state == MDSMap::STATE_CREATING; } | |
209 | bool is_starting() const { return state == MDSMap::STATE_STARTING; } | |
210 | bool is_standby() const { return state == MDSMap::STATE_STANDBY; } | |
211 | bool is_replay() const { return state == MDSMap::STATE_REPLAY; } | |
212 | bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; } | |
213 | bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; } | |
214 | bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; } | |
215 | bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; } | |
216 | bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; } | |
217 | bool is_active() const { return state == MDSMap::STATE_ACTIVE; } | |
218 | bool is_stopping() const { return state == MDSMap::STATE_STOPPING; } | |
219 | bool is_any_replay() const { return (is_replay() || is_standby_replay()); } | |
220 | bool is_stopped() const { return mdsmap->is_stopped(whoami); } | |
221 | bool is_cluster_degraded() const { return cluster_degraded; } | |
11fdf7f2 | 222 | bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); } |
7c673cae | 223 | |
eafe8130 | 224 | bool is_cache_trimmable() const { |
b3b6e05e | 225 | return is_standby_replay() || is_clientreplay() || is_active() || is_stopping(); |
eafe8130 TL |
226 | } |
227 | ||
7c673cae | 228 | void handle_write_error(int err); |
f67539c2 | 229 | void handle_write_error_with_lock(int err); |
7c673cae | 230 | |
c07f9fc5 | 231 | void update_mlogger(); |
7c673cae | 232 | |
11fdf7f2 | 233 | void queue_waiter(MDSContext *c) { |
91327a77 AA |
234 | finished_queue.push_back(c); |
235 | progress_thread.signal(); | |
236 | } | |
494da23a TL |
237 | void queue_waiter_front(MDSContext *c) { |
238 | finished_queue.push_front(c); | |
239 | progress_thread.signal(); | |
240 | } | |
11fdf7f2 TL |
241 | void queue_waiters(MDSContext::vec& ls) { |
242 | MDSContext::vec v; | |
243 | v.swap(ls); | |
244 | std::copy(v.begin(), v.end(), std::back_inserter(finished_queue)); | |
7c673cae FG |
245 | progress_thread.signal(); |
246 | } | |
11fdf7f2 TL |
247 | void queue_waiters_front(MDSContext::vec& ls) { |
248 | MDSContext::vec v; | |
249 | v.swap(ls); | |
250 | std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue)); | |
91327a77 AA |
251 | progress_thread.signal(); |
252 | } | |
7c673cae | 253 | |
7c673cae FG |
254 | // Daemon lifetime functions: these guys break the abstraction |
255 | // and call up into the parent MDSDaemon instance. It's kind | |
256 | // of unavoidable: if we want any depth into our calls | |
257 | // to be able to e.g. tear down the whole process, we have to | |
258 | // have a reference going all the way down. | |
259 | // >>> | |
260 | void suicide(); | |
261 | void respawn(); | |
262 | // <<< | |
263 | ||
264 | /** | |
265 | * Call this periodically if inside a potentially long running piece | |
266 | * of code while holding the mds_lock | |
267 | */ | |
268 | void heartbeat_reset(); | |
269 | ||
270 | /** | |
271 | * Report state DAMAGED to the mon, and then pass on to respawn(). Call | |
272 | * this when an unrecoverable error is encountered while attempting | |
273 | * to load an MDS rank's data structures. This is *not* for use with | |
274 | * errors affecting normal dirfrag/inode objects -- they should be handled | |
275 | * through cleaner scrub/repair mechanisms. | |
276 | * | |
277 | * Callers must already hold mds_lock. | |
278 | */ | |
279 | void damaged(); | |
280 | ||
281 | /** | |
282 | * Wrapper around `damaged` for users who are not | |
283 | * already holding mds_lock. | |
284 | * | |
285 | * Callers must not already hold mds_lock. | |
286 | */ | |
287 | void damaged_unlocked(); | |
288 | ||
91327a77 AA |
289 | double last_cleared_laggy() const { |
290 | return beacon.last_cleared_laggy(); | |
291 | } | |
292 | ||
293 | double get_dispatch_queue_max_age(utime_t now) const; | |
7c673cae | 294 | |
9f95a23c | 295 | void send_message_mds(const ref_t<Message>& m, mds_rank_t mds); |
f67539c2 | 296 | void send_message_mds(const ref_t<Message>& m, const entity_addrvec_t &addr); |
9f95a23c TL |
297 | void forward_message_mds(const cref_t<MClientRequest>& req, mds_rank_t mds); |
298 | void send_message_client_counted(const ref_t<Message>& m, client_t client); | |
299 | void send_message_client_counted(const ref_t<Message>& m, Session* session); | |
300 | void send_message_client_counted(const ref_t<Message>& m, const ConnectionRef& connection); | |
301 | void send_message_client(const ref_t<Message>& m, Session* session); | |
302 | void send_message(const ref_t<Message>& m, const ConnectionRef& c); | |
7c673cae | 303 | |
11fdf7f2 | 304 | void wait_for_active_peer(mds_rank_t who, MDSContext *c) { |
7c673cae FG |
305 | waiting_for_active_peer[who].push_back(c); |
306 | } | |
11fdf7f2 TL |
307 | void wait_for_cluster_recovered(MDSContext *c) { |
308 | ceph_assert(cluster_degraded); | |
7c673cae FG |
309 | waiting_for_active_peer[MDS_RANK_NONE].push_back(c); |
310 | } | |
311 | ||
11fdf7f2 | 312 | void wait_for_any_client_connection(MDSContext *c) { |
28e407b8 AA |
313 | waiting_for_any_client_connection.push_back(c); |
314 | } | |
315 | void kick_waiters_for_any_client_connection(void) { | |
316 | finish_contexts(g_ceph_context, waiting_for_any_client_connection); | |
317 | } | |
11fdf7f2 | 318 | void wait_for_active(MDSContext *c) { |
7c673cae FG |
319 | waiting_for_active.push_back(c); |
320 | } | |
11fdf7f2 | 321 | void wait_for_replay(MDSContext *c) { |
7c673cae FG |
322 | waiting_for_replay.push_back(c); |
323 | } | |
11fdf7f2 | 324 | void wait_for_rejoin(MDSContext *c) { |
a8e16298 TL |
325 | waiting_for_rejoin.push_back(c); |
326 | } | |
11fdf7f2 | 327 | void wait_for_reconnect(MDSContext *c) { |
7c673cae FG |
328 | waiting_for_reconnect.push_back(c); |
329 | } | |
11fdf7f2 | 330 | void wait_for_resolve(MDSContext *c) { |
7c673cae FG |
331 | waiting_for_resolve.push_back(c); |
332 | } | |
11fdf7f2 | 333 | void wait_for_mdsmap(epoch_t e, MDSContext *c) { |
7c673cae FG |
334 | waiting_for_mdsmap[e].push_back(c); |
335 | } | |
11fdf7f2 | 336 | void enqueue_replay(MDSContext *c) { |
7c673cae FG |
337 | replay_queue.push_back(c); |
338 | } | |
339 | ||
340 | bool queue_one_replay(); | |
11fdf7f2 | 341 | void maybe_clientreplay_done(); |
7c673cae FG |
342 | |
343 | void set_osd_epoch_barrier(epoch_t e); | |
344 | epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;} | |
345 | epoch_t get_osd_epoch() const; | |
346 | ||
347 | ceph_tid_t issue_tid() { return ++last_tid; } | |
348 | ||
11fdf7f2 | 349 | MDSMap *get_mds_map() { return mdsmap.get(); } |
7c673cae | 350 | |
28e407b8 | 351 | uint64_t get_num_requests() const { return logger->get(l_mds_request); } |
7c673cae FG |
352 | |
353 | int get_mds_slow_req_count() const { return mds_slow_req_count; } | |
354 | ||
355 | void dump_status(Formatter *f) const; | |
356 | ||
11fdf7f2 | 357 | void hit_export_target(mds_rank_t rank, double amount=-1.0); |
7c673cae FG |
358 | bool is_export_target(mds_rank_t rank) { |
359 | const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; | |
360 | return map_targets.count(rank); | |
361 | } | |
362 | ||
f67539c2 | 363 | bool evict_client(int64_t session_id, bool wait, bool blocklist, |
11fdf7f2 | 364 | std::ostream& ss, Context *on_killed=nullptr); |
92f5a8d4 TL |
365 | int config_client(int64_t session_id, bool remove, |
366 | const std::string& option, const std::string& value, | |
367 | std::ostream& ss); | |
11fdf7f2 | 368 | |
9f95a23c TL |
369 | // Reference to global MDS::mds_lock, so that users of MDSRank don't |
370 | // carry around references to the outer MDS, and we can substitute | |
371 | // a separate lock here in future potentially. | |
372 | ceph::mutex &mds_lock; | |
373 | ||
374 | // Reference to global cluster log client, just to avoid initialising | |
375 | // a separate one here. | |
376 | LogChannelRef &clog; | |
377 | ||
378 | // Reference to global timer utility, because MDSRank and MDSDaemon | |
379 | // currently both use the same mds_lock, so it makes sense for them | |
380 | // to share a timer. | |
381 | SafeTimer &timer; | |
382 | ||
383 | std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */ | |
384 | ||
385 | Objecter *objecter; | |
386 | ||
387 | // sub systems | |
388 | Server *server = nullptr; | |
389 | MDCache *mdcache = nullptr; | |
390 | Locker *locker = nullptr; | |
391 | MDLog *mdlog = nullptr; | |
392 | MDBalancer *balancer = nullptr; | |
393 | ScrubStack *scrubstack = nullptr; | |
394 | DamageTable damage_table; | |
395 | ||
396 | InoTable *inotable = nullptr; | |
397 | ||
398 | SnapServer *snapserver = nullptr; | |
399 | SnapClient *snapclient = nullptr; | |
400 | ||
401 | SessionMap sessionmap; | |
402 | ||
403 | PerfCounters *logger = nullptr, *mlogger = nullptr; | |
404 | OpTracker op_tracker; | |
405 | ||
406 | // The last different state I held before current | |
407 | MDSMap::DaemonState last_state = MDSMap::STATE_BOOT; | |
408 | // The state assigned to me by the MDSMap | |
409 | MDSMap::DaemonState state = MDSMap::STATE_BOOT; | |
410 | ||
411 | bool cluster_degraded = false; | |
412 | ||
413 | Finisher *finisher; | |
7c673cae | 414 | protected: |
9f95a23c TL |
415 | typedef enum { |
416 | // The MDSMap is available, configure default layouts and structures | |
417 | MDS_BOOT_INITIAL = 0, | |
418 | // We are ready to open some inodes | |
419 | MDS_BOOT_OPEN_ROOT, | |
420 | // We are ready to do a replay if needed | |
421 | MDS_BOOT_PREPARE_LOG, | |
422 | // Replay is complete | |
423 | MDS_BOOT_REPLAY_DONE | |
424 | } BootStep; | |
425 | ||
426 | class ProgressThread : public Thread { | |
427 | public: | |
428 | explicit ProgressThread(MDSRank *mds_) : mds(mds_) {} | |
429 | void * entry() override; | |
430 | void shutdown(); | |
431 | void signal() {cond.notify_all();} | |
432 | private: | |
433 | MDSRank *mds; | |
434 | ceph::condition_variable cond; | |
435 | } progress_thread; | |
436 | ||
437 | class C_MDS_StandbyReplayRestart; | |
438 | class C_MDS_StandbyReplayRestartFinish; | |
439 | // Friended to access retry_dispatch | |
440 | friend class C_MDS_RetryMessage; | |
441 | friend class C_MDS_BootStart; | |
442 | friend class C_MDS_InternalBootStart; | |
443 | friend class C_MDS_MonCommand; | |
444 | ||
445 | const mds_rank_t whoami; | |
f67539c2 | 446 | std::string fs_name; |
9f95a23c TL |
447 | |
448 | ~MDSRank(); | |
449 | ||
450 | void inc_dispatch_depth() { ++dispatch_depth; } | |
451 | void dec_dispatch_depth() { --dispatch_depth; } | |
452 | void retry_dispatch(const cref_t<Message> &m); | |
f6b5b4d7 TL |
453 | bool is_valid_message(const cref_t<Message> &m); |
454 | void handle_message(const cref_t<Message> &m); | |
9f95a23c TL |
455 | void _advance_queues(); |
456 | bool _dispatch(const cref_t<Message> &m, bool new_msg); | |
457 | bool is_stale_message(const cref_t<Message> &m) const; | |
458 | ||
459 | /** | |
460 | * Emit clog warnings for any ops reported as warnings by optracker | |
461 | */ | |
462 | void check_ops_in_flight(); | |
463 | ||
464 | /** | |
465 | * Share MDSMap with clients | |
466 | */ | |
9f95a23c TL |
467 | void create_logger(); |
468 | ||
7c673cae | 469 | void dump_clientreplay_status(Formatter *f) const; |
11fdf7f2 TL |
470 | void command_scrub_start(Formatter *f, |
471 | std::string_view path, std::string_view tag, | |
472 | const vector<string>& scrubop_vec, Context *on_finish); | |
473 | void command_tag_path(Formatter *f, std::string_view path, | |
474 | std::string_view tag); | |
475 | // scrub control commands | |
476 | void command_scrub_abort(Formatter *f, Context *on_finish); | |
477 | void command_scrub_pause(Formatter *f, Context *on_finish); | |
478 | void command_scrub_resume(Formatter *f); | |
479 | void command_scrub_status(Formatter *f); | |
480 | ||
481 | void command_flush_path(Formatter *f, std::string_view path); | |
7c673cae FG |
482 | void command_flush_journal(Formatter *f); |
483 | void command_get_subtrees(Formatter *f); | |
484 | void command_export_dir(Formatter *f, | |
11fdf7f2 | 485 | std::string_view path, mds_rank_t dest); |
7c673cae FG |
486 | bool command_dirfrag_split( |
487 | cmdmap_t cmdmap, | |
488 | std::ostream &ss); | |
489 | bool command_dirfrag_merge( | |
490 | cmdmap_t cmdmap, | |
491 | std::ostream &ss); | |
492 | bool command_dirfrag_ls( | |
493 | cmdmap_t cmdmap, | |
494 | std::ostream &ss, | |
495 | Formatter *f); | |
11fdf7f2 | 496 | int _command_export_dir(std::string_view path, mds_rank_t dest); |
7c673cae FG |
497 | CDir *_command_dirfrag_get( |
498 | const cmdmap_t &cmdmap, | |
499 | std::ostream &ss); | |
11fdf7f2 TL |
500 | void command_openfiles_ls(Formatter *f); |
501 | void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f); | |
502 | void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); | |
f64942e4 AA |
503 | void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish); |
504 | ||
7c673cae FG |
505 | // FIXME the state machine logic should be separable from the dispatch |
506 | // logic that calls it. | |
507 | // >>> | |
508 | void calc_recovery_set(); | |
509 | void request_state(MDSMap::DaemonState s); | |
510 | ||
7c673cae FG |
511 | void boot_create(); // i am new mds. |
512 | void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay | |
513 | ||
514 | void replay_start(); | |
515 | void creating_done(); | |
516 | void starting_done(); | |
517 | void replay_done(); | |
518 | void standby_replay_restart(); | |
519 | void _standby_replay_restart_finish(int r, uint64_t old_read_pos); | |
7c673cae FG |
520 | |
521 | void reopen_log(); | |
522 | ||
523 | void resolve_start(); | |
524 | void resolve_done(); | |
525 | void reconnect_start(); | |
526 | void reconnect_done(); | |
527 | void rejoin_joint_start(); | |
528 | void rejoin_start(); | |
529 | void rejoin_done(); | |
530 | void recovery_done(int oldstate); | |
531 | void clientreplay_start(); | |
532 | void clientreplay_done(); | |
533 | void active_start(); | |
534 | void stopping_start(); | |
535 | void stopping_done(); | |
536 | ||
537 | void validate_sessions(); | |
9f95a23c | 538 | |
7c673cae FG |
539 | void handle_mds_recovery(mds_rank_t who); |
540 | void handle_mds_failure(mds_rank_t who); | |
7c673cae FG |
541 | |
542 | /* Update MDSMap export_targets for this rank. Called on ::tick(). */ | |
11fdf7f2 | 543 | void update_targets(); |
94b18763 | 544 | |
11fdf7f2 TL |
545 | void _mon_command_finish(int r, std::string_view cmd, std::string_view outs); |
546 | void set_mdsmap_multimds_snaps_allowed(); | |
9f95a23c TL |
547 | |
548 | Context *create_async_exec_context(C_ExecAndReply *ctx); | |
549 | ||
550 | // Incarnation as seen in MDSMap at the point where a rank is | |
551 | // assigned. | |
552 | int incarnation = 0; | |
553 | ||
554 | // Flag to indicate we entered shutdown: anyone seeing this to be true | |
555 | // after taking mds_lock must drop out. | |
556 | bool stopping = false; | |
557 | ||
558 | // PurgeQueue is only used by StrayManager, but it is owned by MDSRank | |
559 | // because its init/shutdown happens at the top level. | |
560 | PurgeQueue purge_queue; | |
561 | ||
f67539c2 TL |
562 | MetricsHandler metrics_handler; |
563 | std::unique_ptr<MetricAggregator> metric_aggregator; | |
564 | ||
9f95a23c TL |
565 | list<cref_t<Message>> waiting_for_nolaggy; |
566 | MDSContext::que finished_queue; | |
567 | // Dispatch, retry, queues | |
568 | int dispatch_depth = 0; | |
569 | ||
570 | ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock | |
f67539c2 | 571 | double heartbeat_grace; |
9f95a23c TL |
572 | |
573 | map<mds_rank_t, version_t> peer_mdsmap_epoch; | |
574 | ||
575 | ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename) | |
576 | ||
577 | MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin, | |
578 | waiting_for_reconnect, waiting_for_resolve; | |
579 | MDSContext::vec waiting_for_any_client_connection; | |
580 | MDSContext::que replay_queue; | |
581 | bool replaying_requests_done = false; | |
582 | ||
583 | map<mds_rank_t, MDSContext::vec > waiting_for_active_peer; | |
584 | map<epoch_t, MDSContext::vec > waiting_for_mdsmap; | |
585 | ||
586 | epoch_t osd_epoch_barrier = 0; | |
587 | ||
588 | // Const reference to the beacon so that we can behave differently | |
589 | // when it's laggy. | |
590 | Beacon &beacon; | |
591 | ||
592 | int mds_slow_req_count = 0; | |
593 | ||
9f95a23c TL |
594 | map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */ |
595 | ||
596 | Messenger *messenger; | |
597 | MonClient *monc; | |
598 | MgrClient *mgrc; | |
599 | ||
600 | Context *respawn_hook; | |
601 | Context *suicide_hook; | |
602 | ||
603 | bool standby_replaying = false; // true if current replay pass is in standby-replay mode | |
94b18763 | 604 | private: |
f91f0fd5 TL |
605 | bool send_status = true; |
606 | ||
b3b6e05e TL |
607 | // The metadata pool won't change in the whole life time of the fs, |
608 | // with this we can get rid of the mds_lock in many places too. | |
609 | int64_t metadata_pool = -1; | |
610 | ||
9f95a23c TL |
611 | // "task" string that gets displayed in ceph status |
612 | inline static const std::string SCRUB_STATUS_KEY = "scrub status"; | |
11fdf7f2 | 613 | |
9f95a23c TL |
614 | void get_task_status(std::map<std::string, std::string> *status); |
615 | void schedule_update_timer_task(); | |
616 | void send_task_status(); | |
617 | ||
f67539c2 TL |
618 | bool is_rank0() const { |
619 | return whoami == (mds_rank_t)0; | |
620 | } | |
621 | ||
9f95a23c | 622 | mono_time starttime = mono_clock::zero(); |
f67539c2 | 623 | boost::asio::io_context& ioc; |
7c673cae FG |
624 | }; |
625 | ||
626 | /* This expects to be given a reference which it is responsible for. | |
627 | * The finish function calls functions which | |
628 | * will put the Message exactly once.*/ | |
629 | class C_MDS_RetryMessage : public MDSInternalContext { | |
7c673cae | 630 | public: |
9f95a23c | 631 | C_MDS_RetryMessage(MDSRank *mds, const cref_t<Message> &m) |
11fdf7f2 | 632 | : MDSInternalContext(mds), m(m) {} |
7c673cae | 633 | void finish(int r) override { |
11fdf7f2 TL |
634 | get_mds()->retry_dispatch(m); |
635 | } | |
636 | protected: | |
9f95a23c | 637 | cref_t<Message> m; |
11fdf7f2 TL |
638 | }; |
639 | ||
640 | class CF_MDS_RetryMessageFactory : public MDSContextFactory { | |
641 | public: | |
9f95a23c | 642 | CF_MDS_RetryMessageFactory(MDSRank *mds, const cref_t<Message> &m) |
11fdf7f2 TL |
643 | : mds(mds), m(m) {} |
644 | ||
645 | MDSContext *build() { | |
646 | return new C_MDS_RetryMessage(mds, m); | |
7c673cae | 647 | } |
11fdf7f2 TL |
648 | private: |
649 | MDSRank *mds; | |
9f95a23c | 650 | cref_t<Message> m; |
7c673cae FG |
651 | }; |
652 | ||
653 | /** | |
654 | * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e. | |
655 | * the service/dispatcher stuff like init/shutdown that subsystems should | |
656 | * never touch. | |
657 | */ | |
92f5a8d4 | 658 | class MDSRankDispatcher : public MDSRank, public md_config_obs_t |
7c673cae FG |
659 | { |
660 | public: | |
9f95a23c TL |
661 | MDSRankDispatcher( |
662 | mds_rank_t whoami_, | |
f67539c2 | 663 | std::string fs_name, |
9f95a23c TL |
664 | ceph::mutex &mds_lock_, |
665 | LogChannelRef &clog_, | |
666 | SafeTimer &timer_, | |
667 | Beacon &beacon_, | |
668 | std::unique_ptr<MDSMap> &mdsmap_, | |
669 | Messenger *msgr, | |
670 | MonClient *monc_, | |
671 | MgrClient *mgrc, | |
672 | Context *respawn_hook_, | |
f67539c2 TL |
673 | Context *suicide_hook_, |
674 | boost::asio::io_context& ioc); | |
9f95a23c | 675 | |
7c673cae FG |
676 | void init(); |
677 | void tick(); | |
678 | void shutdown(); | |
9f95a23c TL |
679 | void handle_asok_command( |
680 | std::string_view command, | |
681 | const cmdmap_t& cmdmap, | |
682 | Formatter *f, | |
683 | const bufferlist &inbl, | |
684 | std::function<void(int,const std::string&,bufferlist&)> on_finish); | |
685 | void handle_mds_map(const cref_t<MMDSMap> &m, const MDSMap &oldmap); | |
7c673cae | 686 | void handle_osd_map(); |
7c673cae FG |
687 | void update_log_config(); |
688 | ||
92f5a8d4 TL |
689 | const char** get_tracked_conf_keys() const override final; |
690 | void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override; | |
691 | ||
adb31ebb | 692 | void dump_sessions(const SessionFilter &filter, Formatter *f, bool cap_dump=false) const; |
9f95a23c TL |
693 | void evict_clients(const SessionFilter &filter, |
694 | std::function<void(int,const std::string&,bufferlist&)> on_finish); | |
7c673cae FG |
695 | |
696 | // Call into me from MDS::ms_dispatch | |
9f95a23c | 697 | bool ms_dispatch(const cref_t<Message> &m); |
7c673cae FG |
698 | }; |
699 | ||
7c673cae FG |
700 | #endif // MDS_RANK_H_ |
701 |