]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef MDS_RANK_H_ | |
16 | #define MDS_RANK_H_ | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
7c673cae FG |
20 | #include "common/DecayCounter.h" |
21 | #include "common/LogClient.h" | |
22 | #include "common/Timer.h" | |
23 | #include "common/TrackedOp.h" | |
24 | ||
9f95a23c TL |
25 | #include "include/common_fwd.h" |
26 | ||
11fdf7f2 | 27 | #include "messages/MClientRequest.h" |
7c673cae | 28 | #include "messages/MCommand.h" |
11fdf7f2 | 29 | #include "messages/MMDSMap.h" |
7c673cae FG |
30 | |
31 | #include "Beacon.h" | |
32 | #include "DamageTable.h" | |
33 | #include "MDSMap.h" | |
34 | #include "SessionMap.h" | |
35 | #include "MDCache.h" | |
7c673cae | 36 | #include "MDLog.h" |
11fdf7f2 | 37 | #include "MDSContext.h" |
7c673cae | 38 | #include "PurgeQueue.h" |
91327a77 | 39 | #include "Server.h" |
7c673cae FG |
40 | #include "osdc/Journaler.h" |
41 | ||
42 | // Full .h import instead of forward declaration for PerfCounter, for the | |
43 | // benefit of those including this header and using MDSRank::logger | |
44 | #include "common/perf_counters.h" | |
45 | ||
// Identifiers in the l_mds_* range, numbered consecutively from
// l_mds_first (2000) up to the l_mds_last sentinel.
// MDSRank::get_num_requests() reads l_mds_request from MDSRank::logger;
// the remaining entries are presumably registered on that same
// PerfCounters instance -- confirm against create_logger().
// The numeric values are implied by position: do not reorder.
enum {
  l_mds_first = 2000,

  // requests / replies
  l_mds_request,
  l_mds_reply,
  l_mds_reply_latency,
  l_mds_forward,

  // dirfrag operations
  l_mds_dir_fetch,
  l_mds_dir_commit,
  l_mds_dir_split,
  l_mds_dir_merge,

  // inodes / caps / subtrees
  l_mds_inodes,
  l_mds_inodes_top,
  l_mds_inodes_bottom,
  l_mds_inodes_pin_tail,
  l_mds_inodes_pinned,
  l_mds_inodes_expired,
  l_mds_inodes_with_caps,
  l_mds_caps,
  l_mds_subtrees,

  // path traversal
  l_mds_traverse,
  l_mds_traverse_hit,
  l_mds_traverse_forward,
  l_mds_traverse_discover,
  l_mds_traverse_dir_fetch,
  l_mds_traverse_remote_ino,
  l_mds_traverse_lock,

  // load / dispatch
  l_mds_load_cent,
  l_mds_dispatch_queue_len,

  // export / import
  l_mds_exported,
  l_mds_exported_inodes,
  l_mds_imported,
  l_mds_imported_inodes,

  // open-ino
  l_mds_openino_dir_fetch,
  l_mds_openino_backtrace_fetch,
  l_mds_openino_peer_discover,

  // root
  l_mds_root_rfiles,
  l_mds_root_rbytes,
  l_mds_root_rsnaps,

  l_mds_last,
};
86 | ||
// memory utilization
//
// Identifiers in the l_mdm_* range, numbered consecutively from
// l_mdm_first (2500) up to the l_mdm_last sentinel. Presumably these are
// the counters behind MDSRank::mlogger -- TODO confirm.
// The numeric values are implied by position: do not reorder.
enum {
  l_mdm_first = 2500,

  // inodes
  l_mdm_ino,
  l_mdm_inoa,
  l_mdm_inos,

  // dirs
  l_mdm_dir,
  l_mdm_dira,
  l_mdm_dirs,

  // dentries
  l_mdm_dn,
  l_mdm_dna,
  l_mdm_dns,

  // caps
  l_mdm_cap,
  l_mdm_capa,
  l_mdm_caps,

  // process memory
  l_mdm_rss,
  l_mdm_heap,

  l_mdm_last,
};
106 | ||
// Forward declarations for types this header only names through pointers
// or friend declarations; their full definitions are not needed here.
namespace ceph {
  struct heartbeat_handle_d;
}

// MDS subsystems
class Locker;
class MDCache;
class MDLog;
class MDBalancer;
class InoTable;
class ScrubStack;

// snapshot / table services
class SnapServer;
class SnapClient;
class MDSTableServer;
class MDSTableClient;

// messaging and cluster clients
class Messenger;
class Objecter;
class MonClient;
class MgrClient;

// helpers
class Finisher;
class C_ExecAndReply;
7c673cae FG |
127 | |
128 | /** | |
129 | * The public part of this class's interface is what's exposed to all | |
130 | * the various subsystems (server, mdcache, etc), such as pointers | |
131 | * to the other subsystems, and message-sending calls. | |
132 | */ | |
133 | class MDSRank { | |
7c673cae | 134 | public: |
f64942e4 AA |
135 | friend class C_Flush_Journal; |
136 | friend class C_Drop_Cache; | |
11fdf7f2 TL |
137 | friend class C_CacheDropExecAndReply; |
138 | friend class C_ScrubExecAndReply; | |
139 | friend class C_ScrubControlExecAndReply; | |
140 | ||
9f95a23c TL |
141 | CephContext *cct; |
142 | ||
143 | MDSRank( | |
144 | mds_rank_t whoami_, | |
145 | ceph::mutex &mds_lock_, | |
146 | LogChannelRef &clog_, | |
147 | SafeTimer &timer_, | |
148 | Beacon &beacon_, | |
149 | std::unique_ptr<MDSMap> & mdsmap_, | |
150 | Messenger *msgr, | |
151 | MonClient *monc_, | |
152 | MgrClient *mgrc, | |
153 | Context *respawn_hook_, | |
154 | Context *suicide_hook_); | |
155 | ||
7c673cae FG |
156 | mds_rank_t get_nodeid() const { return whoami; } |
157 | int64_t get_metadata_pool(); | |
158 | ||
94b18763 FG |
159 | mono_time get_starttime() const { |
160 | return starttime; | |
161 | } | |
162 | chrono::duration<double> get_uptime() const { | |
163 | mono_time now = mono_clock::now(); | |
164 | return chrono::duration<double>(now-starttime); | |
165 | } | |
166 | ||
7c673cae FG |
167 | bool is_daemon_stopping() const; |
168 | ||
7c673cae FG |
169 | MDSTableClient *get_table_client(int t); |
170 | MDSTableServer *get_table_server(int t); | |
171 | ||
7c673cae FG |
172 | Session *get_session(client_t client) { |
173 | return sessionmap.get_session(entity_name_t::CLIENT(client.v)); | |
174 | } | |
9f95a23c | 175 | Session *get_session(const cref_t<Message> &m); |
7c673cae FG |
176 | |
177 | MDSMap::DaemonState get_state() const { return state; } | |
178 | MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); } | |
179 | ||
180 | bool is_creating() const { return state == MDSMap::STATE_CREATING; } | |
181 | bool is_starting() const { return state == MDSMap::STATE_STARTING; } | |
182 | bool is_standby() const { return state == MDSMap::STATE_STANDBY; } | |
183 | bool is_replay() const { return state == MDSMap::STATE_REPLAY; } | |
184 | bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; } | |
185 | bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; } | |
186 | bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; } | |
187 | bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; } | |
188 | bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; } | |
189 | bool is_active() const { return state == MDSMap::STATE_ACTIVE; } | |
190 | bool is_stopping() const { return state == MDSMap::STATE_STOPPING; } | |
191 | bool is_any_replay() const { return (is_replay() || is_standby_replay()); } | |
192 | bool is_stopped() const { return mdsmap->is_stopped(whoami); } | |
193 | bool is_cluster_degraded() const { return cluster_degraded; } | |
11fdf7f2 | 194 | bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); } |
7c673cae | 195 | |
eafe8130 TL |
196 | bool is_cache_trimmable() const { |
197 | return is_clientreplay() || is_active() || is_stopping(); | |
198 | } | |
199 | ||
7c673cae FG |
200 | void handle_write_error(int err); |
201 | ||
c07f9fc5 | 202 | void update_mlogger(); |
7c673cae | 203 | |
11fdf7f2 | 204 | void queue_waiter(MDSContext *c) { |
91327a77 AA |
205 | finished_queue.push_back(c); |
206 | progress_thread.signal(); | |
207 | } | |
494da23a TL |
208 | void queue_waiter_front(MDSContext *c) { |
209 | finished_queue.push_front(c); | |
210 | progress_thread.signal(); | |
211 | } | |
11fdf7f2 TL |
212 | void queue_waiters(MDSContext::vec& ls) { |
213 | MDSContext::vec v; | |
214 | v.swap(ls); | |
215 | std::copy(v.begin(), v.end(), std::back_inserter(finished_queue)); | |
7c673cae FG |
216 | progress_thread.signal(); |
217 | } | |
11fdf7f2 TL |
218 | void queue_waiters_front(MDSContext::vec& ls) { |
219 | MDSContext::vec v; | |
220 | v.swap(ls); | |
221 | std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue)); | |
91327a77 AA |
222 | progress_thread.signal(); |
223 | } | |
7c673cae | 224 | |
7c673cae FG |
225 | // Daemon lifetime functions: these guys break the abstraction |
226 | // and call up into the parent MDSDaemon instance. It's kind | |
227 | // of unavoidable: if we want any depth into our calls | |
228 | // to be able to e.g. tear down the whole process, we have to | |
229 | // have a reference going all the way down. | |
230 | // >>> | |
231 | void suicide(); | |
232 | void respawn(); | |
233 | // <<< | |
234 | ||
235 | /** | |
236 | * Call this periodically if inside a potentially long running piece | |
237 | * of code while holding the mds_lock | |
238 | */ | |
239 | void heartbeat_reset(); | |
240 | ||
241 | /** | |
242 | * Report state DAMAGED to the mon, and then pass on to respawn(). Call | |
243 | * this when an unrecoverable error is encountered while attempting | |
244 | * to load an MDS rank's data structures. This is *not* for use with | |
245 | * errors affecting normal dirfrag/inode objects -- they should be handled | |
246 | * through cleaner scrub/repair mechanisms. | |
247 | * | |
248 | * Callers must already hold mds_lock. | |
249 | */ | |
250 | void damaged(); | |
251 | ||
252 | /** | |
253 | * Wrapper around `damaged` for users who are not | |
254 | * already holding mds_lock. | |
255 | * | |
256 | * Callers must not already hold mds_lock. | |
257 | */ | |
258 | void damaged_unlocked(); | |
259 | ||
91327a77 AA |
260 | double last_cleared_laggy() const { |
261 | return beacon.last_cleared_laggy(); | |
262 | } | |
263 | ||
264 | double get_dispatch_queue_max_age(utime_t now) const; | |
7c673cae | 265 | |
9f95a23c TL |
266 | void send_message_mds(const ref_t<Message>& m, mds_rank_t mds); |
267 | void forward_message_mds(const cref_t<MClientRequest>& req, mds_rank_t mds); | |
268 | void send_message_client_counted(const ref_t<Message>& m, client_t client); | |
269 | void send_message_client_counted(const ref_t<Message>& m, Session* session); | |
270 | void send_message_client_counted(const ref_t<Message>& m, const ConnectionRef& connection); | |
271 | void send_message_client(const ref_t<Message>& m, Session* session); | |
272 | void send_message(const ref_t<Message>& m, const ConnectionRef& c); | |
7c673cae | 273 | |
11fdf7f2 | 274 | void wait_for_active_peer(mds_rank_t who, MDSContext *c) { |
7c673cae FG |
275 | waiting_for_active_peer[who].push_back(c); |
276 | } | |
11fdf7f2 TL |
277 | void wait_for_cluster_recovered(MDSContext *c) { |
278 | ceph_assert(cluster_degraded); | |
7c673cae FG |
279 | waiting_for_active_peer[MDS_RANK_NONE].push_back(c); |
280 | } | |
281 | ||
11fdf7f2 | 282 | void wait_for_any_client_connection(MDSContext *c) { |
28e407b8 AA |
283 | waiting_for_any_client_connection.push_back(c); |
284 | } | |
285 | void kick_waiters_for_any_client_connection(void) { | |
286 | finish_contexts(g_ceph_context, waiting_for_any_client_connection); | |
287 | } | |
11fdf7f2 | 288 | void wait_for_active(MDSContext *c) { |
7c673cae FG |
289 | waiting_for_active.push_back(c); |
290 | } | |
11fdf7f2 | 291 | void wait_for_replay(MDSContext *c) { |
7c673cae FG |
292 | waiting_for_replay.push_back(c); |
293 | } | |
11fdf7f2 | 294 | void wait_for_rejoin(MDSContext *c) { |
a8e16298 TL |
295 | waiting_for_rejoin.push_back(c); |
296 | } | |
11fdf7f2 | 297 | void wait_for_reconnect(MDSContext *c) { |
7c673cae FG |
298 | waiting_for_reconnect.push_back(c); |
299 | } | |
11fdf7f2 | 300 | void wait_for_resolve(MDSContext *c) { |
7c673cae FG |
301 | waiting_for_resolve.push_back(c); |
302 | } | |
11fdf7f2 | 303 | void wait_for_mdsmap(epoch_t e, MDSContext *c) { |
7c673cae FG |
304 | waiting_for_mdsmap[e].push_back(c); |
305 | } | |
11fdf7f2 | 306 | void enqueue_replay(MDSContext *c) { |
7c673cae FG |
307 | replay_queue.push_back(c); |
308 | } | |
309 | ||
310 | bool queue_one_replay(); | |
11fdf7f2 | 311 | void maybe_clientreplay_done(); |
7c673cae FG |
312 | |
313 | void set_osd_epoch_barrier(epoch_t e); | |
314 | epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;} | |
315 | epoch_t get_osd_epoch() const; | |
316 | ||
317 | ceph_tid_t issue_tid() { return ++last_tid; } | |
318 | ||
11fdf7f2 | 319 | MDSMap *get_mds_map() { return mdsmap.get(); } |
7c673cae | 320 | |
28e407b8 | 321 | uint64_t get_num_requests() const { return logger->get(l_mds_request); } |
7c673cae FG |
322 | |
323 | int get_mds_slow_req_count() const { return mds_slow_req_count; } | |
324 | ||
325 | void dump_status(Formatter *f) const; | |
326 | ||
11fdf7f2 | 327 | void hit_export_target(mds_rank_t rank, double amount=-1.0); |
7c673cae FG |
328 | bool is_export_target(mds_rank_t rank) { |
329 | const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; | |
330 | return map_targets.count(rank); | |
331 | } | |
332 | ||
31f18b77 | 333 | bool evict_client(int64_t session_id, bool wait, bool blacklist, |
11fdf7f2 | 334 | std::ostream& ss, Context *on_killed=nullptr); |
92f5a8d4 TL |
335 | int config_client(int64_t session_id, bool remove, |
336 | const std::string& option, const std::string& value, | |
337 | std::ostream& ss); | |
11fdf7f2 TL |
338 | |
339 | void mark_base_recursively_scrubbed(inodeno_t ino); | |
31f18b77 | 340 | |
9f95a23c TL |
341 | // Reference to global MDS::mds_lock, so that users of MDSRank don't |
342 | // carry around references to the outer MDS, and we can substitute | |
343 | // a separate lock here in future potentially. | |
344 | ceph::mutex &mds_lock; | |
345 | ||
346 | // Reference to global cluster log client, just to avoid initialising | |
347 | // a separate one here. | |
348 | LogChannelRef &clog; | |
349 | ||
350 | // Reference to global timer utility, because MDSRank and MDSDaemon | |
351 | // currently both use the same mds_lock, so it makes sense for them | |
352 | // to share a timer. | |
353 | SafeTimer &timer; | |
354 | ||
355 | std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */ | |
356 | ||
357 | Objecter *objecter; | |
358 | ||
359 | // sub systems | |
360 | Server *server = nullptr; | |
361 | MDCache *mdcache = nullptr; | |
362 | Locker *locker = nullptr; | |
363 | MDLog *mdlog = nullptr; | |
364 | MDBalancer *balancer = nullptr; | |
365 | ScrubStack *scrubstack = nullptr; | |
366 | DamageTable damage_table; | |
367 | ||
368 | InoTable *inotable = nullptr; | |
369 | ||
370 | SnapServer *snapserver = nullptr; | |
371 | SnapClient *snapclient = nullptr; | |
372 | ||
373 | SessionMap sessionmap; | |
374 | ||
375 | PerfCounters *logger = nullptr, *mlogger = nullptr; | |
376 | OpTracker op_tracker; | |
377 | ||
378 | // The last different state I held before current | |
379 | MDSMap::DaemonState last_state = MDSMap::STATE_BOOT; | |
380 | // The state assigned to me by the MDSMap | |
381 | MDSMap::DaemonState state = MDSMap::STATE_BOOT; | |
382 | ||
383 | bool cluster_degraded = false; | |
384 | ||
385 | Finisher *finisher; | |
7c673cae | 386 | protected: |
9f95a23c TL |
387 | typedef enum { |
388 | // The MDSMap is available, configure default layouts and structures | |
389 | MDS_BOOT_INITIAL = 0, | |
390 | // We are ready to open some inodes | |
391 | MDS_BOOT_OPEN_ROOT, | |
392 | // We are ready to do a replay if needed | |
393 | MDS_BOOT_PREPARE_LOG, | |
394 | // Replay is complete | |
395 | MDS_BOOT_REPLAY_DONE | |
396 | } BootStep; | |
397 | ||
398 | class ProgressThread : public Thread { | |
399 | public: | |
400 | explicit ProgressThread(MDSRank *mds_) : mds(mds_) {} | |
401 | void * entry() override; | |
402 | void shutdown(); | |
403 | void signal() {cond.notify_all();} | |
404 | private: | |
405 | MDSRank *mds; | |
406 | ceph::condition_variable cond; | |
407 | } progress_thread; | |
408 | ||
409 | class C_MDS_StandbyReplayRestart; | |
410 | class C_MDS_StandbyReplayRestartFinish; | |
411 | // Friended to access retry_dispatch | |
412 | friend class C_MDS_RetryMessage; | |
413 | friend class C_MDS_BootStart; | |
414 | friend class C_MDS_InternalBootStart; | |
415 | friend class C_MDS_MonCommand; | |
416 | ||
417 | const mds_rank_t whoami; | |
418 | ||
419 | ~MDSRank(); | |
420 | ||
421 | void inc_dispatch_depth() { ++dispatch_depth; } | |
422 | void dec_dispatch_depth() { --dispatch_depth; } | |
423 | void retry_dispatch(const cref_t<Message> &m); | |
424 | bool handle_deferrable_message(const cref_t<Message> &m); | |
425 | void _advance_queues(); | |
426 | bool _dispatch(const cref_t<Message> &m, bool new_msg); | |
427 | bool is_stale_message(const cref_t<Message> &m) const; | |
428 | ||
429 | /** | |
430 | * Emit clog warnings for any ops reported as warnings by optracker | |
431 | */ | |
432 | void check_ops_in_flight(); | |
433 | ||
434 | /** | |
435 | * Share MDSMap with clients | |
436 | */ | |
437 | void bcast_mds_map(); // to mounted clients | |
438 | void create_logger(); | |
439 | ||
7c673cae | 440 | void dump_clientreplay_status(Formatter *f) const; |
11fdf7f2 TL |
441 | void command_scrub_start(Formatter *f, |
442 | std::string_view path, std::string_view tag, | |
443 | const vector<string>& scrubop_vec, Context *on_finish); | |
444 | void command_tag_path(Formatter *f, std::string_view path, | |
445 | std::string_view tag); | |
446 | // scrub control commands | |
447 | void command_scrub_abort(Formatter *f, Context *on_finish); | |
448 | void command_scrub_pause(Formatter *f, Context *on_finish); | |
449 | void command_scrub_resume(Formatter *f); | |
450 | void command_scrub_status(Formatter *f); | |
451 | ||
452 | void command_flush_path(Formatter *f, std::string_view path); | |
7c673cae FG |
453 | void command_flush_journal(Formatter *f); |
454 | void command_get_subtrees(Formatter *f); | |
455 | void command_export_dir(Formatter *f, | |
11fdf7f2 | 456 | std::string_view path, mds_rank_t dest); |
7c673cae FG |
457 | bool command_dirfrag_split( |
458 | cmdmap_t cmdmap, | |
459 | std::ostream &ss); | |
460 | bool command_dirfrag_merge( | |
461 | cmdmap_t cmdmap, | |
462 | std::ostream &ss); | |
463 | bool command_dirfrag_ls( | |
464 | cmdmap_t cmdmap, | |
465 | std::ostream &ss, | |
466 | Formatter *f); | |
11fdf7f2 | 467 | int _command_export_dir(std::string_view path, mds_rank_t dest); |
7c673cae FG |
468 | CDir *_command_dirfrag_get( |
469 | const cmdmap_t &cmdmap, | |
470 | std::ostream &ss); | |
11fdf7f2 TL |
471 | void command_openfiles_ls(Formatter *f); |
472 | void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f); | |
473 | void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); | |
f64942e4 AA |
474 | void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish); |
475 | ||
7c673cae FG |
476 | // FIXME the state machine logic should be separable from the dispatch |
477 | // logic that calls it. | |
478 | // >>> | |
479 | void calc_recovery_set(); | |
480 | void request_state(MDSMap::DaemonState s); | |
481 | ||
7c673cae FG |
482 | void boot_create(); // i am new mds. |
483 | void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay | |
484 | ||
485 | void replay_start(); | |
486 | void creating_done(); | |
487 | void starting_done(); | |
488 | void replay_done(); | |
489 | void standby_replay_restart(); | |
490 | void _standby_replay_restart_finish(int r, uint64_t old_read_pos); | |
7c673cae FG |
491 | |
492 | void reopen_log(); | |
493 | ||
494 | void resolve_start(); | |
495 | void resolve_done(); | |
496 | void reconnect_start(); | |
497 | void reconnect_done(); | |
498 | void rejoin_joint_start(); | |
499 | void rejoin_start(); | |
500 | void rejoin_done(); | |
501 | void recovery_done(int oldstate); | |
502 | void clientreplay_start(); | |
503 | void clientreplay_done(); | |
504 | void active_start(); | |
505 | void stopping_start(); | |
506 | void stopping_done(); | |
507 | ||
508 | void validate_sessions(); | |
9f95a23c | 509 | |
7c673cae FG |
510 | void handle_mds_recovery(mds_rank_t who); |
511 | void handle_mds_failure(mds_rank_t who); | |
7c673cae FG |
512 | |
513 | /* Update MDSMap export_targets for this rank. Called on ::tick(). */ | |
11fdf7f2 | 514 | void update_targets(); |
94b18763 | 515 | |
11fdf7f2 TL |
516 | void _mon_command_finish(int r, std::string_view cmd, std::string_view outs); |
517 | void set_mdsmap_multimds_snaps_allowed(); | |
9f95a23c TL |
518 | |
519 | Context *create_async_exec_context(C_ExecAndReply *ctx); | |
520 | ||
521 | // Incarnation as seen in MDSMap at the point where a rank is | |
522 | // assigned. | |
523 | int incarnation = 0; | |
524 | ||
525 | // Flag to indicate we entered shutdown: anyone seeing this to be true | |
526 | // after taking mds_lock must drop out. | |
527 | bool stopping = false; | |
528 | ||
529 | // PurgeQueue is only used by StrayManager, but it is owned by MDSRank | |
530 | // because its init/shutdown happens at the top level. | |
531 | PurgeQueue purge_queue; | |
532 | ||
533 | list<cref_t<Message>> waiting_for_nolaggy; | |
534 | MDSContext::que finished_queue; | |
535 | // Dispatch, retry, queues | |
536 | int dispatch_depth = 0; | |
537 | ||
538 | ceph::heartbeat_handle_d *hb = nullptr; // Heartbeat for threads using mds_lock | |
539 | ||
540 | map<mds_rank_t, version_t> peer_mdsmap_epoch; | |
541 | ||
542 | ceph_tid_t last_tid = 0; // for mds-initiated requests (e.g. stray rename) | |
543 | ||
544 | MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin, | |
545 | waiting_for_reconnect, waiting_for_resolve; | |
546 | MDSContext::vec waiting_for_any_client_connection; | |
547 | MDSContext::que replay_queue; | |
548 | bool replaying_requests_done = false; | |
549 | ||
550 | map<mds_rank_t, MDSContext::vec > waiting_for_active_peer; | |
551 | map<epoch_t, MDSContext::vec > waiting_for_mdsmap; | |
552 | ||
553 | epoch_t osd_epoch_barrier = 0; | |
554 | ||
555 | // Const reference to the beacon so that we can behave differently | |
556 | // when it's laggy. | |
557 | Beacon &beacon; | |
558 | ||
559 | int mds_slow_req_count = 0; | |
560 | ||
561 | epoch_t last_client_mdsmap_bcast = 0; | |
562 | ||
563 | map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */ | |
564 | ||
565 | Messenger *messenger; | |
566 | MonClient *monc; | |
567 | MgrClient *mgrc; | |
568 | ||
569 | Context *respawn_hook; | |
570 | Context *suicide_hook; | |
571 | ||
572 | bool standby_replaying = false; // true if current replay pass is in standby-replay mode | |
94b18763 | 573 | private: |
9f95a23c TL |
574 | // "task" string that gets displayed in ceph status |
575 | inline static const std::string SCRUB_STATUS_KEY = "scrub status"; | |
11fdf7f2 | 576 | |
9f95a23c TL |
577 | void get_task_status(std::map<std::string, std::string> *status); |
578 | void schedule_update_timer_task(); | |
579 | void send_task_status(); | |
580 | ||
581 | mono_time starttime = mono_clock::zero(); | |
7c673cae FG |
582 | }; |
583 | ||
584 | /* This expects to be given a reference which it is responsible for. | |
585 | * The finish function calls functions which | |
586 | * will put the Message exactly once.*/ | |
587 | class C_MDS_RetryMessage : public MDSInternalContext { | |
7c673cae | 588 | public: |
9f95a23c | 589 | C_MDS_RetryMessage(MDSRank *mds, const cref_t<Message> &m) |
11fdf7f2 | 590 | : MDSInternalContext(mds), m(m) {} |
7c673cae | 591 | void finish(int r) override { |
11fdf7f2 TL |
592 | get_mds()->retry_dispatch(m); |
593 | } | |
594 | protected: | |
9f95a23c | 595 | cref_t<Message> m; |
11fdf7f2 TL |
596 | }; |
597 | ||
598 | class CF_MDS_RetryMessageFactory : public MDSContextFactory { | |
599 | public: | |
9f95a23c | 600 | CF_MDS_RetryMessageFactory(MDSRank *mds, const cref_t<Message> &m) |
11fdf7f2 TL |
601 | : mds(mds), m(m) {} |
602 | ||
603 | MDSContext *build() { | |
604 | return new C_MDS_RetryMessage(mds, m); | |
7c673cae | 605 | } |
11fdf7f2 TL |
606 | private: |
607 | MDSRank *mds; | |
9f95a23c | 608 | cref_t<Message> m; |
7c673cae FG |
609 | }; |
610 | ||
611 | /** | |
612 | * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e. | |
613 | * the service/dispatcher stuff like init/shutdown that subsystems should | |
614 | * never touch. | |
615 | */ | |
92f5a8d4 | 616 | class MDSRankDispatcher : public MDSRank, public md_config_obs_t |
7c673cae FG |
617 | { |
618 | public: | |
9f95a23c TL |
619 | MDSRankDispatcher( |
620 | mds_rank_t whoami_, | |
621 | ceph::mutex &mds_lock_, | |
622 | LogChannelRef &clog_, | |
623 | SafeTimer &timer_, | |
624 | Beacon &beacon_, | |
625 | std::unique_ptr<MDSMap> &mdsmap_, | |
626 | Messenger *msgr, | |
627 | MonClient *monc_, | |
628 | MgrClient *mgrc, | |
629 | Context *respawn_hook_, | |
630 | Context *suicide_hook_); | |
631 | ||
7c673cae FG |
632 | void init(); |
633 | void tick(); | |
634 | void shutdown(); | |
9f95a23c TL |
635 | void handle_asok_command( |
636 | std::string_view command, | |
637 | const cmdmap_t& cmdmap, | |
638 | Formatter *f, | |
639 | const bufferlist &inbl, | |
640 | std::function<void(int,const std::string&,bufferlist&)> on_finish); | |
641 | void handle_mds_map(const cref_t<MMDSMap> &m, const MDSMap &oldmap); | |
7c673cae | 642 | void handle_osd_map(); |
7c673cae FG |
643 | void update_log_config(); |
644 | ||
92f5a8d4 TL |
645 | const char** get_tracked_conf_keys() const override final; |
646 | void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override; | |
647 | ||
7c673cae | 648 | void dump_sessions(const SessionFilter &filter, Formatter *f) const; |
9f95a23c TL |
649 | void evict_clients(const SessionFilter &filter, |
650 | std::function<void(int,const std::string&,bufferlist&)> on_finish); | |
7c673cae FG |
651 | |
652 | // Call into me from MDS::ms_dispatch | |
9f95a23c | 653 | bool ms_dispatch(const cref_t<Message> &m); |
7c673cae FG |
654 | }; |
655 | ||
// This utility for MDS and MDSRank dispatchers.
//
// Expects a message handle named `m` in the enclosing scope and must be
// used inside a function whose return type accepts `true`. When `m` has
// a connection whose peer type shares no bit with `peers`, the message
// is logged at level 0 and the enclosing function returns true.
// A message with no connection is let through.
#define ALLOW_MESSAGES_FROM(peers)                                        \
  do {                                                                    \
    auto&& allow_from_con_ = m->get_connection();                         \
    if (allow_from_con_ &&                                                \
        (allow_from_con_->get_peer_type() & (peers)) == 0) {              \
      dout(0) << __FILE__ << "." << __LINE__                              \
              << ": filtered out request, peer="                          \
              << allow_from_con_->get_peer_type()                         \
              << " allowing=" << #peers << " message=" << *m << dendl;    \
      return true;                                                        \
    }                                                                     \
  } while (0)
665 | ||
666 | #endif // MDS_RANK_H_ | |
667 |