]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef MDS_RANK_H_ | |
16 | #define MDS_RANK_H_ | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
7c673cae FG |
20 | #include "common/DecayCounter.h" |
21 | #include "common/LogClient.h" | |
22 | #include "common/Timer.h" | |
23 | #include "common/TrackedOp.h" | |
24 | ||
11fdf7f2 | 25 | #include "messages/MClientRequest.h" |
7c673cae | 26 | #include "messages/MCommand.h" |
11fdf7f2 | 27 | #include "messages/MMDSMap.h" |
7c673cae FG |
28 | |
29 | #include "Beacon.h" | |
30 | #include "DamageTable.h" | |
31 | #include "MDSMap.h" | |
32 | #include "SessionMap.h" | |
33 | #include "MDCache.h" | |
7c673cae | 34 | #include "MDLog.h" |
11fdf7f2 | 35 | #include "MDSContext.h" |
7c673cae | 36 | #include "PurgeQueue.h" |
91327a77 | 37 | #include "Server.h" |
7c673cae FG |
38 | #include "osdc/Journaler.h" |
39 | ||
40 | // Full .h import instead of forward declaration for PerfCounters, for the | |
41 | // benefit of those including this header and using MDSRank::logger | |
42 | #include "common/perf_counters.h" | |
43 | ||
44 | enum { | |
45 | l_mds_first = 2000, | |
46 | l_mds_request, | |
47 | l_mds_reply, | |
48 | l_mds_reply_latency, | |
49 | l_mds_forward, | |
50 | l_mds_dir_fetch, | |
51 | l_mds_dir_commit, | |
52 | l_mds_dir_split, | |
53 | l_mds_dir_merge, | |
54 | l_mds_inode_max, | |
55 | l_mds_inodes, | |
56 | l_mds_inodes_top, | |
57 | l_mds_inodes_bottom, | |
58 | l_mds_inodes_pin_tail, | |
59 | l_mds_inodes_pinned, | |
60 | l_mds_inodes_expired, | |
61 | l_mds_inodes_with_caps, | |
62 | l_mds_caps, | |
63 | l_mds_subtrees, | |
64 | l_mds_traverse, | |
65 | l_mds_traverse_hit, | |
66 | l_mds_traverse_forward, | |
67 | l_mds_traverse_discover, | |
68 | l_mds_traverse_dir_fetch, | |
69 | l_mds_traverse_remote_ino, | |
70 | l_mds_traverse_lock, | |
71 | l_mds_load_cent, | |
72 | l_mds_dispatch_queue_len, | |
73 | l_mds_exported, | |
74 | l_mds_exported_inodes, | |
75 | l_mds_imported, | |
76 | l_mds_imported_inodes, | |
11fdf7f2 TL |
77 | l_mds_openino_dir_fetch, |
78 | l_mds_openino_backtrace_fetch, | |
79 | l_mds_openino_peer_discover, | |
7c673cae FG |
80 | l_mds_last, |
81 | }; | |
82 | ||
83 | // memory utilization | |
84 | enum { | |
85 | l_mdm_first = 2500, | |
86 | l_mdm_ino, | |
87 | l_mdm_inoa, | |
88 | l_mdm_inos, | |
89 | l_mdm_dir, | |
90 | l_mdm_dira, | |
91 | l_mdm_dirs, | |
92 | l_mdm_dn, | |
93 | l_mdm_dna, | |
94 | l_mdm_dns, | |
95 | l_mdm_cap, | |
96 | l_mdm_capa, | |
97 | l_mdm_caps, | |
98 | l_mdm_rss, | |
99 | l_mdm_heap, | |
7c673cae FG |
100 | l_mdm_last, |
101 | }; | |
102 | ||
103 | namespace ceph { | |
104 | struct heartbeat_handle_d; | |
105 | } | |
106 | ||
7c673cae FG |
107 | class Locker; |
108 | class MDCache; | |
109 | class MDLog; | |
110 | class MDBalancer; | |
111 | class InoTable; | |
112 | class SnapServer; | |
113 | class SnapClient; | |
114 | class MDSTableServer; | |
115 | class MDSTableClient; | |
116 | class Messenger; | |
117 | class Objecter; | |
118 | class MonClient; | |
119 | class Finisher; | |
7c673cae | 120 | class ScrubStack; |
f64942e4 | 121 | class C_MDS_Send_Command_Reply; |
11fdf7f2 | 122 | class C_ExecAndReply; |
7c673cae FG |
123 | |
124 | /** | |
125 | * The public part of this class's interface is what's exposed to all | |
126 | * the various subsystems (server, mdcache, etc), such as pointers | |
127 | * to the other subsystems, and message-sending calls. | |
128 | */ | |
129 | class MDSRank { | |
130 | protected: | |
131 | const mds_rank_t whoami; | |
132 | ||
133 | // Incarnation as seen in MDSMap at the point where a rank is | |
134 | // assigned. | |
135 | int incarnation; | |
136 | ||
137 | public: | |
f64942e4 AA |
138 | |
139 | friend class C_Flush_Journal; | |
140 | friend class C_Drop_Cache; | |
141 | ||
11fdf7f2 TL |
142 | friend class C_CacheDropExecAndReply; |
143 | friend class C_ScrubExecAndReply; | |
144 | friend class C_ScrubControlExecAndReply; | |
145 | ||
7c673cae FG |
146 | mds_rank_t get_nodeid() const { return whoami; } |
147 | int64_t get_metadata_pool(); | |
148 | ||
149 | // Reference to global MDS::mds_lock, so that users of MDSRank don't | |
150 | // carry around references to the outer MDS, and we can substitute | |
151 | // a separate lock here in future potentially. | |
152 | Mutex &mds_lock; | |
153 | ||
94b18763 FG |
154 | mono_time get_starttime() const { |
155 | return starttime; | |
156 | } | |
157 | chrono::duration<double> get_uptime() const { | |
158 | mono_time now = mono_clock::now(); | |
159 | return chrono::duration<double>(now-starttime); | |
160 | } | |
161 | ||
b32b8144 FG |
162 | class CephContext *cct; |
163 | ||
7c673cae FG |
164 | bool is_daemon_stopping() const; |
165 | ||
166 | // Reference to global cluster log client, just to avoid initialising | |
167 | // a separate one here. | |
168 | LogChannelRef &clog; | |
169 | ||
170 | // Reference to global timer utility, because MDSRank and MDSDaemon | |
171 | // currently both use the same mds_lock, so it makes sense for them | |
172 | // to share a timer. | |
173 | SafeTimer &timer; | |
174 | ||
11fdf7f2 | 175 | std::unique_ptr<MDSMap> &mdsmap; /* MDSDaemon::mdsmap */ |
7c673cae FG |
176 | |
177 | Objecter *objecter; | |
178 | ||
179 | // sub systems | |
180 | Server *server; | |
181 | MDCache *mdcache; | |
182 | Locker *locker; | |
183 | MDLog *mdlog; | |
184 | MDBalancer *balancer; | |
185 | ScrubStack *scrubstack; | |
186 | DamageTable damage_table; | |
187 | ||
188 | ||
189 | InoTable *inotable; | |
190 | ||
191 | SnapServer *snapserver; | |
192 | SnapClient *snapclient; | |
193 | ||
194 | MDSTableClient *get_table_client(int t); | |
195 | MDSTableServer *get_table_server(int t); | |
196 | ||
197 | SessionMap sessionmap; | |
198 | Session *get_session(client_t client) { | |
199 | return sessionmap.get_session(entity_name_t::CLIENT(client.v)); | |
200 | } | |
11fdf7f2 | 201 | Session *get_session(const Message::const_ref &m); |
7c673cae FG |
202 | |
203 | PerfCounters *logger, *mlogger; | |
204 | OpTracker op_tracker; | |
205 | ||
206 | // The most recent state I held that differs from the current one | |
207 | MDSMap::DaemonState last_state; | |
208 | // The state assigned to me by the MDSMap | |
209 | MDSMap::DaemonState state; | |
210 | ||
211 | bool cluster_degraded; | |
212 | ||
213 | MDSMap::DaemonState get_state() const { return state; } | |
214 | MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); } | |
215 | ||
216 | bool is_creating() const { return state == MDSMap::STATE_CREATING; } | |
217 | bool is_starting() const { return state == MDSMap::STATE_STARTING; } | |
218 | bool is_standby() const { return state == MDSMap::STATE_STANDBY; } | |
219 | bool is_replay() const { return state == MDSMap::STATE_REPLAY; } | |
220 | bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; } | |
221 | bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; } | |
222 | bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; } | |
223 | bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; } | |
224 | bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; } | |
225 | bool is_active() const { return state == MDSMap::STATE_ACTIVE; } | |
226 | bool is_stopping() const { return state == MDSMap::STATE_STOPPING; } | |
227 | bool is_any_replay() const { return (is_replay() || is_standby_replay()); } | |
228 | bool is_stopped() const { return mdsmap->is_stopped(whoami); } | |
229 | bool is_cluster_degraded() const { return cluster_degraded; } | |
11fdf7f2 | 230 | bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); } |
7c673cae FG |
231 | |
232 | void handle_write_error(int err); | |
233 | ||
11fdf7f2 | 234 | void handle_conf_change(const ConfigProxy& conf, |
7c673cae FG |
235 | const std::set <std::string> &changed) |
236 | { | |
91327a77 AA |
237 | sessionmap.handle_conf_change(conf, changed); |
238 | server->handle_conf_change(conf, changed); | |
11fdf7f2 | 239 | mdcache->handle_conf_change(conf, changed, *mdsmap); |
7c673cae FG |
240 | purge_queue.handle_conf_change(conf, changed, *mdsmap); |
241 | } | |
242 | ||
c07f9fc5 | 243 | void update_mlogger(); |
7c673cae FG |
244 | protected: |
245 | // Flag to indicate we entered shutdown: anyone seeing this to be true | |
246 | // after taking mds_lock must drop out. | |
247 | bool stopping; | |
248 | ||
249 | // PurgeQueue is only used by StrayManager, but it is owned by MDSRank | |
250 | // because its init/shutdown happens at the top level. | |
251 | PurgeQueue purge_queue; | |
252 | ||
253 | class ProgressThread : public Thread { | |
254 | MDSRank *mds; | |
255 | Cond cond; | |
256 | public: | |
257 | explicit ProgressThread(MDSRank *mds_) : mds(mds_) {} | |
258 | void * entry() override; | |
259 | void shutdown(); | |
260 | void signal() {cond.Signal();} | |
261 | } progress_thread; | |
262 | ||
11fdf7f2 TL |
263 | list<Message::const_ref> waiting_for_nolaggy; |
264 | MDSContext::que finished_queue; | |
7c673cae FG |
265 | // Dispatch, retry, queues |
266 | int dispatch_depth; | |
267 | void inc_dispatch_depth() { ++dispatch_depth; } | |
268 | void dec_dispatch_depth() { --dispatch_depth; } | |
11fdf7f2 TL |
269 | void retry_dispatch(const Message::const_ref &m); |
270 | bool handle_deferrable_message(const Message::const_ref &m); | |
7c673cae | 271 | void _advance_queues(); |
11fdf7f2 | 272 | bool _dispatch(const Message::const_ref &m, bool new_msg); |
7c673cae FG |
273 | |
274 | ceph::heartbeat_handle_d *hb; // Heartbeat for threads using mds_lock | |
275 | ||
11fdf7f2 | 276 | bool is_stale_message(const Message::const_ref &m) const; |
7c673cae FG |
277 | |
278 | map<mds_rank_t, version_t> peer_mdsmap_epoch; | |
279 | ||
280 | ceph_tid_t last_tid; // for mds-initiated requests (e.g. stray rename) | |
281 | ||
11fdf7f2 TL |
282 | MDSContext::vec waiting_for_active, waiting_for_replay, waiting_for_rejoin, |
283 | waiting_for_reconnect, waiting_for_resolve; | |
284 | MDSContext::vec waiting_for_any_client_connection; | |
285 | MDSContext::que replay_queue; | |
286 | bool replaying_requests_done = false; | |
287 | ||
288 | map<mds_rank_t, MDSContext::vec > waiting_for_active_peer; | |
289 | map<epoch_t, MDSContext::vec > waiting_for_mdsmap; | |
7c673cae FG |
290 | |
291 | epoch_t osd_epoch_barrier; | |
292 | ||
293 | // Reference to the beacon so that we can behave differently | |
294 | // when it's laggy. | |
295 | Beacon &beacon; | |
296 | ||
297 | /** | |
298 | * Emit clog warnings for any ops reported as warnings by optracker | |
299 | */ | |
300 | void check_ops_in_flight(); | |
301 | ||
302 | int mds_slow_req_count; | |
303 | ||
304 | /** | |
305 | * Share MDSMap with clients | |
306 | */ | |
307 | void bcast_mds_map(); // to mounted clients | |
308 | epoch_t last_client_mdsmap_bcast; | |
309 | ||
310 | map<mds_rank_t,DecayCounter> export_targets; /* targets this MDS is exporting to or wants/tries to */ | |
311 | ||
312 | void create_logger(); | |
313 | public: | |
314 | ||
11fdf7f2 | 315 | void queue_waiter(MDSContext *c) { |
91327a77 AA |
316 | finished_queue.push_back(c); |
317 | progress_thread.signal(); | |
318 | } | |
494da23a TL |
319 | void queue_waiter_front(MDSContext *c) { |
320 | finished_queue.push_front(c); | |
321 | progress_thread.signal(); | |
322 | } | |
11fdf7f2 TL |
323 | void queue_waiters(MDSContext::vec& ls) { |
324 | MDSContext::vec v; | |
325 | v.swap(ls); | |
326 | std::copy(v.begin(), v.end(), std::back_inserter(finished_queue)); | |
7c673cae FG |
327 | progress_thread.signal(); |
328 | } | |
11fdf7f2 TL |
329 | void queue_waiters_front(MDSContext::vec& ls) { |
330 | MDSContext::vec v; | |
331 | v.swap(ls); | |
332 | std::copy(v.rbegin(), v.rend(), std::front_inserter(finished_queue)); | |
91327a77 AA |
333 | progress_thread.signal(); |
334 | } | |
7c673cae FG |
335 | |
336 | MDSRank( | |
337 | mds_rank_t whoami_, | |
338 | Mutex &mds_lock_, | |
339 | LogChannelRef &clog_, | |
340 | SafeTimer &timer_, | |
341 | Beacon &beacon_, | |
11fdf7f2 | 342 | std::unique_ptr<MDSMap> & mdsmap_, |
7c673cae FG |
343 | Messenger *msgr, |
344 | MonClient *monc_, | |
345 | Context *respawn_hook_, | |
346 | Context *suicide_hook_); | |
347 | ||
348 | protected: | |
349 | ~MDSRank(); | |
350 | ||
351 | public: | |
352 | ||
353 | // Daemon lifetime functions: these guys break the abstraction | |
354 | // and call up into the parent MDSDaemon instance. It's kind | |
355 | // of unavoidable: if we want code at any depth in our call stack | |
356 | // to be able to e.g. tear down the whole process, we have to | |
357 | // have a reference going all the way down. | |
358 | // >>> | |
359 | void suicide(); | |
360 | void respawn(); | |
361 | // <<< | |
362 | ||
363 | /** | |
364 | * Call this periodically if inside a potentially long running piece | |
365 | * of code while holding the mds_lock | |
366 | */ | |
367 | void heartbeat_reset(); | |
368 | ||
369 | /** | |
370 | * Report state DAMAGED to the mon, and then pass on to respawn(). Call | |
371 | * this when an unrecoverable error is encountered while attempting | |
372 | * to load an MDS rank's data structures. This is *not* for use with | |
373 | * errors affecting normal dirfrag/inode objects -- they should be handled | |
374 | * through cleaner scrub/repair mechanisms. | |
375 | * | |
376 | * Callers must already hold mds_lock. | |
377 | */ | |
378 | void damaged(); | |
379 | ||
380 | /** | |
381 | * Wrapper around `damaged` for users who are not | |
382 | * already holding mds_lock. | |
383 | * | |
384 | * Callers must not already hold mds_lock. | |
385 | */ | |
386 | void damaged_unlocked(); | |
387 | ||
91327a77 AA |
388 | double last_cleared_laggy() const { |
389 | return beacon.last_cleared_laggy(); | |
390 | } | |
391 | ||
392 | double get_dispatch_queue_max_age(utime_t now) const; | |
7c673cae | 393 | |
11fdf7f2 TL |
394 | void send_message_mds(const Message::ref& m, mds_rank_t mds); |
395 | void forward_message_mds(const MClientRequest::const_ref& req, mds_rank_t mds); | |
396 | void send_message_client_counted(const Message::ref& m, client_t client); | |
397 | void send_message_client_counted(const Message::ref& m, Session* session); | |
398 | void send_message_client_counted(const Message::ref& m, const ConnectionRef& connection); | |
399 | void send_message_client(const Message::ref& m, Session* session); | |
400 | void send_message(const Message::ref& m, const ConnectionRef& c); | |
7c673cae | 401 | |
11fdf7f2 | 402 | void wait_for_active_peer(mds_rank_t who, MDSContext *c) { |
7c673cae FG |
403 | waiting_for_active_peer[who].push_back(c); |
404 | } | |
11fdf7f2 TL |
405 | void wait_for_cluster_recovered(MDSContext *c) { |
406 | ceph_assert(cluster_degraded); | |
7c673cae FG |
407 | waiting_for_active_peer[MDS_RANK_NONE].push_back(c); |
408 | } | |
409 | ||
11fdf7f2 | 410 | void wait_for_any_client_connection(MDSContext *c) { |
28e407b8 AA |
411 | waiting_for_any_client_connection.push_back(c); |
412 | } | |
413 | void kick_waiters_for_any_client_connection(void) { | |
414 | finish_contexts(g_ceph_context, waiting_for_any_client_connection); | |
415 | } | |
11fdf7f2 | 416 | void wait_for_active(MDSContext *c) { |
7c673cae FG |
417 | waiting_for_active.push_back(c); |
418 | } | |
11fdf7f2 | 419 | void wait_for_replay(MDSContext *c) { |
7c673cae FG |
420 | waiting_for_replay.push_back(c); |
421 | } | |
11fdf7f2 | 422 | void wait_for_rejoin(MDSContext *c) { |
a8e16298 TL |
423 | waiting_for_rejoin.push_back(c); |
424 | } | |
11fdf7f2 | 425 | void wait_for_reconnect(MDSContext *c) { |
7c673cae FG |
426 | waiting_for_reconnect.push_back(c); |
427 | } | |
11fdf7f2 | 428 | void wait_for_resolve(MDSContext *c) { |
7c673cae FG |
429 | waiting_for_resolve.push_back(c); |
430 | } | |
11fdf7f2 | 431 | void wait_for_mdsmap(epoch_t e, MDSContext *c) { |
7c673cae FG |
432 | waiting_for_mdsmap[e].push_back(c); |
433 | } | |
11fdf7f2 | 434 | void enqueue_replay(MDSContext *c) { |
7c673cae FG |
435 | replay_queue.push_back(c); |
436 | } | |
437 | ||
438 | bool queue_one_replay(); | |
11fdf7f2 | 439 | void maybe_clientreplay_done(); |
7c673cae FG |
440 | |
441 | void set_osd_epoch_barrier(epoch_t e); | |
442 | epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;} | |
443 | epoch_t get_osd_epoch() const; | |
444 | ||
445 | ceph_tid_t issue_tid() { return ++last_tid; } | |
446 | ||
447 | Finisher *finisher; | |
448 | ||
11fdf7f2 | 449 | MDSMap *get_mds_map() { return mdsmap.get(); } |
7c673cae | 450 | |
28e407b8 | 451 | uint64_t get_num_requests() const { return logger->get(l_mds_request); } |
7c673cae FG |
452 | |
453 | int get_mds_slow_req_count() const { return mds_slow_req_count; } | |
454 | ||
455 | void dump_status(Formatter *f) const; | |
456 | ||
11fdf7f2 | 457 | void hit_export_target(mds_rank_t rank, double amount=-1.0); |
7c673cae FG |
458 | bool is_export_target(mds_rank_t rank) { |
459 | const set<mds_rank_t>& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; | |
460 | return map_targets.count(rank); | |
461 | } | |
462 | ||
31f18b77 | 463 | bool evict_client(int64_t session_id, bool wait, bool blacklist, |
11fdf7f2 TL |
464 | std::ostream& ss, Context *on_killed=nullptr); |
465 | ||
466 | void mark_base_recursively_scrubbed(inodeno_t ino); | |
31f18b77 | 467 | |
7c673cae FG |
468 | protected: |
469 | void dump_clientreplay_status(Formatter *f) const; | |
11fdf7f2 TL |
470 | void command_scrub_start(Formatter *f, |
471 | std::string_view path, std::string_view tag, | |
472 | const vector<string>& scrubop_vec, Context *on_finish); | |
473 | void command_tag_path(Formatter *f, std::string_view path, | |
474 | std::string_view tag); | |
475 | // scrub control commands | |
476 | void command_scrub_abort(Formatter *f, Context *on_finish); | |
477 | void command_scrub_pause(Formatter *f, Context *on_finish); | |
478 | void command_scrub_resume(Formatter *f); | |
479 | void command_scrub_status(Formatter *f); | |
480 | ||
481 | void command_flush_path(Formatter *f, std::string_view path); | |
7c673cae FG |
482 | void command_flush_journal(Formatter *f); |
483 | void command_get_subtrees(Formatter *f); | |
484 | void command_export_dir(Formatter *f, | |
11fdf7f2 | 485 | std::string_view path, mds_rank_t dest); |
7c673cae FG |
486 | bool command_dirfrag_split( |
487 | cmdmap_t cmdmap, | |
488 | std::ostream &ss); | |
489 | bool command_dirfrag_merge( | |
490 | cmdmap_t cmdmap, | |
491 | std::ostream &ss); | |
492 | bool command_dirfrag_ls( | |
493 | cmdmap_t cmdmap, | |
494 | std::ostream &ss, | |
495 | Formatter *f); | |
11fdf7f2 | 496 | int _command_export_dir(std::string_view path, mds_rank_t dest); |
7c673cae FG |
497 | CDir *_command_dirfrag_get( |
498 | const cmdmap_t &cmdmap, | |
499 | std::ostream &ss); | |
11fdf7f2 TL |
500 | void command_openfiles_ls(Formatter *f); |
501 | void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f); | |
502 | void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); | |
f64942e4 AA |
503 | void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish); |
504 | ||
7c673cae FG |
505 | protected: |
506 | Messenger *messenger; | |
507 | MonClient *monc; | |
508 | ||
509 | Context *respawn_hook; | |
510 | Context *suicide_hook; | |
511 | ||
512 | // Friended to access retry_dispatch | |
513 | friend class C_MDS_RetryMessage; | |
514 | ||
515 | // FIXME the state machine logic should be separable from the dispatch | |
516 | // logic that calls it. | |
517 | // >>> | |
518 | void calc_recovery_set(); | |
519 | void request_state(MDSMap::DaemonState s); | |
520 | ||
521 | bool standby_replaying; // true if current replay pass is in standby-replay mode | |
522 | ||
523 | typedef enum { | |
524 | // The MDSMap is available, configure default layouts and structures | |
525 | MDS_BOOT_INITIAL = 0, | |
526 | // We are ready to open some inodes | |
527 | MDS_BOOT_OPEN_ROOT, | |
528 | // We are ready to do a replay if needed | |
529 | MDS_BOOT_PREPARE_LOG, | |
530 | // Replay is complete | |
531 | MDS_BOOT_REPLAY_DONE | |
532 | } BootStep; | |
533 | friend class C_MDS_BootStart; | |
534 | friend class C_MDS_InternalBootStart; | |
535 | void boot_create(); // i am new mds. | |
536 | void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay | |
537 | ||
538 | void replay_start(); | |
539 | void creating_done(); | |
540 | void starting_done(); | |
541 | void replay_done(); | |
542 | void standby_replay_restart(); | |
543 | void _standby_replay_restart_finish(int r, uint64_t old_read_pos); | |
544 | class C_MDS_StandbyReplayRestart; | |
545 | class C_MDS_StandbyReplayRestartFinish; | |
546 | ||
547 | void reopen_log(); | |
548 | ||
549 | void resolve_start(); | |
550 | void resolve_done(); | |
551 | void reconnect_start(); | |
552 | void reconnect_done(); | |
553 | void rejoin_joint_start(); | |
554 | void rejoin_start(); | |
555 | void rejoin_done(); | |
556 | void recovery_done(int oldstate); | |
557 | void clientreplay_start(); | |
558 | void clientreplay_done(); | |
559 | void active_start(); | |
560 | void stopping_start(); | |
561 | void stopping_done(); | |
562 | ||
563 | void validate_sessions(); | |
564 | // <<< | |
565 | ||
566 | // >>> | |
567 | void handle_mds_recovery(mds_rank_t who); | |
568 | void handle_mds_failure(mds_rank_t who); | |
569 | // <<< | |
570 | ||
571 | /* Update MDSMap export_targets for this rank. Called on ::tick(). */ | |
11fdf7f2 | 572 | void update_targets(); |
94b18763 | 573 | |
11fdf7f2 TL |
574 | friend class C_MDS_MonCommand; |
575 | void _mon_command_finish(int r, std::string_view cmd, std::string_view outs); | |
576 | void set_mdsmap_multimds_snaps_allowed(); | |
94b18763 FG |
577 | private: |
578 | mono_time starttime = mono_clock::zero(); | |
11fdf7f2 TL |
579 | |
580 | protected: | |
581 | Context *create_async_exec_context(C_ExecAndReply *ctx); | |
7c673cae FG |
582 | }; |
583 | ||
584 | /* This expects to be given a reference which it is responsible for. | |
585 | * The finish function calls functions which | |
586 | * will put the Message exactly once.*/ | |
587 | class C_MDS_RetryMessage : public MDSInternalContext { | |
7c673cae | 588 | public: |
11fdf7f2 TL |
589 | C_MDS_RetryMessage(MDSRank *mds, const Message::const_ref &m) |
590 | : MDSInternalContext(mds), m(m) {} | |
7c673cae | 591 | void finish(int r) override { |
11fdf7f2 TL |
592 | get_mds()->retry_dispatch(m); |
593 | } | |
594 | protected: | |
595 | Message::const_ref m; | |
596 | }; | |
597 | ||
598 | class CF_MDS_RetryMessageFactory : public MDSContextFactory { | |
599 | public: | |
600 | CF_MDS_RetryMessageFactory(MDSRank *mds, const Message::const_ref &m) | |
601 | : mds(mds), m(m) {} | |
602 | ||
603 | MDSContext *build() { | |
604 | return new C_MDS_RetryMessage(mds, m); | |
7c673cae | 605 | } |
11fdf7f2 TL |
606 | |
607 | private: | |
608 | MDSRank *mds; | |
609 | Message::const_ref m; | |
7c673cae FG |
610 | }; |
611 | ||
612 | /** | |
613 | * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e. | |
614 | * the service/dispatcher stuff like init/shutdown that subsystems should | |
615 | * never touch. | |
616 | */ | |
617 | class MDSRankDispatcher : public MDSRank | |
618 | { | |
619 | public: | |
620 | void init(); | |
621 | void tick(); | |
622 | void shutdown(); | |
11fdf7f2 | 623 | bool handle_asok_command(std::string_view command, const cmdmap_t& cmdmap, |
7c673cae | 624 | Formatter *f, std::ostream& ss); |
11fdf7f2 | 625 | void handle_mds_map(const MMDSMap::const_ref &m, const MDSMap &oldmap); |
7c673cae | 626 | void handle_osd_map(); |
7c673cae FG |
627 | void update_log_config(); |
628 | ||
629 | bool handle_command( | |
630 | const cmdmap_t &cmdmap, | |
11fdf7f2 | 631 | const MCommand::const_ref &m, |
7c673cae FG |
632 | int *r, |
633 | std::stringstream *ds, | |
634 | std::stringstream *ss, | |
f64942e4 | 635 | Context **run_later, |
7c673cae FG |
636 | bool *need_reply); |
637 | ||
638 | void dump_sessions(const SessionFilter &filter, Formatter *f) const; | |
11fdf7f2 | 639 | void evict_clients(const SessionFilter &filter, const MCommand::const_ref &m); |
7c673cae FG |
640 | |
641 | // Call into me from MDS::ms_dispatch | |
11fdf7f2 | 642 | bool ms_dispatch(const Message::const_ref &m); |
7c673cae FG |
643 | |
644 | MDSRankDispatcher( | |
645 | mds_rank_t whoami_, | |
646 | Mutex &mds_lock_, | |
647 | LogChannelRef &clog_, | |
648 | SafeTimer &timer_, | |
649 | Beacon &beacon_, | |
11fdf7f2 | 650 | std::unique_ptr<MDSMap> &mdsmap_, |
7c673cae FG |
651 | Messenger *msgr, |
652 | MonClient *monc_, | |
653 | Context *respawn_hook_, | |
654 | Context *suicide_hook_); | |
655 | }; | |
656 | ||
657 | // This utility is for MDS and MDSRank dispatchers. | |
658 | #define ALLOW_MESSAGES_FROM(peers) \ | |
659 | do { \ | |
660 | if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \ | |
661 | dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" << m->get_connection()->get_peer_type() \ | |
662 | << " allowing=" << #peers << " message=" << *m << dendl; \ | |
7c673cae FG |
663 | return true; \ |
664 | } \ | |
665 | } while (0) | |
666 | ||
667 | #endif // MDS_RANK_H_ | |
668 |