]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef SCRUBSTACK_H_ | |
16 | #define SCRUBSTACK_H_ | |
17 | ||
18 | #include "CDir.h" | |
19 | #include "CDentry.h" | |
20 | #include "CInode.h" | |
21 | #include "MDSContext.h" | |
22 | #include "ScrubHeader.h" | |
23 | ||
9f95a23c | 24 | #include "common/LogClient.h" |
7c673cae | 25 | #include "include/elist.h" |
f67539c2 TL |
26 | #include "messages/MMDSScrub.h" |
27 | #include "messages/MMDSScrubStats.h" | |
7c673cae FG |
28 | |
29 | class MDCache; | |
30 | class Finisher; | |
31 | ||
32 | class ScrubStack { | |
7c673cae | 33 | public: |
9f95a23c TL |
34 | ScrubStack(MDCache *mdc, LogChannelRef &clog, Finisher *finisher_) : |
35 | mdcache(mdc), | |
36 | clog(clog), | |
7c673cae | 37 | finisher(finisher_), |
f67539c2 TL |
38 | scrub_stack(member_offset(MDSCacheObject, item_scrub)), |
39 | scrub_waiting(member_offset(MDSCacheObject, item_scrub)) {} | |
7c673cae | 40 | ~ScrubStack() { |
f67539c2 | 41 | ceph_assert(scrub_stack.empty()); |
11fdf7f2 | 42 | ceph_assert(!scrubs_in_progress); |
7c673cae FG |
43 | } |
44 | /** | |
f67539c2 TL |
45 | * Put the inode at either the top or bottom of the stack, with the |
46 | * given scrub params, and kick off more scrubbing. | |
47 | * @param in The inode to scrub | |
11fdf7f2 | 48 | * @param header The ScrubHeader propagated from wherever this scrub was initiated |
7c673cae | 49 | */ |
f67539c2 | 50 | int enqueue(CInode *in, ScrubHeaderRef& header, bool top); |
11fdf7f2 TL |
51 | /** |
52 | * Abort an ongoing scrub operation. The abort operation could be | |
53 | * delayed if scrub operations are still in progress. The | |
54 | * caller should provide a context which is completed after all | |
55 | * in-progress scrub operations are completed and pending inodes | |
56 | * are removed from the scrub stack (with the context callbacks for | |
f67539c2 | 57 | * inodes completed with -CEPHFS_ECANCELED). |
11fdf7f2 TL |
58 | * @param on_finish Context callback to invoke after abort |
59 | */ | |
60 | void scrub_abort(Context *on_finish); | |
61 | ||
62 | /** | |
63 | * Pause scrub operations. Similar to abort, pause is delayed if | |
64 | * there are scrub operations still in progress. The caller | |
65 | * should provide a context which is completed after all in-progress | |
66 | * scrub operations are completed. Subsequent scrub operations are | |
67 | * queued until scrub is resumed. | |
68 | * @param on_finish Context callback to invoke after pause | |
69 | */ | |
70 | void scrub_pause(Context *on_finish); | |
71 | ||
72 | /** | |
73 | * Resume a paused scrub. Unlike abort or pause, this is instantaneous. | |
74 | * Pending pause operations are cancelled (context callbacks are | |
f67539c2 TL |
75 | * invoked with -CEPHFS_ECANCELED). |
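76 | * @returns 0 (success) if resumed, -CEPHFS_EINVAL if an abort is in-progress. NOTE(review): the declared return type is bool, so an integer status would be collapsed to false/true -- confirm what callers expect. |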
11fdf7f2 TL |
77 | */ |
78 | bool scrub_resume(); | |
79 | ||
80 | /** | |
81 | * Get the current scrub status as human readable string. Some basic | |
82 | * information is returned such as number of inodes pending abort/pause. | |
83 | */ | |
84 | void scrub_status(Formatter *f); | |
85 | ||
9f95a23c TL |
86 | /** |
87 | * Get a high level scrub status summary such as current scrub state | |
88 | * and scrub paths. | |
89 | */ | |
90 | std::string_view scrub_summary(); | |
91 | ||
f91f0fd5 TL |
92 | static bool is_idle(std::string_view state_str) { |
93 | return state_str == "idle"; | |
94 | } | |
95 | ||
f67539c2 TL |
96 | bool is_scrubbing() const { return !scrub_stack.empty(); } |
97 | ||
98 | void advance_scrub_status(); | |
99 | ||
100 | void handle_mds_failure(mds_rank_t mds); | |
101 | ||
102 | void dispatch(const cref_t<Message> &m); | |
92f5a8d4 | 103 | |
aee94f69 TL |
104 | bool remove_inode_if_stacked(CInode *in); |
105 | ||
9f95a23c TL |
106 | MDCache *mdcache; |
107 | ||
108 | protected: | |
9f95a23c TL |
109 | |
110 | // reference to global cluster log client | |
111 | LogChannelRef &clog; | |
112 | ||
113 | /// A finisher needed so that we don't re-enter kick_off_scrubs | |
114 | Finisher *finisher; | |
115 | ||
116 | /// The stack of inodes we want to scrub | |
f67539c2 TL |
117 | elist<MDSCacheObject*> scrub_stack; |
118 | elist<MDSCacheObject*> scrub_waiting; | |
9f95a23c TL |
119 | /// current number of dentries we're actually scrubbing |
120 | int scrubs_in_progress = 0; | |
9f95a23c TL |
121 | int stack_size = 0; |
122 | ||
f67539c2 TL |
123 | struct scrub_remote_t { |
124 | std::string tag; | |
125 | std::set<mds_rank_t> gather_set; | |
126 | }; | |
127 | std::map<CInode*, scrub_remote_t> remote_scrubs; | |
128 | ||
129 | unsigned scrub_epoch = 2; | |
130 | unsigned scrub_epoch_fully_acked = 0; | |
131 | unsigned scrub_epoch_last_abort = 2; | |
132 | // check if any mds is aborting scrub after mds.0 starts | |
133 | bool scrub_any_peer_aborting = true; | |
134 | ||
135 | struct scrub_stat_t { | |
136 | unsigned epoch_acked = 0; | |
137 | std::set<std::string> scrubbing_tags; | |
138 | bool aborting = false; | |
139 | }; | |
140 | std::vector<scrub_stat_t> mds_scrub_stats; | |
141 | ||
142 | std::map<std::string, ScrubHeaderRef> scrubbing_map; | |
9f95a23c | 143 | |
f67539c2 | 144 | friend class C_RetryScrub; |
7c673cae | 145 | private: |
11fdf7f2 TL |
146 | // scrub abort is _not_ a state, rather it's an operation that's |
147 | // performed after in-progress scrubs are finished. | |
148 | enum State { | |
149 | STATE_RUNNING = 0, | |
150 | STATE_IDLE, | |
151 | STATE_PAUSING, | |
152 | STATE_PAUSED, | |
153 | }; | |
154 | friend std::ostream &operator<<(std::ostream &os, const State &state); | |
155 | ||
9f95a23c | 156 | friend class C_InodeValidated; |
11fdf7f2 | 157 | |
f67539c2 | 158 | int _enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top); |
7c673cae | 159 | /** |
f67539c2 | 160 | * Remove the inode/dirfrag from the stack. |
7c673cae | 161 | */ |
f67539c2 TL |
162 | inline void dequeue(MDSCacheObject *obj); |
163 | ||
7c673cae FG |
164 | /** |
165 | * Kick off as many scrubs as are appropriate, based on the current | |
166 | * state of the stack. | |
167 | */ | |
168 | void kick_off_scrubs(); | |
f67539c2 | 169 | |
7c673cae | 170 | /** |
f67539c2 TL |
171 | * Move the inode/dirfrag that can't be scrubbed immediately |
172 | * from scrub queue to waiting list. | |
7c673cae | 173 | */ |
f67539c2 | 174 | void add_to_waiting(MDSCacheObject *obj); |
7c673cae | 175 | /** |
f67539c2 | 176 | * Move the inode/dirfrag back to scrub queue. |
7c673cae | 177 | */ |
f67539c2 | 178 | void remove_from_waiting(MDSCacheObject *obj, bool kick=true); |
7c673cae | 179 | /** |
f67539c2 TL |
180 | * Validate authority of the inode. If current mds is not auth of the inode, |
181 | * forward scrub to auth mds. | |
7c673cae | 182 | */ |
f67539c2 | 183 | bool validate_inode_auth(CInode *in); |
7c673cae FG |
184 | |
185 | /** | |
186 | * Scrub a file inode. | |
11fdf7f2 | 187 | * @param in The inode to scrub |
7c673cae FG |
188 | */ |
189 | void scrub_file_inode(CInode *in); | |
190 | ||
191 | /** | |
192 | * Callback from completion of CInode::validate_disk_state | |
193 | * @param in The inode we were validating | |
194 | * @param r The return status from validate_disk_state | |
195 | * @param result Populated results from validate_disk_state | |
196 | */ | |
197 | void _validate_inode_done(CInode *in, int r, | |
198 | const CInode::validated_data &result); | |
7c673cae FG |
199 | |
200 | /** | |
f67539c2 TL |
201 | * Scrub a directory inode. It queues child dirfrags, then does |
202 | * final scrub of the inode. | |
7c673cae | 203 | * |
f67539c2 | 204 | * @param in The directory inode to scrub |
11fdf7f2 | 205 | * @param added_children set to true if we pushed some of our children |
f67539c2 | 206 | * @param done set to true if we started to do final scrub |
7c673cae | 207 | */ |
f67539c2 | 208 | void scrub_dir_inode(CInode *in, bool *added_children, bool *done); |
7c673cae | 209 | /** |
f67539c2 TL |
210 | * Scrub a dirfrag. It queues child dentries, then does final |
211 | * scrub of the dirfrag. | |
7c673cae | 212 | * |
f67539c2 TL |
213 | * @param dir The dirfrag to scrub (must be auth) |
214 | * @param done set to true if we started to do final scrub | |
7c673cae | 215 | */ |
f67539c2 | 216 | void scrub_dirfrag(CDir *dir, bool *done); |
7c673cae FG |
217 | /** |
218 | * Scrub a directory-representing dentry. | |
219 | * | |
220 | * @param in The directory inode we're doing final scrub on. | |
221 | */ | |
222 | void scrub_dir_inode_final(CInode *in); | |
11fdf7f2 TL |
223 | /** |
224 | * Set scrub state | |
225 | * @param next_state State to move the scrub to. | |
226 | */ | |
227 | void set_state(State next_state); | |
228 | ||
229 | /** | |
230 | * Is scrub in one of transition states (running, pausing) | |
231 | */ | |
232 | bool scrub_in_transition_state(); | |
233 | ||
234 | /** | |
235 | * complete queued up contexts | |
236 | * @param r return value to complete contexts. | |
237 | */ | |
238 | void complete_control_contexts(int r); | |
239 | ||
f67539c2 TL |
240 | /** |
241 | * ask peer mds (rank > 0) to abort/pause/resume scrubs | |
242 | */ | |
243 | void send_state_message(int op); | |
244 | ||
11fdf7f2 TL |
245 | /** |
246 | * Abort pending scrubs for inodes waiting in the inode stack. | |
f67539c2 | 247 | * Completion context is completed with -CEPHFS_ECANCELED. |
11fdf7f2 TL |
248 | */ |
249 | void abort_pending_scrubs(); | |
9f95a23c TL |
250 | |
251 | /** | |
252 | * Return path for a given inode. | |
253 | * @param in inode to make path entry. | |
254 | */ | |
255 | std::string scrub_inode_path(CInode *in) { | |
256 | std::string path; | |
257 | in->make_path_string(path, true); | |
258 | return (path.empty() ? "/" : path.c_str()); | |
259 | } | |
260 | ||
261 | /** | |
262 | * Send scrub information (queued/finished scrub path and summary) | |
263 | * to cluster log. | |
264 | * @param in inode for which scrub has been queued or finished. | |
265 | */ | |
266 | void clog_scrub_summary(CInode *in=nullptr); | |
267 | ||
f67539c2 TL |
268 | void handle_scrub(const cref_t<MMDSScrub> &m); |
269 | void handle_scrub_stats(const cref_t<MMDSScrubStats> &m); | |
270 | ||
9f95a23c | 271 | State state = STATE_IDLE; |
f67539c2 | 272 | bool clear_stack = false; |
9f95a23c TL |
273 | |
274 | // list of pending context completions for asynchronous scrub | |
275 | // control operations. | |
276 | std::vector<Context *> control_ctxs; | |
7c673cae FG |
277 | }; |
278 | ||
279 | #endif /* SCRUBSTACK_H_ */ |