// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once

#include "common/scrub_types.h"
#include "include/types.h"
#include "os/ObjectStore.h"

#include "OpRequest.h"

namespace ceph {
class Formatter;
}

namespace Scrub {

/// high/low OP priority
enum class scrub_prio_t : bool { low_priority = false, high_priority = true };

/// Identifies a specific scrub activation within an interval,
/// see ScrubPgIF::m_current_token
using act_token_t = uint32_t;

/// "environment" preconditions affecting which PGs are eligible for scrubbing
struct ScrubPreconds {
  bool allow_requested_repair_only{false};
  bool load_is_low{true};
  bool time_permit{true};
  bool only_deadlined{false};
};
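
/*
 * Illustrative sketch (not part of the interface): how an OSD-level scheduler
 * might consult a ScrubPreconds instance when deciding whether a given PG may
 * be scrubbed right now. The 'is_repair_requested' and 'past_deadline' inputs
 * are hypothetical and only serve to show the intended meaning of the flags.
 *
 *   bool eligible(const ScrubPreconds& env,
 *                 bool is_repair_requested,
 *                 bool past_deadline)
 *   {
 *     if (env.allow_requested_repair_only && !is_repair_requested)
 *       return false;              // only operator-requested repairs allowed
 *     if (env.only_deadlined && !past_deadline)
 *       return false;              // restrict scheduling to overdue scrubs
 *     return env.load_is_low && env.time_permit;
 *   }
 */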

}  // namespace Scrub


/**
 * Flags affecting the scheduling and behaviour of the *next* scrub.
 *
 * We hold two of these flag collections: one for the next scrub, and one
 * frozen at scrub initiation (i.e. in pg::queue_scrub()).
 */
struct requested_scrub_t {

  // flags to indicate explicitly requested scrubs (by admin):
  // bool must_scrub, must_deep_scrub, must_repair, need_auto;

  /**
   * 'must_scrub' is set by an admin command (or by need_auto).
   * Affects the priority of the scrubbing, and the sleep periods
   * during the scrub.
   */
  bool must_scrub{false};

  /**
   * The scrub must not be aborted.
   * Set for explicitly requested scrubs, and for scrubs originating from the
   * pairing process with the 'repair' flag set (in the RequestScrub event).
   *
   * Will be copied into the 'required' scrub flag upon scrub start.
   */
  bool req_scrub{false};

  /**
   * Set from scrub_requested() with the need_auto param set, which only
   * happens in scrub_finish() - if deep_scrub_on_error is set and we have
   * errors.
   *
   * If set, will prevent the OSD from casually postponing our scrub. When
   * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair
   * to be set.
   */
  bool need_auto{false};

  /**
   * Set for scrub-after-recovery just before we initiate the recovery deep
   * scrub, or if scrub_requested() was called with either need_auto or repair.
   * Affects PG_STATE_DEEP_SCRUB.
   */
  bool must_deep_scrub{false};

  /**
   * An intermediary flag, used by pg::sched_scrub() the first time a planned
   * scrub has all its resources. Determines whether the next repair/scrub
   * will be 'deep'.
   *
   * Note: 'dumped' by PgScrubber::dump() and such. In practice, as it is a
   * temporary that is set and reset within the same operation, it will never
   * appear set externally.
   */
  bool time_for_deep{false};

  bool deep_scrub_on_error{false};

  /**
   * If set, we should see must_deep_scrub and must_scrub set, too.
   *
   * - 'must_repair' is checked by the OSD when scheduling the scrubs.
   * - also checked & cleared at pg::queue_scrub()
   */
  bool must_repair{false};

  /*
   * The value of auto_repair is determined in sched_scrub() (once per scrub;
   * the previous value is not remembered). Set if:
   *  - allowed by configuration and backend, and
   *  - must_scrub is not set (i.e. this is a periodic scrub), and
   *  - time_for_deep was just set
   */
  bool auto_repair{false};

  /**
   * Indicates that we are scrubbing post-repair, to verify that everything
   * was fixed. Otherwise, PG_STATE_FAILED_REPAIR will be asserted.
   */
  bool check_repair{false};
};

std::ostream& operator<<(std::ostream& out, const requested_scrub_t& sf);
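
/*
 * Illustrative sketch (an assumption, not taken from the implementation): the
 * flag combination one might expect in a requested_scrub_t after an operator
 * explicitly asks for a repair scrub. The actual assignments are performed
 * elsewhere (e.g. via scrub_requested() / set_op_parameters()); this only
 * demonstrates how the flags documented above relate to each other.
 *
 *   requested_scrub_t req;
 *   req.must_scrub = true;       // explicitly requested => affects priority
 *   req.req_scrub = true;        // the scrub must not be aborted
 *   req.must_deep_scrub = true;  // a 'repair' request implies a deep scrub
 *   req.must_repair = true;      // and the repair flag itself
 *   std::cout << req;            // printable via the operator<< declared above
 */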

/**
 *  The interface used by the PG when requesting scrub-related info or services
 */
struct ScrubPgIF {

  virtual ~ScrubPgIF() = default;

  friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
  {
    return s.show(out);
  }

  virtual std::ostream& show(std::ostream& out) const = 0;

  // --------------- triggering state-machine events:

  virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;

  virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;

  virtual void send_scrub_resched(epoch_t epoch_queued) = 0;

  virtual void active_pushes_notification(epoch_t epoch_queued) = 0;

  virtual void update_applied_notification(epoch_t epoch_queued) = 0;

  virtual void digest_update_notification(epoch_t epoch_queued) = 0;

  virtual void send_scrub_unblock(epoch_t epoch_queued) = 0;

  virtual void send_replica_maps_ready(epoch_t epoch_queued) = 0;

  virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;

  virtual void send_start_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;

  virtual void send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;

  virtual void send_full_reset(epoch_t epoch_queued) = 0;

  virtual void send_chunk_free(epoch_t epoch_queued) = 0;

  virtual void send_chunk_busy(epoch_t epoch_queued) = 0;

  virtual void send_local_map_done(epoch_t epoch_queued) = 0;

  virtual void send_get_next_chunk(epoch_t epoch_queued) = 0;

  virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;

  virtual void send_maps_compared(epoch_t epoch_queued) = 0;

  virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;

  // --------------------------------------------------

  // currently only used for an assert:
  [[nodiscard]] virtual bool are_callbacks_pending() const = 0;

  /**
   * the scrubber is marked 'active':
   * - for the primary: when all replica OSDs grant us the requested resources
   * - for replicas: upon receiving the scrub request from the primary
   */
  [[nodiscard]] virtual bool is_scrub_active() const = 0;

  /**
   * 'true' until after the FSM processes the 'scrub-finished' event, and
   * scrubbing is completely cleaned up.
   *
   * In other words - it holds longer than is_scrub_active(), thus preventing
   * re-scrubbing of the same PG while the previous scrub has not fully
   * terminated.
   */
  [[nodiscard]] virtual bool is_queued_or_active() const = 0;

  /**
   * Manipulate the Scrubber's 'a scrub request has been queued, or we are
   * actually scrubbing' flag.
   *
   * clear_queued_or_active() will also restart any blocked snaptrimming.
   */
  virtual void set_queued_or_active() = 0;
  virtual void clear_queued_or_active() = 0;
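
  /*
   * A minimal sketch of the expected call pattern (illustrative only;
   * 'scrubber' stands for any pointer to a ScrubPgIF implementation): the
   * flag is raised when the scrub request is queued and cleared only after
   * scrubbing has fully terminated, which is why re-queuing is gated on
   * is_queued_or_active() rather than on is_scrub_active().
   *
   *   if (!scrubber->is_queued_or_active()) {
   *     scrubber->set_queued_or_active();
   *     // ... queue the scrub request ...
   *   }
   *   // later, once the scrub is done or aborted:
   *   scrubber->clear_queued_or_active();  // also restarts blocked snaptrimming
   */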

  /// are we waiting for resource reservation grants from our replicas?
  [[nodiscard]] virtual bool is_reserving() const = 0;

  /// handle a message carrying a replica map
  virtual void map_from_replica(OpRequestRef op) = 0;

  virtual void replica_scrub_op(OpRequestRef op) = 0;

  virtual void set_op_parameters(requested_scrub_t&) = 0;

  virtual void scrub_clear_state() = 0;

  virtual void handle_query_state(ceph::Formatter* f) = 0;

  virtual pg_scrubbing_status_t get_schedule() const = 0;

  virtual void dump_scrubber(ceph::Formatter* f,
                             const requested_scrub_t& request_flags) const = 0;

  /**
   * Return true if soid is currently being scrubbed and pending IOs should
   * block. May have a side effect of preempting an in-progress scrub -- will
   * return false in that case.
   *
   * @param soid object to check for ongoing scrub
   * @return boolean whether a request on soid should block until scrub completion
   */
  virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;
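
  /*
   * Usage sketch (hypothetical caller, for illustration only): a write path
   * would typically test the target object against the chunk currently being
   * scrubbed, and park the op if it falls inside that chunk. Note the
   * documented side effect: the call may instead preempt the scrub and
   * return false, letting the write proceed.
   *
   *   if (scrubber->write_blocked_by_scrub(op_target)) {
   *     // delay the op; it is expected to be re-queued once the chunk is released
   *     waiting_ops.push_back(op);
   *     return;
   *   }
   *   // otherwise the write may proceed
   */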

  /// Returns whether any objects in the range [start, end] are being scrubbed
  virtual bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) = 0;

  /// the op priority, taken from the primary's request message
  virtual Scrub::scrub_prio_t replica_op_priority() const = 0;

  /// the priority of the on-going scrub (used when requeuing events)
  virtual unsigned int scrub_requeue_priority(
    Scrub::scrub_prio_t with_priority) const = 0;
  virtual unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority,
                                              unsigned int suggested_priority) const = 0;

  virtual void add_callback(Context* context) = 0;

  /// add to scrub statistics, but only if the soid is below the scrub start
  virtual void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
                                        const hobject_t& soid) = 0;

  /**
   * the version of 'scrub_clear_state()' that does not try to invoke FSM
   * services (thus can be called from FSM reactions)
   */
  virtual void clear_pgscrub_state() = 0;

  /**
   * triggers the 'RemotesReserved' (all replicas granted scrub resources)
   * state-machine event
   */
  virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;

  /**
   * triggers the 'ReservationFailure' (at least one replica denied us the
   * requested resources) state-machine event
   */
  virtual void send_reservation_failure(epoch_t epoch_queued) = 0;

  virtual void cleanup_store(ObjectStore::Transaction* t) = 0;

  virtual bool get_store_errors(const scrub_ls_arg_t& arg,
                                scrub_ls_result_t& res_inout) const = 0;

  // --------------- reservations -----------------------------------

  /**
   * message all replicas with a request to "unreserve" scrub resources
   */
  virtual void unreserve_replicas() = 0;

  /**
   * "forget" all replica reservations. No messages are sent to the
   * previously-reserved replicas.
   *
   * Used upon interval change. The replicas' state is guaranteed to
   * be reset separately by the interval-change event.
   */
  virtual void discard_replica_reservations() = 0;

  /**
   * clear both local and OSD-managed resource reservation flags
   */
  virtual void clear_scrub_reservations() = 0;

  /**
   * Reserve local scrub resources (managed by the OSD)
   *
   * Fails if the OSD's local-scrubs budget was exhausted
   * \returns were local resources reserved?
   */
  virtual bool reserve_local() = 0;
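
  /*
   * Reservation flow sketch (illustrative; the actual sequencing is driven by
   * the scrubber's FSM): the primary first reserves its own OSD-local slot,
   * then asks the replicas. The replicas' answers arrive via
   * handle_scrub_reserve_grant() / handle_scrub_reserve_reject() (declared
   * below) and eventually result in send_remotes_reserved() or
   * send_reservation_failure() being triggered.
   *
   *   if (!scrubber->reserve_local()) {
   *     // the OSD's local scrubs budget is exhausted - retry later
   *     return;
   *   }
   *   // ... request reservations from all replicas ...
   *   // on failure, or when the scrub ends: scrubber->unreserve_replicas();
   */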

  /**
   * Register/de-register with the OSD scrub queue
   *
   * Following our status as Primary or replica.
   */
  virtual void on_primary_change(const requested_scrub_t& request_flags) = 0;

  /**
   * Recalculate the required scrub time.
   *
   * This function assumes that the queue registration status is up-to-date,
   * i.e. the OSD "knows our name" if and only if we are the Primary.
   */
  virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;

  virtual void on_maybe_registration_change(const requested_scrub_t& request_flags) = 0;
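
  /*
   * Registration sketch (assumed call order, for illustration; 'planned_scrub'
   * is a placeholder for the PG's requested_scrub_t instance): when the PG's
   * role changes, on_primary_change() (re)registers the PG with the OSD's
   * scrub queue, and only then is update_scrub_job() expected to recompute
   * the schedule - matching the "queue registration status is up-to-date"
   * precondition stated above.
   *
   *   scrubber->on_primary_change(planned_scrub);
   *   scrubber->update_scrub_job(planned_scrub);
   */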

  // on the replica:
  virtual void handle_scrub_reserve_request(OpRequestRef op) = 0;
  virtual void handle_scrub_reserve_release(OpRequestRef op) = 0;

  // and on the primary:
  virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0;
  virtual void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) = 0;

  virtual void rm_from_osd_scrubbing() = 0;

  virtual void scrub_requested(scrub_level_t scrub_level,
                               scrub_type_t scrub_type,
                               requested_scrub_t& req_flags) = 0;
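
  /*
   * Sketch of an operator-initiated request (illustrative; scrub_level_t and
   * scrub_type_t are defined elsewhere, and the enumerator names used here
   * are assumptions): an admin command asking for a deep repair scrub would
   * end up invoking something along the lines of
   *
   *   scrubber->scrub_requested(scrub_level_t::deep,
   *                             scrub_type_t::do_repair,
   *                             planned_scrub);
   *
   * with 'planned_scrub' being the PG's requested_scrub_t instance.
   */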

  // --------------- debugging via the asok ------------------------------

  virtual int asok_debug(std::string_view cmd,
                         std::string param,
                         Formatter* f,
                         std::stringstream& ss) = 0;
};