]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/scrubber_common.h
bump version to 19.2.0-pve1
[ceph.git] / ceph / src / osd / scrubber_common.h
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#pragma once
4
1e59de90 5#include <fmt/ranges.h>
f51cf556
TL
6#include "common/ceph_time.h"
7#include "common/fmt_common.h"
f67539c2
TL
8#include "common/scrub_types.h"
9#include "include/types.h"
f51cf556 10#include "messages/MOSDScrubReserve.h"
f67539c2
TL
11#include "os/ObjectStore.h"
12
13#include "OpRequest.h"
14
15namespace ceph {
16class Formatter;
17}
18
1e59de90 19struct PGPool;
f51cf556
TL
20using ScrubClock = ceph::coarse_real_clock;
21using ScrubTimePoint = ScrubClock::time_point;
1e59de90
TL
22
23namespace Scrub {
24 class ReplicaReservations;
f51cf556 25 struct ReplicaActive;
1e59de90
TL
26}
27
f51cf556
TL
28/// reservation-related data sent by the primary to the replicas,
29/// and used to match the responses to the requests
30struct AsyncScrubResData {
31 spg_t pgid;
32 pg_shard_t from;
33 epoch_t request_epoch;
34 MOSDScrubReserve::reservation_nonce_t nonce;
35 AsyncScrubResData(
36 spg_t pgid,
37 pg_shard_t from,
38 epoch_t request_epoch,
39 MOSDScrubReserve::reservation_nonce_t nonce)
40 : pgid{pgid}
41 , from{from}
42 , request_epoch{request_epoch}
43 , nonce{nonce}
44 {}
45 template <typename FormatContext>
46 auto fmt_print_ctx(FormatContext& ctx) const
47 {
48 return fmt::format_to(
49 ctx.out(), "pg[{}],f:{},ep:{},n:{}", pgid, from, request_epoch, nonce);
50 }
51};
52
53
54/// Facilitating scrub-related object access to private PG data
1e59de90
TL
55class ScrubberPasskey {
56private:
57 friend class Scrub::ReplicaReservations;
f51cf556 58 friend struct Scrub::ReplicaActive;
1e59de90
TL
59 friend class PrimaryLogScrub;
60 friend class PgScrubber;
61 friend class ScrubBackend;
62 ScrubberPasskey() {}
63 ScrubberPasskey(const ScrubberPasskey&) = default;
64 ScrubberPasskey& operator=(const ScrubberPasskey&) = delete;
65};
66
f51cf556
TL
67/// randomly returns true with probability equal to the passed parameter
68static inline bool random_bool_with_probability(double probability) {
69 return (ceph::util::generate_random_number<double>(0.0, 1.0) < probability);
70}
71
f67539c2
TL
72namespace Scrub {
73
/// high/low OP priority
enum class scrub_prio_t : bool {
  low_priority = false,
  high_priority = true
};

/// Identifies a specific scrub activation within an interval
/// (see 'm_current_token')
using act_token_t = uint32_t;
80
/**
 * "environment" preconditions affecting which PGs are eligible for scrubbing.
 *
 * Note: the struct size should be kept small, as it is copied around. The
 * static_assert following the struct enforces an upper limit on its size.
 */
struct OSDRestrictions {
  /// high local OSD concurrency. Thus - only high priority scrubs are allowed
  bool high_priority_only = false;

  /// (shown as "repair-only" when formatted)
  bool allow_requested_repair_only = false;

  /// (shown as "overdue-only" when formatted)
  bool only_deadlined = false;

  /// one-bit flag, set by default
  bool load_is_low : 1 = true;

  /// one-bit flag, set by default
  bool time_permit : 1 = true;
};
static_assert(sizeof(OSDRestrictions) <= sizeof(uint32_t));
92
93} // namespace Scrub
94
95namespace fmt {
96template <>
97struct formatter<Scrub::OSDRestrictions> {
98 constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
99
100 template <typename FormatContext>
101 auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx)
102 {
103 return fmt::format_to(
104 ctx.out(),
105 "priority-only:{} overdue-only:{} load:{} time:{} repair-only:{}",
106 conds.high_priority_only,
107 conds.only_deadlined,
108 conds.load_is_low ? "ok" : "high",
109 conds.time_permit ? "ok" : "no",
110 conds.allow_requested_repair_only);
111 }
20effc67 112};
f51cf556
TL
113} // namespace fmt
114
115namespace Scrub {
116
/**
 * The outcome of the most recent attempt to schedule a scrub for a specific
 * PG. The enum value itself is mostly used for logging purposes.
 */
enum class delay_cause_t {
  none = 0,         ///< scrub attempt was successful
  replicas,         ///< failed to reserve replicas
  flags,            ///< noscrub or nodeep-scrub
  pg_state,         ///< e.g. snap-trimming
  restricted_time,  ///< time restrictions or busy CPU
  local_resources,  ///< too many scrubbing PGs
  aborted,          ///< scrub was aborted w/ unspecified reason
  interval,         ///< the interval had ended mid-scrub
  scrub_params,     ///< the specific scrub type is not allowed
};
132} // namespace Scrub
133
134namespace fmt {
135// clang-format off
136template <>
137struct formatter<Scrub::delay_cause_t> : ::fmt::formatter<std::string_view> {
138 template <typename FormatContext>
139 auto format(Scrub::delay_cause_t cause, FormatContext& ctx)
140 {
141 using enum Scrub::delay_cause_t;
142 std::string_view desc;
143 switch (cause) {
144 case none: desc = "ok"; break;
145 case replicas: desc = "replicas"; break;
146 case flags: desc = "noscrub"; break;
147 case pg_state: desc = "pg-state"; break;
148 case restricted_time: desc = "time/load"; break;
149 case local_resources: desc = "local-cnt"; break;
150 case aborted: desc = "aborted"; break;
151 case interval: desc = "interval"; break;
152 case scrub_params: desc = "scrub-mode"; break;
153 // better to not have a default case, so that the compiler will warn
154 }
155 return ::fmt::formatter<string_view>::format(desc, ctx);
156 }
157};
158// clang-format on
159} // namespace fmt
160
161
162namespace Scrub {
20effc67 163
1e59de90
TL
164/// PG services used by the scrubber backend
165struct PgScrubBeListener {
166 virtual ~PgScrubBeListener() = default;
167
168 virtual const PGPool& get_pgpool() const = 0;
169 virtual pg_shard_t get_primary() const = 0;
170 virtual void force_object_missing(ScrubberPasskey,
171 const std::set<pg_shard_t>& peer,
172 const hobject_t& oid,
173 eversion_t version) = 0;
174 virtual const pg_info_t& get_pg_info(ScrubberPasskey) const = 0;
175
176 // query the PG backend for the on-disk size of an object
177 virtual uint64_t logical_to_ondisk_size(uint64_t logical_size) const = 0;
178
f51cf556 179 // used to verify our "cleanliness" before scrubbing
1e59de90
TL
180 virtual bool is_waiting_for_unreadable_object() const = 0;
181};
182
f67539c2
TL
183} // namespace Scrub
184
185
/**
 * Flags affecting the scheduling and behaviour of the *next* scrub.
 *
 * We hold two of these flag collections: one for the next scrub, and one
 * frozen at initiation (i.e. in pg::queue_scrub()).
 *
 * All flags are initially cleared.
 */
struct requested_scrub_t {

  // flags to indicate explicitly requested scrubs (by admin):
  // must_scrub, must_deep_scrub, must_repair, need_auto

  /**
   * 'must_scrub' is set by an admin command (or by need_auto).
   * Affects the priority of the scrubbing, and the sleep periods
   * during the scrub.
   */
  bool must_scrub = false;

  /**
   * Scrub must not be aborted.
   * Set for explicitly requested scrubs, and for scrubs originated by the
   * pairing process with the 'repair' flag set (in the RequestScrub event).
   *
   * Will be copied into the 'required' scrub flag upon scrub start.
   */
  bool req_scrub = false;

  /**
   * Set from:
   *  - scrub_requested() with need_auto param set, which only happens in
   *  - scrub_finish() - if deep_scrub_on_error is set, and we have errors
   *
   * If set, will prevent the OSD from casually postponing our scrub. When
   * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair
   * to be set.
   */
  bool need_auto = false;

  /**
   * Set for scrub-after-recovery just before we initiate the recovery deep
   * scrub, or if scrub_requested() was called with either need_auto or
   * repair.
   * Affects PG_STATE_DEEP_SCRUB.
   */
  bool must_deep_scrub = false;

  /**
   * (An intermediary flag used by pg::sched_scrub() on the first time
   * a planned scrub has all its resources). Determines whether the next
   * repair/scrub will be 'deep'.
   *
   * Note: 'dumped' by PgScrubber::dump() and such. In reality, being a
   * temporary that is set and reset by the same operation, it will never
   * appear externally to be set.
   */
  bool time_for_deep = false;

  bool deep_scrub_on_error = false;

  /**
   * If set, we should see must_deep_scrub & must_scrub, too.
   *
   *  - 'must_repair' is checked by the OSD when scheduling the scrubs.
   *  - also checked & cleared at pg::queue_scrub()
   */
  bool must_repair = false;

  /**
   * The value of auto_repair is determined in sched_scrub() (once per scrub;
   * the previous value is not remembered). Set if
   *  - allowed by configuration and backend, and
   *  - for periodic scrubs: time_for_deep was just set
   */
  bool auto_repair = false;

  /**
   * Indicating that we are scrubbing post repair to verify everything is
   * fixed. Otherwise - PG_STATE_FAILED_REPAIR will be asserted.
   */
  bool check_repair = false;

  /**
   * Used to indicate, both in client-facing listings and internally, that
   * the planned scrub will be a deep one.
   */
  bool calculated_to_deep = false;
};
272
20effc67 273std::ostream& operator<<(std::ostream& out, const requested_scrub_t& sf);
f67539c2 274
1e59de90
TL
275template <>
276struct fmt::formatter<requested_scrub_t> {
277 constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
278
279 template <typename FormatContext>
280 auto format(const requested_scrub_t& rs, FormatContext& ctx)
281 {
282 return fmt::format_to(ctx.out(),
283 "(plnd:{}{}{}{}{}{}{}{}{}{})",
284 rs.must_repair ? " must_repair" : "",
285 rs.auto_repair ? " auto_repair" : "",
286 rs.check_repair ? " check_repair" : "",
287 rs.deep_scrub_on_error ? " deep_scrub_on_error" : "",
288 rs.must_deep_scrub ? " must_deep_scrub" : "",
289 rs.must_scrub ? " must_scrub" : "",
290 rs.time_for_deep ? " time_for_deep" : "",
291 rs.need_auto ? " need_auto" : "",
292 rs.req_scrub ? " req_scrub" : "",
293 rs.calculated_to_deep ? " deep" : "");
294 }
295};
296
f67539c2
TL
297/**
298 * The interface used by the PG when requesting scrub-related info or services
299 */
300struct ScrubPgIF {
301
20effc67 302 virtual ~ScrubPgIF() = default;
f67539c2 303
1e59de90
TL
304 friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
305 {
306 return s.show(out);
307 }
f67539c2 308
20effc67 309 virtual std::ostream& show(std::ostream& out) const = 0;
f67539c2
TL
310
311 // --------------- triggering state-machine events:
312
313 virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;
314
315 virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;
316
317 virtual void send_scrub_resched(epoch_t epoch_queued) = 0;
318
319 virtual void active_pushes_notification(epoch_t epoch_queued) = 0;
320
321 virtual void update_applied_notification(epoch_t epoch_queued) = 0;
322
323 virtual void digest_update_notification(epoch_t epoch_queued) = 0;
324
325 virtual void send_scrub_unblock(epoch_t epoch_queued) = 0;
326
327 virtual void send_replica_maps_ready(epoch_t epoch_queued) = 0;
328
329 virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;
330
1e59de90
TL
331 virtual void send_start_replica(epoch_t epoch_queued,
332 Scrub::act_token_t token) = 0;
20effc67 333
1e59de90
TL
334 virtual void send_sched_replica(epoch_t epoch_queued,
335 Scrub::act_token_t token) = 0;
20effc67 336
20effc67
TL
337 virtual void send_chunk_free(epoch_t epoch_queued) = 0;
338
339 virtual void send_chunk_busy(epoch_t epoch_queued) = 0;
340
341 virtual void send_local_map_done(epoch_t epoch_queued) = 0;
342
343 virtual void send_get_next_chunk(epoch_t epoch_queued) = 0;
f67539c2 344
20effc67
TL
345 virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;
346
f51cf556
TL
347 virtual void send_granted_by_reserver(const AsyncScrubResData& req) = 0;
348
1e59de90 349 virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;
f67539c2
TL
350
351 // --------------------------------------------------
352
1e59de90
TL
353 [[nodiscard]] virtual bool are_callbacks_pending() const = 0; // currently
354 // only used
355 // for an
356 // assert
f67539c2
TL
357
358 /**
359 * the scrubber is marked 'active':
360 * - for the primary: when all replica OSDs grant us the requested resources
361 * - for replicas: upon receiving the scrub request from the primary
362 */
363 [[nodiscard]] virtual bool is_scrub_active() const = 0;
364
20effc67
TL
365 /**
366 * 'true' until after the FSM processes the 'scrub-finished' event,
367 * and scrubbing is completely cleaned-up.
368 *
369 * In other words - holds longer than is_scrub_active(), thus preventing
370 * a rescrubbing of the same PG while the previous scrub has not fully
371 * terminated.
372 */
373 [[nodiscard]] virtual bool is_queued_or_active() const = 0;
374
375 /**
376 * Manipulate the 'scrubbing request has been queued, or - we are
377 * actually scrubbing' Scrubber's flag
33c7a0ef
TL
378 *
379 * clear_queued_or_active() will also restart any blocked snaptrimming.
20effc67
TL
380 */
381 virtual void set_queued_or_active() = 0;
382 virtual void clear_queued_or_active() = 0;
383
f67539c2
TL
384 /// are we waiting for resource reservation grants form our replicas?
385 [[nodiscard]] virtual bool is_reserving() const = 0;
386
387 /// handle a message carrying a replica map
388 virtual void map_from_replica(OpRequestRef op) = 0;
389
390 virtual void replica_scrub_op(OpRequestRef op) = 0;
391
1e59de90 392 virtual void set_op_parameters(const requested_scrub_t&) = 0;
f67539c2 393
f51cf556
TL
394 /// stop any active scrubbing (on interval end) and unregister from
395 /// the OSD scrub queue
396 virtual void on_new_interval() = 0;
397
398 /// we are peered as primary, and the PG is active and clean
399 /// Scrubber's internal FSM should be ActivePrimary
400 virtual void on_primary_active_clean() = 0;
401
402 /// we are peered as a replica
403 virtual void on_replica_activate() = 0;
f67539c2
TL
404
405 virtual void handle_query_state(ceph::Formatter* f) = 0;
406
20effc67
TL
407 virtual pg_scrubbing_status_t get_schedule() const = 0;
408
f51cf556
TL
409 /// notify the scrubber about a scrub failure
410 virtual void penalize_next_scrub(Scrub::delay_cause_t cause) = 0;
411
412 // // perform 'scrub'/'deep_scrub' asok commands
413
414 /// ... by faking the "last scrub" stamps
415 virtual void on_operator_periodic_cmd(
416 ceph::Formatter* f,
417 scrub_level_t scrub_level,
418 int64_t offset) = 0;
419
420 /// ... by requesting an "operator initiated" scrub
421 virtual void on_operator_forced_scrub(
422 ceph::Formatter* f,
423 scrub_level_t scrub_level,
424 requested_scrub_t& request_flags) = 0;
425
20effc67
TL
426 virtual void dump_scrubber(ceph::Formatter* f,
427 const requested_scrub_t& request_flags) const = 0;
f67539c2
TL
428
429 /**
1e59de90
TL
430 * Return true if soid is currently being scrubbed and pending IOs should
431 * block. May have a side effect of preempting an in-progress scrub -- will
432 * return false in that case.
f67539c2
TL
433 *
434 * @param soid object to check for ongoing scrub
1e59de90
TL
435 * @return boolean whether a request on soid should block until scrub
436 * completion
f67539c2
TL
437 */
438 virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;
439
440 /// Returns whether any objects in the range [begin, end] are being scrubbed
1e59de90
TL
441 virtual bool range_intersects_scrub(const hobject_t& start,
442 const hobject_t& end) = 0;
f67539c2
TL
443
444 /// the op priority, taken from the primary's request message
445 virtual Scrub::scrub_prio_t replica_op_priority() const = 0;
446
447 /// the priority of the on-going scrub (used when requeuing events)
448 virtual unsigned int scrub_requeue_priority(
449 Scrub::scrub_prio_t with_priority) const = 0;
1e59de90
TL
450 virtual unsigned int scrub_requeue_priority(
451 Scrub::scrub_prio_t with_priority,
452 unsigned int suggested_priority) const = 0;
f67539c2
TL
453
454 virtual void add_callback(Context* context) = 0;
455
f67539c2
TL
456 /// add to scrub statistics, but only if the soid is below the scrub start
457 virtual void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
458 const hobject_t& soid) = 0;
459
460 /**
f51cf556
TL
461 * clears both internal scrub state, and some PG-visible flags:
462 * - the two scrubbing PG state flags;
463 * - primary/replica scrub position (chunk boundaries);
464 * - primary/replica interaction state;
465 * - the backend state
466 * Also runs pending callbacks, and clears the active flags.
467 * Does not try to invoke FSM events.
f67539c2
TL
468 */
469 virtual void clear_pgscrub_state() = 0;
470
f67539c2
TL
471 virtual void cleanup_store(ObjectStore::Transaction* t) = 0;
472
473 virtual bool get_store_errors(const scrub_ls_arg_t& arg,
474 scrub_ls_result_t& res_inout) const = 0;
475
1e59de90
TL
476 /**
477 * force a periodic 'publish_stats_to_osd()' call, to update scrub-related
478 * counters and statistics.
479 */
480 virtual void update_scrub_stats(
481 ceph::coarse_real_clock::time_point now_is) = 0;
482
f67539c2
TL
483 // --------------- reservations -----------------------------------
484
f67539c2
TL
485 /**
486 * Reserve local scrub resources (managed by the OSD)
487 *
488 * Fails if OSD's local-scrubs budget was exhausted
489 * \returns were local resources reserved?
490 */
491 virtual bool reserve_local() = 0;
492
20effc67
TL
493 /**
494 * Recalculate the required scrub time.
495 *
496 * This function assumes that the queue registration status is up-to-date,
497 * i.e. the OSD "knows our name" if-f we are the Primary.
498 */
499 virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;
500
f51cf556
TL
501 /**
502 * route incoming replica-reservations requests/responses to the
503 * appropriate handler.
504 * As the ReplicaReservations object is to be owned by the ScrubMachine, we
505 * send all relevant messages to the ScrubMachine.
506 */
507 virtual void handle_scrub_reserve_msgs(OpRequestRef op) = 0;
20effc67 508
f51cf556
TL
509 virtual scrub_level_t scrub_requested(
510 scrub_level_t scrub_level,
511 scrub_type_t scrub_type,
512 requested_scrub_t& req_flags) = 0;
20effc67
TL
513
514 // --------------- debugging via the asok ------------------------------
515
516 virtual int asok_debug(std::string_view cmd,
517 std::string param,
518 Formatter* f,
519 std::stringstream& ss) = 0;
f67539c2 520};