// ceph/src/osd/scrubber_common.h
// (source: ceph reef 18.1.2, via git.proxmox.com mirror of ceph.git)
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #pragma once
4
5 #include <fmt/ranges.h>
6
7 #include "common/scrub_types.h"
8 #include "include/types.h"
9 #include "os/ObjectStore.h"
10
11 #include "OpRequest.h"
12
13 namespace ceph {
14 class Formatter;
15 }
16
17 struct PGPool;
18
19 namespace Scrub {
20 class ReplicaReservations;
21 }
22
/// Facilitating scrub-related object access to private PG data.
///
/// "Passkey" idiom: only the befriended scrub classes below can construct
/// a ScrubberPasskey, so any PG member function that takes one by value is
/// effectively callable only by those classes, without making them full
/// friends of the PG.
class ScrubberPasskey {
private:
  friend class Scrub::ReplicaReservations;
  friend class PrimaryLogScrub;
  friend class PgScrubber;
  friend class ScrubBackend;
  // NOTE: deliberately '{}' and not '= default': a defaulted-on-first-
  // declaration constructor is not "user-provided", which (pre-C++20) would
  // let outsiders bypass the private ctor via aggregate-init 'ScrubberPasskey{}'.
  ScrubberPasskey() {}
  ScrubberPasskey(const ScrubberPasskey&) = default;
  ScrubberPasskey& operator=(const ScrubberPasskey&) = delete;
};
34
namespace Scrub {

/// high/low OP priority
enum class scrub_prio_t : bool { low_priority = false, high_priority = true };

/// Identifies a specific scrub activation within an interval,
/// see ScrubPGgIF::m_current_token
using act_token_t = uint32_t;

/// "environment" preconditions affecting which PGs are eligible for scrubbing
/// (collected once per OSD scheduling tick, then applied to each candidate PG)
struct ScrubPreconds {
  bool allow_requested_repair_only{false};
  bool load_is_low{true};
  bool time_permit{true};
  bool only_deadlined{false};
};

/// PG services used by the scrubber backend.
/// Implemented by the PG; the ScrubberPasskey parameters restrict some of
/// these entry points to the scrub machinery (see ScrubberPasskey above).
struct PgScrubBeListener {
  virtual ~PgScrubBeListener() = default;

  virtual const PGPool& get_pgpool() const = 0;
  virtual pg_shard_t get_primary() const = 0;

  /// mark 'oid' (at 'version') as missing on the listed peers
  virtual void force_object_missing(ScrubberPasskey,
                                    const std::set<pg_shard_t>& peer,
                                    const hobject_t& oid,
                                    eversion_t version) = 0;
  virtual const pg_info_t& get_pg_info(ScrubberPasskey) const = 0;

  // query the PG backend for the on-disk size of an object
  virtual uint64_t logical_to_ondisk_size(uint64_t logical_size) const = 0;

  // used to verify our "cleanness" before scrubbing
  virtual bool is_waiting_for_unreadable_object() const = 0;
};

}  // namespace Scrub
72
73
/**
 * Flags affecting the scheduling and behaviour of the *next* scrub.
 *
 * We hold two of these flag collections: one for the next scrub, and one
 * frozen at initiation (i.e. in pg::queue_scrub()).
 */
struct requested_scrub_t {

  // flags to indicate explicitly requested scrubs (by admin):
  // bool must_scrub, must_deep_scrub, must_repair, need_auto;

  /**
   * 'must_scrub' is set by an admin command (or by need_auto).
   * Affects the priority of the scrubbing, and the sleep periods
   * during the scrub.
   */
  bool must_scrub{false};

  /**
   * scrub must not be aborted.
   * Set for explicitly requested scrubs, and for scrubs originated by the
   * pairing process with the 'repair' flag set (in the RequestScrub event).
   *
   * Will be copied into the 'required' scrub flag upon scrub start.
   */
  bool req_scrub{false};

  /**
   * Set from:
   * - scrub_requested() with need_auto param set, which only happens in
   * - scrub_finish() - if deep_scrub_on_error is set, and we have errors
   *
   * If set, will prevent the OSD from casually postponing our scrub. When
   * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair to
   * be set.
   */
  bool need_auto{false};

  /**
   * Set for scrub-after-recovery just before we initiate the recovery deep
   * scrub, or if scrub_requested() was called with either need_auto or repair.
   * Affects PG_STATE_DEEP_SCRUB.
   */
  bool must_deep_scrub{false};

  /**
   * (An intermediary flag used by pg::sched_scrub() on the first time
   * a planned scrub has all its resources). Determines whether the next
   * repair/scrub will be 'deep'.
   *
   * Note: 'dumped' by PgScrubber::dump() and such. In reality, being a
   * temporary that is set and reset by the same operation, will never
   * appear externally to be set.
   */
  bool time_for_deep{false};

  /// if set, a shallow scrub that finds errors is followed by a deep scrub
  /// (see the need_auto documentation above)
  bool deep_scrub_on_error{false};

  /**
   * If set, we should see must_deep_scrub & must_scrub, too
   *
   * - 'must_repair' is checked by the OSD when scheduling the scrubs.
   * - also checked & cleared at pg::queue_scrub()
   */
  bool must_repair{false};

  /*
   * the value of auto_repair is determined in sched_scrub() (once per scrub;
   * previous value is not remembered). Set if
   * - allowed by configuration and backend, and
   * - must_scrub is not set (i.e. - this is a periodic scrub),
   * - time_for_deep was just set
   */
  bool auto_repair{false};

  /**
   * indicating that we are scrubbing post repair to verify everything is fixed.
   * Otherwise - PG_STATE_FAILED_REPAIR will be asserted.
   */
  bool check_repair{false};

  /**
   * Used to indicate, both in client-facing listings and internally, that
   * the planned scrub will be a deep one.
   */
  bool calculated_to_deep{false};
};
161
162 std::ostream& operator<<(std::ostream& out, const requested_scrub_t& sf);
163
164 template <>
165 struct fmt::formatter<requested_scrub_t> {
166 constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
167
168 template <typename FormatContext>
169 auto format(const requested_scrub_t& rs, FormatContext& ctx)
170 {
171 return fmt::format_to(ctx.out(),
172 "(plnd:{}{}{}{}{}{}{}{}{}{})",
173 rs.must_repair ? " must_repair" : "",
174 rs.auto_repair ? " auto_repair" : "",
175 rs.check_repair ? " check_repair" : "",
176 rs.deep_scrub_on_error ? " deep_scrub_on_error" : "",
177 rs.must_deep_scrub ? " must_deep_scrub" : "",
178 rs.must_scrub ? " must_scrub" : "",
179 rs.time_for_deep ? " time_for_deep" : "",
180 rs.need_auto ? " need_auto" : "",
181 rs.req_scrub ? " req_scrub" : "",
182 rs.calculated_to_deep ? " deep" : "");
183 }
184 };
185
/**
 * The interface used by the PG when requesting scrub-related info or services.
 *
 * Implemented by the scrubber (PgScrubber). Most of the 'send_*' /
 * 'initiate_*' entry points below translate into scrub state-machine events;
 * each carries the epoch at which the triggering message was queued, so that
 * stale (pre-interval-change) events can be discarded by the implementation.
 */
struct ScrubPgIF {

  virtual ~ScrubPgIF() = default;

  friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
  {
    return s.show(out);
  }

  /// stream a one-line textual representation of the scrubber state
  virtual std::ostream& show(std::ostream& out) const = 0;

  // --------------- triggering state-machine events:

  virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;

  virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;

  virtual void send_scrub_resched(epoch_t epoch_queued) = 0;

  virtual void active_pushes_notification(epoch_t epoch_queued) = 0;

  virtual void update_applied_notification(epoch_t epoch_queued) = 0;

  virtual void digest_update_notification(epoch_t epoch_queued) = 0;

  virtual void send_scrub_unblock(epoch_t epoch_queued) = 0;

  virtual void send_replica_maps_ready(epoch_t epoch_queued) = 0;

  virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;

  // the replica-side events carry an activation token as well, identifying
  // the specific scrub activation within the interval (see act_token_t)
  virtual void send_start_replica(epoch_t epoch_queued,
                                  Scrub::act_token_t token) = 0;

  virtual void send_sched_replica(epoch_t epoch_queued,
                                  Scrub::act_token_t token) = 0;

  virtual void send_full_reset(epoch_t epoch_queued) = 0;

  virtual void send_chunk_free(epoch_t epoch_queued) = 0;

  virtual void send_chunk_busy(epoch_t epoch_queued) = 0;

  virtual void send_local_map_done(epoch_t epoch_queued) = 0;

  virtual void send_get_next_chunk(epoch_t epoch_queued) = 0;

  virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;

  virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;

  // --------------------------------------------------

  [[nodiscard]] virtual bool are_callbacks_pending() const = 0;  // currently
								 // only used
								 // for an
								 // assert

  /**
   * the scrubber is marked 'active':
   * - for the primary: when all replica OSDs grant us the requested resources
   * - for replicas: upon receiving the scrub request from the primary
   */
  [[nodiscard]] virtual bool is_scrub_active() const = 0;

  /**
   * 'true' until after the FSM processes the 'scrub-finished' event,
   * and scrubbing is completely cleaned-up.
   *
   * In other words - holds longer than is_scrub_active(), thus preventing
   * a rescrubbing of the same PG while the previous scrub has not fully
   * terminated.
   */
  [[nodiscard]] virtual bool is_queued_or_active() const = 0;

  /**
   * Manipulate the 'scrubbing request has been queued, or - we are
   * actually scrubbing' Scrubber's flag
   *
   * clear_queued_or_active() will also restart any blocked snaptrimming.
   */
  virtual void set_queued_or_active() = 0;
  virtual void clear_queued_or_active() = 0;

  /// are we waiting for resource reservation grants from our replicas?
  [[nodiscard]] virtual bool is_reserving() const = 0;

  /// handle a message carrying a replica map
  virtual void map_from_replica(OpRequestRef op) = 0;

  /// (replica) handle a scrub request from the primary
  virtual void replica_scrub_op(OpRequestRef op) = 0;

  /// freeze the 'planned scrub' flags for the scrub that is about to start
  virtual void set_op_parameters(const requested_scrub_t&) = 0;

  virtual void scrub_clear_state() = 0;

  virtual void handle_query_state(ceph::Formatter* f) = 0;

  virtual pg_scrubbing_status_t get_schedule() const = 0;

  virtual void dump_scrubber(ceph::Formatter* f,
			     const requested_scrub_t& request_flags) const = 0;

  /**
   * Return true if soid is currently being scrubbed and pending IOs should
   * block. May have a side effect of preempting an in-progress scrub -- will
   * return false in that case.
   *
   * @param soid object to check for ongoing scrub
   * @return boolean whether a request on soid should block until scrub
   * completion
   */
  virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;

  /// Returns whether any objects in the range [begin, end] are being scrubbed
  virtual bool range_intersects_scrub(const hobject_t& start,
				      const hobject_t& end) = 0;

  /// the op priority, taken from the primary's request message
  virtual Scrub::scrub_prio_t replica_op_priority() const = 0;

  /// the priority of the on-going scrub (used when requeuing events)
  virtual unsigned int scrub_requeue_priority(
    Scrub::scrub_prio_t with_priority) const = 0;
  virtual unsigned int scrub_requeue_priority(
    Scrub::scrub_prio_t with_priority,
    unsigned int suggested_priority) const = 0;

  /// add a callback to be invoked by the scrubber
  virtual void add_callback(Context* context) = 0;

  /// add to scrub statistics, but only if the soid is below the scrub start
  virtual void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
					const hobject_t& soid) = 0;

  /**
   * the version of 'scrub_clear_state()' that does not try to invoke FSM
   * services (thus can be called from FSM reactions)
   */
  virtual void clear_pgscrub_state() = 0;

  /**
   * triggers the 'RemotesReserved' (all replicas granted scrub resources)
   * state-machine event
   */
  virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;

  /**
   * triggers the 'ReservationFailure' (at least one replica denied us the
   * requested resources) state-machine event
   */
  virtual void send_reservation_failure(epoch_t epoch_queued) = 0;

  /// remove the scrubber's on-disk error store, adding the removal to 't'
  virtual void cleanup_store(ObjectStore::Transaction* t) = 0;

  /// fetch stored scrub errors per the 'scrub ls' request in 'arg'
  virtual bool get_store_errors(const scrub_ls_arg_t& arg,
				scrub_ls_result_t& res_inout) const = 0;

  /**
   * force a periodic 'publish_stats_to_osd()' call, to update scrub-related
   * counters and statistics.
   */
  virtual void update_scrub_stats(
    ceph::coarse_real_clock::time_point now_is) = 0;

  // --------------- reservations -----------------------------------

  /**
   * message all replicas with a request to "unreserve" scrub
   */
  virtual void unreserve_replicas() = 0;

  /**
   * "forget" all replica reservations. No messages are sent to the
   * previously-reserved.
   *
   * Used upon interval change. The replicas' state is guaranteed to
   * be reset separately by the interval-change event.
   */
  virtual void discard_replica_reservations() = 0;

  /**
   * clear both local and OSD-managed resource reservation flags
   */
  virtual void clear_scrub_reservations() = 0;

  /**
   * Reserve local scrub resources (managed by the OSD)
   *
   * Fails if OSD's local-scrubs budget was exhausted
   * \returns were local resources reserved?
   */
  virtual bool reserve_local() = 0;

  /**
   * Register/de-register with the OSD scrub queue
   *
   * Following our status as Primary or replica.
   */
  virtual void on_primary_change(
    std::string_view caller,
    const requested_scrub_t& request_flags) = 0;

  /**
   * Recalculate the required scrub time.
   *
   * This function assumes that the queue registration status is up-to-date,
   * i.e. the OSD "knows our name" if-f we are the Primary.
   */
  virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;

  // on the replica:
  virtual void handle_scrub_reserve_request(OpRequestRef op) = 0;
  virtual void handle_scrub_reserve_release(OpRequestRef op) = 0;

  // and on the primary:
  virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0;
  virtual void handle_scrub_reserve_reject(OpRequestRef op,
					   pg_shard_t from) = 0;

  /// de-register from the OSD scrub queue (e.g. when the PG is removed)
  virtual void rm_from_osd_scrubbing() = 0;

  /// handle an operator-initiated scrub request, updating 'req_flags'
  virtual void scrub_requested(scrub_level_t scrub_level,
			       scrub_type_t scrub_type,
			       requested_scrub_t& req_flags) = 0;

  // --------------- debugging via the asok ------------------------------

  virtual int asok_debug(std::string_view cmd,
			 std::string param,
			 Formatter* f,
			 std::stringstream& ss) = 0;
};