// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once

#include "common/scrub_types.h"
#include "include/types.h"
#include "os/ObjectStore.h"

#include "OpRequest.h"

namespace ceph {
class Formatter;
}

namespace Scrub {

/// high/low OP priority
enum class scrub_prio_t : bool { low_priority = false, high_priority = true };

/// Identifies a specific scrub activation within an interval,
/// see ScrubPgIF::m_current_token
using act_token_t = uint32_t;

/// "environment" preconditions affecting which PGs are eligible for scrubbing
struct ScrubPreconds {
  bool allow_requested_repair_only{false};
  bool load_is_low{true};
  bool time_permit{true};
  bool only_deadlined{false};
};
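
/*
 * Illustrative sketch (not part of the interface): how an OSD-level scheduler
 * might consult a ScrubPreconds instance when deciding whether a given PG may
 * be scrubbed right now. The 'is_repair_requested' and 'past_deadline' inputs
 * are hypothetical and only serve to show the intended meaning of the flags.
 *
 *   bool eligible(const ScrubPreconds& env,
 *                 bool is_repair_requested,
 *                 bool past_deadline)
 *   {
 *     if (env.allow_requested_repair_only && !is_repair_requested)
 *       return false;              // only operator-requested repairs allowed
 *     if (env.only_deadlined && !past_deadline)
 *       return false;              // restrict scheduling to overdue scrubs
 *     return env.load_is_low && env.time_permit;
 *   }
 */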

}  // namespace Scrub


/**
 * Flags affecting the scheduling and behaviour of the *next* scrub.
 *
 * We hold two of these flag collections: one for the next scrub, and one
 * frozen at scrub initiation (i.e. in pg::queue_scrub()).
 */
struct requested_scrub_t {

  // flags to indicate explicitly requested scrubs (by admin):
  // bool must_scrub, must_deep_scrub, must_repair, need_auto;

  /**
   * 'must_scrub' is set by an admin command (or by need_auto).
   * Affects the priority of the scrubbing, and the sleep periods
   * during the scrub.
   */
  bool must_scrub{false};

  /**
   * The scrub must not be aborted.
   * Set for explicitly requested scrubs, and for scrubs originating from the
   * pairing process with the 'repair' flag set (in the RequestScrub event).
   *
   * Will be copied into the 'required' scrub flag upon scrub start.
   */
  bool req_scrub{false};

  /**
   * Set from scrub_requested() with the need_auto param set, which only
   * happens in scrub_finish() - if deep_scrub_on_error is set and we have
   * errors.
   *
   * If set, will prevent the OSD from casually postponing our scrub. When
   * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair
   * to be set.
   */
  bool need_auto{false};

  /**
   * Set for scrub-after-recovery just before we initiate the recovery deep
   * scrub, or if scrub_requested() was called with either need_auto or repair.
   * Affects PG_STATE_DEEP_SCRUB.
   */
  bool must_deep_scrub{false};

  /**
   * An intermediary flag, used by pg::sched_scrub() the first time a planned
   * scrub has all its resources. Determines whether the next repair/scrub
   * will be 'deep'.
   *
   * Note: 'dumped' by PgScrubber::dump() and such. In practice, as it is a
   * temporary that is set and reset within the same operation, it will never
   * appear set externally.
   */
  bool time_for_deep{false};

  bool deep_scrub_on_error{false};

  /**
   * If set, we should see must_deep_scrub and must_scrub set, too.
   *
   * - 'must_repair' is checked by the OSD when scheduling the scrubs.
   * - also checked & cleared at pg::queue_scrub()
   */
  bool must_repair{false};

  /*
   * The value of auto_repair is determined in sched_scrub() (once per scrub;
   * the previous value is not remembered). Set if:
   *  - allowed by configuration and backend, and
   *  - must_scrub is not set (i.e. this is a periodic scrub), and
   *  - time_for_deep was just set
   */
  bool auto_repair{false};

  /**
   * Indicates that we are scrubbing post-repair, to verify that everything
   * was fixed. Otherwise, PG_STATE_FAILED_REPAIR will be asserted.
   */
  bool check_repair{false};
};

std::ostream& operator<<(std::ostream& out, const requested_scrub_t& sf);
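
/*
 * Illustrative sketch (an assumption, not taken from the implementation): the
 * flag combination one might expect in a requested_scrub_t after an operator
 * explicitly asks for a repair scrub. The actual assignments are performed
 * elsewhere (e.g. via scrub_requested() / set_op_parameters()); this only
 * demonstrates how the flags documented above relate to each other.
 *
 *   requested_scrub_t req;
 *   req.must_scrub = true;       // explicitly requested => affects priority
 *   req.req_scrub = true;        // the scrub must not be aborted
 *   req.must_deep_scrub = true;  // a 'repair' request implies a deep scrub
 *   req.must_repair = true;      // and the repair flag itself
 *   std::cout << req;            // printable via the operator<< declared above
 */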

/**
 *  The interface used by the PG when requesting scrub-related info or services
 */
struct ScrubPgIF {

  virtual ~ScrubPgIF() = default;

  friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
  {
    return s.show(out);
  }

  virtual std::ostream& show(std::ostream& out) const = 0;

  // --------------- triggering state-machine events:

  virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;

  virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;

  virtual void send_scrub_resched(epoch_t epoch_queued) = 0;

  virtual void active_pushes_notification(epoch_t epoch_queued) = 0;

  virtual void update_applied_notification(epoch_t epoch_queued) = 0;

  virtual void digest_update_notification(epoch_t epoch_queued) = 0;

  virtual void send_scrub_unblock(epoch_t epoch_queued) = 0;

  virtual void send_replica_maps_ready(epoch_t epoch_queued) = 0;

  virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;

  virtual void send_start_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;

  virtual void send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0;

  virtual void send_full_reset(epoch_t epoch_queued) = 0;

  virtual void send_chunk_free(epoch_t epoch_queued) = 0;

  virtual void send_chunk_busy(epoch_t epoch_queued) = 0;

  virtual void send_local_map_done(epoch_t epoch_queued) = 0;

  virtual void send_get_next_chunk(epoch_t epoch_queued) = 0;

  virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;

  virtual void send_maps_compared(epoch_t epoch_queued) = 0;

  virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;

  // --------------------------------------------------

  // currently only used for an assert:
  [[nodiscard]] virtual bool are_callbacks_pending() const = 0;

  /**
   * the scrubber is marked 'active':
   * - for the primary: when all replica OSDs grant us the requested resources
   * - for replicas: upon receiving the scrub request from the primary
   */
  [[nodiscard]] virtual bool is_scrub_active() const = 0;

  /**
   * 'true' until after the FSM processes the 'scrub-finished' event, and
   * scrubbing is completely cleaned up.
   *
   * In other words - it holds longer than is_scrub_active(), thus preventing
   * re-scrubbing of the same PG while the previous scrub has not fully
   * terminated.
   */
  [[nodiscard]] virtual bool is_queued_or_active() const = 0;

  /**
   * Manipulate the Scrubber's 'a scrub request has been queued, or we are
   * actually scrubbing' flag.
   *
   * clear_queued_or_active() will also restart any blocked snaptrimming.
   */
  virtual void set_queued_or_active() = 0;
  virtual void clear_queued_or_active() = 0;
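
  /*
   * A minimal sketch of the expected call pattern (illustrative only;
   * 'scrubber' stands for any pointer to a ScrubPgIF implementation): the
   * flag is raised when the scrub request is queued and cleared only after
   * scrubbing has fully terminated, which is why re-queuing is gated on
   * is_queued_or_active() rather than on is_scrub_active().
   *
   *   if (!scrubber->is_queued_or_active()) {
   *     scrubber->set_queued_or_active();
   *     // ... queue the scrub request ...
   *   }
   *   // later, once the scrub is done or aborted:
   *   scrubber->clear_queued_or_active();  // also restarts blocked snaptrimming
   */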

  /// are we waiting for resource reservation grants from our replicas?
  [[nodiscard]] virtual bool is_reserving() const = 0;

  /// handle a message carrying a replica map
  virtual void map_from_replica(OpRequestRef op) = 0;

  virtual void replica_scrub_op(OpRequestRef op) = 0;

  virtual void set_op_parameters(requested_scrub_t&) = 0;

  virtual void scrub_clear_state() = 0;

  virtual void handle_query_state(ceph::Formatter* f) = 0;

  virtual pg_scrubbing_status_t get_schedule() const = 0;

  virtual void dump_scrubber(ceph::Formatter* f,
                             const requested_scrub_t& request_flags) const = 0;

  /**
   * Return true if soid is currently being scrubbed and pending IOs should
   * block. May have a side effect of preempting an in-progress scrub -- will
   * return false in that case.
   *
   * @param soid object to check for ongoing scrub
   * @return boolean whether a request on soid should block until scrub completion
   */
  virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;
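
  /*
   * Usage sketch (hypothetical caller, for illustration only): a write path
   * would typically test the target object against the chunk currently being
   * scrubbed, and park the op if it falls inside that chunk. Note the
   * documented side effect: the call may instead preempt the scrub and
   * return false, letting the write proceed.
   *
   *   if (scrubber->write_blocked_by_scrub(op_target)) {
   *     // delay the op; it is expected to be re-queued once the chunk is released
   *     waiting_ops.push_back(op);
   *     return;
   *   }
   *   // otherwise the write may proceed
   */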

  /// Returns whether any objects in the range [start, end] are being scrubbed
  virtual bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) = 0;

  /// the op priority, taken from the primary's request message
  virtual Scrub::scrub_prio_t replica_op_priority() const = 0;

  /// the priority of the on-going scrub (used when requeuing events)
  virtual unsigned int scrub_requeue_priority(
    Scrub::scrub_prio_t with_priority) const = 0;
  virtual unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority,
                                              unsigned int suggested_priority) const = 0;

  virtual void add_callback(Context* context) = 0;

  /// add to scrub statistics, but only if the soid is below the scrub start
  virtual void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
                                        const hobject_t& soid) = 0;

  /**
   * the version of 'scrub_clear_state()' that does not try to invoke FSM
   * services (thus can be called from FSM reactions)
   */
  virtual void clear_pgscrub_state() = 0;

  /**
   * triggers the 'RemotesReserved' (all replicas granted scrub resources)
   * state-machine event
   */
  virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;

  /**
   * triggers the 'ReservationFailure' (at least one replica denied us the
   * requested resources) state-machine event
   */
  virtual void send_reservation_failure(epoch_t epoch_queued) = 0;

  virtual void cleanup_store(ObjectStore::Transaction* t) = 0;

  virtual bool get_store_errors(const scrub_ls_arg_t& arg,
                                scrub_ls_result_t& res_inout) const = 0;

  // --------------- reservations -----------------------------------

  /**
   * message all replicas with a request to "unreserve" scrub resources
   */
  virtual void unreserve_replicas() = 0;

  /**
   * "forget" all replica reservations. No messages are sent to the
   * previously-reserved replicas.
   *
   * Used upon interval change. The replicas' state is guaranteed to
   * be reset separately by the interval-change event.
   */
  virtual void discard_replica_reservations() = 0;

  /**
   * clear both local and OSD-managed resource reservation flags
   */
  virtual void clear_scrub_reservations() = 0;

  /**
   * Reserve local scrub resources (managed by the OSD)
   *
   * Fails if the OSD's local-scrubs budget was exhausted
   * \returns were local resources reserved?
   */
  virtual bool reserve_local() = 0;
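
  /*
   * Reservation flow sketch (illustrative; the actual sequencing is driven by
   * the scrubber's FSM): the primary first reserves its own OSD-local slot,
   * then asks the replicas. The replicas' answers arrive via
   * handle_scrub_reserve_grant() / handle_scrub_reserve_reject() (declared
   * below) and eventually result in send_remotes_reserved() or
   * send_reservation_failure() being triggered.
   *
   *   if (!scrubber->reserve_local()) {
   *     // the OSD's local scrubs budget is exhausted - retry later
   *     return;
   *   }
   *   // ... request reservations from all replicas ...
   *   // on failure, or when the scrub ends: scrubber->unreserve_replicas();
   */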

  /**
   * Register/de-register with the OSD scrub queue
   *
   * Following our status as Primary or replica.
   */
  virtual void on_primary_change(const requested_scrub_t& request_flags) = 0;

  /**
   * Recalculate the required scrub time.
   *
   * This function assumes that the queue registration status is up-to-date,
   * i.e. the OSD "knows our name" if and only if we are the Primary.
   */
  virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;

  virtual void on_maybe_registration_change(const requested_scrub_t& request_flags) = 0;
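
  /*
   * Registration sketch (assumed call order, for illustration; 'planned_scrub'
   * is a placeholder for the PG's requested_scrub_t instance): when the PG's
   * role changes, on_primary_change() (re)registers the PG with the OSD's
   * scrub queue, and only then is update_scrub_job() expected to recompute
   * the schedule - matching the "queue registration status is up-to-date"
   * precondition stated above.
   *
   *   scrubber->on_primary_change(planned_scrub);
   *   scrubber->update_scrub_job(planned_scrub);
   */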

  // on the replica:
  virtual void handle_scrub_reserve_request(OpRequestRef op) = 0;
  virtual void handle_scrub_reserve_release(OpRequestRef op) = 0;

  // and on the primary:
  virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0;
  virtual void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) = 0;

  virtual void rm_from_osd_scrubbing() = 0;

  virtual void scrub_requested(scrub_level_t scrub_level,
                               scrub_type_t scrub_type,
                               requested_scrub_t& req_flags) = 0;
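
  /*
   * Sketch of an operator-initiated request (illustrative; scrub_level_t and
   * scrub_type_t are defined elsewhere, and the enumerator names used here
   * are assumptions): an admin command asking for a deep repair scrub would
   * end up invoking something along the lines of
   *
   *   scrubber->scrub_requested(scrub_level_t::deep,
   *                             scrub_type_t::do_repair,
   *                             planned_scrub);
   *
   * with 'planned_scrub' being the PG's requested_scrub_t instance.
   */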

  // --------------- debugging via the asok ------------------------------

  virtual int asok_debug(std::string_view cmd,
                         std::string param,
                         Formatter* f,
                         std::stringstream& ss) = 0;
};