]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/crimson/osd/pg.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / osd / pg.h
index 34676ee7a1098877ed558cb95406a9707d65dbf1..776cdd635565f28cb16e417b32c471d4cd97cb77 100644 (file)
@@ -1,16 +1,14 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab expandtab
 
 #pragma once
 
 #include <memory>
 #include <optional>
-#include <boost/intrusive_ptr.hpp>
 #include <boost/smart_ptr/intrusive_ref_counter.hpp>
 #include <boost/smart_ptr/local_shared_ptr.hpp>
 #include <seastar/core/future.hh>
 #include <seastar/core/shared_future.hh>
-#include <seastar/core/sleep.hh>
 
 #include "common/dout.h"
 #include "crimson/net/Fwd.h"
 #include "crimson/osd/object_context.h"
 #include "osd/PeeringState.h"
 
+#include "crimson/common/interruptible_future.h"
 #include "crimson/common/type_helpers.h"
 #include "crimson/os/futurized_collection.h"
 #include "crimson/osd/backfill_state.h"
+#include "crimson/osd/pg_interval_interrupt_condition.h"
+#include "crimson/osd/ops_executer.h"
 #include "crimson/osd/osd_operations/client_request.h"
 #include "crimson/osd/osd_operations/peering_event.h"
 #include "crimson/osd/osd_operations/replicated_request.h"
@@ -79,6 +80,11 @@ class PG : public boost::intrusive_ref_counter<
   seastar::timer<seastar::lowres_clock> renew_lease_timer;
 
 public:
+  template <typename T = void>
+  using interruptible_future =
+    ::crimson::interruptible::interruptible_future<
+      ::crimson::osd::IOInterruptCondition, T>;
+
   PG(spg_t pgid,
      pg_shard_t pg_shard,
      crimson::os::CollectionRef coll_ref,
@@ -151,6 +157,14 @@ public:
     // Not needed yet -- mainly for scrub scheduling
   }
 
+  /// Notify PG that Primary/Replica status has changed (to update scrub registration)
+  void on_primary_status_change(bool was_primary, bool now_primary) final {
+  }
+
+  /// Need to reschedule next scrub. Assuming no change in role
+  void reschedule_scrub() final {
+  }
+
   void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) final;
 
   uint64_t get_snap_trimq_size() const final {
@@ -158,9 +172,9 @@ public:
   }
 
   void send_cluster_message(
-    int osd, MessageRef m,
+    int osd, MessageURef m,
     epoch_t epoch, bool share_map_update=false) final {
-    (void)shard_services.send_to_osd(osd, m, epoch);
+    (void)shard_services.send_to_osd(osd, std::move(m), epoch);
   }
 
   void send_pg_created(pg_t pgid) final {
@@ -259,10 +273,10 @@ public:
        }));
   }
 
-  void update_heartbeat_peers(set<int> peers) final {
+  void update_heartbeat_peers(std::set<int> peers) final {
     // Not needed yet
   }
-  void set_probe_targets(const set<pg_shard_t> &probe_set) final {
+  void set_probe_targets(const std::set<pg_shard_t> &probe_set) final {
     // Not needed yet
   }
   void clear_probe_targets() final {
@@ -274,18 +288,6 @@ public:
   void clear_want_pg_temp() final {
     shard_services.remove_want_pg_temp(pgid.pgid);
   }
-  void publish_stats_to_osd() final {
-    if (!is_primary())
-      return;
-
-    (void) peering_state.prepare_stats_for_publish(
-      false,
-      pg_stat_t(),
-      object_stat_collection_t());
-  }
-  void clear_publish_stats() final {
-    // Not needed yet
-  }
   void check_recovery_sources(const OSDMapRef& newmap) final {
     // Not needed yet
   }
@@ -448,13 +450,8 @@ public:
   bool is_backfilling() const final {
     return peering_state.is_backfilling();
   }
-  pg_stat_t get_stats() {
-    auto stats = peering_state.prepare_stats_for_publish(
-      false,
-      pg_stat_t(),
-      object_stat_collection_t());
-    ceph_assert(stats);
-    return *stats;
+  uint64_t get_last_user_version() const {
+    return get_info().last_user_version;
   }
   bool get_need_up_thru() const {
     return peering_state.get_need_up_thru();
@@ -479,7 +476,6 @@ public:
     int acting_primary,
     const pg_history_t& history,
     const PastIntervals& pim,
-    bool backfill,
     ceph::os::Transaction &t);
 
   seastar::future<> read_state(crimson::os::FuturizedStore* store);
@@ -491,7 +487,7 @@ public:
   void handle_activate_map(PeeringCtx &rctx);
   void handle_initialize(PeeringCtx &rctx);
 
-  static hobject_t get_oid(const MOSDOp &m);
+  static hobject_t get_oid(const hobject_t& hobj);
   static RWState::State get_lock_type(const OpInfo &op_info);
   static std::optional<hobject_t> resolve_oid(
     const SnapSet &snapset,
@@ -499,27 +495,49 @@ public:
 
   using load_obc_ertr = crimson::errorator<
     crimson::ct_error::object_corrupted>;
-
-  load_obc_ertr::future<crimson::osd::ObjectContextRef>
+  using load_obc_iertr =
+    ::crimson::interruptible::interruptible_errorator<
+      ::crimson::osd::IOInterruptCondition,
+      load_obc_ertr>;
+  using interruptor = ::crimson::interruptible::interruptor<
+    ::crimson::osd::IOInterruptCondition>;
+  load_obc_iertr::future<
+    std::pair<crimson::osd::ObjectContextRef, bool>>
+  get_or_load_clone_obc(
+    hobject_t oid, crimson::osd::ObjectContextRef head_obc);
+
+  load_obc_iertr::future<
+    std::pair<crimson::osd::ObjectContextRef, bool>>
+  get_or_load_head_obc(hobject_t oid);
+
+  load_obc_iertr::future<crimson::osd::ObjectContextRef>
   load_head_obc(ObjectContextRef obc);
 
-  load_obc_ertr::future<>
+  load_obc_iertr::future<>
   reload_obc(crimson::osd::ObjectContext& obc) const;
 
 public:
   using with_obc_func_t =
-    std::function<load_obc_ertr::future<> (ObjectContextRef)>;
+    std::function<load_obc_iertr::future<> (ObjectContextRef)>;
+
+  using obc_accessing_list_t = boost::intrusive::list<
+    ObjectContext,
+    ObjectContext::obc_accessing_option_t>;
+  obc_accessing_list_t obc_set_accessing;
 
   template<RWState::State State>
-  load_obc_ertr::future<> with_head_obc(hobject_t oid, with_obc_func_t&& func);
+  load_obc_iertr::future<> with_head_obc(hobject_t oid, with_obc_func_t&& func);
 
-  load_obc_ertr::future<> with_locked_obc(
-    Ref<MOSDOp> &m,
+  template<RWState::State State>
+  interruptible_future<> with_locked_obc(
+    ObjectContextRef obc,
+    with_obc_func_t&& f);
+  load_obc_iertr::future<> with_locked_obc(
+    const hobject_t &hobj,
     const OpInfo &op_info,
-    Operation *op,
     with_obc_func_t&& f);
 
-  seastar::future<> handle_rep_op(Ref<MOSDRepOp> m);
+  interruptible_future<> handle_rep_op(Ref<MOSDRepOp> m);
   void handle_rep_op_reply(crimson::net::ConnectionRef conn,
                           const MOSDRepOpReply& m);
 
@@ -528,35 +546,73 @@ public:
 
 private:
   template<RWState::State State>
-  load_obc_ertr::future<> with_clone_obc(hobject_t oid, with_obc_func_t&& func);
+  load_obc_iertr::future<> with_head_obc(
+    ObjectContextRef obc,
+    bool existed,
+    with_obc_func_t&& func);
+  template<RWState::State State>
+  interruptible_future<> with_existing_head_obc(
+    ObjectContextRef head,
+    with_obc_func_t&& func);
+
+  template<RWState::State State>
+  load_obc_iertr::future<> with_clone_obc(hobject_t oid, with_obc_func_t&& func);
+  template<RWState::State State>
+  interruptible_future<> with_existing_clone_obc(
+    ObjectContextRef clone, with_obc_func_t&& func);
 
-  load_obc_ertr::future<ObjectContextRef> get_locked_obc(
+  load_obc_iertr::future<ObjectContextRef> get_locked_obc(
     Operation *op,
     const hobject_t &oid,
     RWState::State type);
 
-  void do_peering_event(
-    const boost::statechart::event_base &evt,
-    PeeringCtx &rctx);
-  osd_op_params_t&& fill_op_params_bump_pg_version(
-    osd_op_params_t&& osd_op_p,
-    Ref<MOSDOp> m,
+  void fill_op_params_bump_pg_version(
+    osd_op_params_t& osd_op_p,
     const bool user_modify);
-  seastar::future<Ref<MOSDOpReply>> handle_failed_op(
-    const std::error_code& e,
-    ObjectContextRef obc,
-    const OpsExecuter& ox,
-    const MOSDOp& m) const;
-  seastar::future<Ref<MOSDOpReply>> do_osd_ops(
+  using do_osd_ops_ertr = crimson::errorator<
+   crimson::ct_error::eagain>;
+  using do_osd_ops_iertr =
+    ::crimson::interruptible::interruptible_errorator<
+      ::crimson::osd::IOInterruptCondition,
+      ::crimson::errorator<crimson::ct_error::eagain>>;
+  template <typename Ret = void>
+  using pg_rep_op_fut_t =
+    std::tuple<interruptible_future<>,
+               do_osd_ops_iertr::future<Ret>>;
+  do_osd_ops_iertr::future<pg_rep_op_fut_t<MURef<MOSDOpReply>>> do_osd_ops(
     Ref<MOSDOp> m,
     ObjectContextRef obc,
     const OpInfo &op_info);
-  seastar::future<Ref<MOSDOpReply>> do_pg_ops(Ref<MOSDOp> m);
-  seastar::future<> submit_transaction(const OpInfo& op_info,
-                                      const std::vector<OSDOp>& ops,
-                                      ObjectContextRef&& obc,
-                                      ceph::os::Transaction&& txn,
-                                      const osd_op_params_t& oop);
+  using do_osd_ops_success_func_t =
+    std::function<do_osd_ops_iertr::future<>()>;
+  using do_osd_ops_failure_func_t =
+    std::function<do_osd_ops_iertr::future<>(const std::error_code&)>;
+  struct do_osd_ops_params_t;
+  do_osd_ops_iertr::future<pg_rep_op_fut_t<>> do_osd_ops(
+    ObjectContextRef obc,
+    std::vector<OSDOp>& ops,
+    const OpInfo &op_info,
+    const do_osd_ops_params_t& params,
+    do_osd_ops_success_func_t success_func,
+    do_osd_ops_failure_func_t failure_func);
+  template <class Ret, class SuccessFunc, class FailureFunc>
+  do_osd_ops_iertr::future<pg_rep_op_fut_t<Ret>> do_osd_ops_execute(
+    seastar::lw_shared_ptr<OpsExecuter> ox,
+    std::vector<OSDOp>& ops,
+    const OpInfo &op_info,
+    SuccessFunc&& success_func,
+    FailureFunc&& failure_func);
+  interruptible_future<MURef<MOSDOpReply>> do_pg_ops(Ref<MOSDOp> m);
+  std::tuple<interruptible_future<>, interruptible_future<>>
+  submit_transaction(
+    const OpInfo& op_info,
+    const std::vector<OSDOp>& ops,
+    ObjectContextRef&& obc,
+    ceph::os::Transaction&& txn,
+    osd_op_params_t&& oop);
+  interruptible_future<> repair_object(
+    const hobject_t& oid,
+    eversion_t& v);
 
 private:
   OSDMapGate osdmap_gate;
@@ -574,7 +630,6 @@ public:
     return shard_services;
   }
   seastar::future<> stop();
-
 private:
   std::unique_ptr<PGBackend> backend;
   std::unique_ptr<RecoveryBackend> recovery_backend;
@@ -582,6 +637,15 @@ private:
 
   PeeringState peering_state;
   eversion_t projected_last_update;
+
+public:
+  // PeeringListener
+  void publish_stats_to_osd() final;
+  void clear_publish_stats() final;
+  pg_stat_t get_stats() const;
+private:
+  std::optional<pg_stat_t> pg_stats;
+
 public:
   RecoveryBackend* get_recovery_backend() final {
     return recovery_backend.get();
@@ -602,7 +666,7 @@ public:
   epoch_t get_last_peering_reset() const final {
     return peering_state.get_last_peering_reset();
   }
-  const set<pg_shard_t> &get_acting_recovery_backfill() const {
+  const std::set<pg_shard_t> &get_acting_recovery_backfill() const {
     return peering_state.get_acting_recovery_backfill();
   }
   bool is_backfill_target(pg_shard_t osd) const {
@@ -614,13 +678,16 @@ public:
   uint64_t min_peer_features() const {
     return peering_state.get_min_peer_features();
   }
-  const map<hobject_t, set<pg_shard_t>>&
+  const std::map<hobject_t, std::set<pg_shard_t>>&
   get_missing_loc_shards() const {
     return peering_state.get_missing_loc().get_missing_locs();
   }
-  const map<pg_shard_t, pg_missing_t> &get_shard_missing() const {
+  const std::map<pg_shard_t, pg_missing_t> &get_shard_missing() const {
     return peering_state.get_peer_missing();
   }
+  epoch_t get_interval_start_epoch() const {
+    return get_info().history.same_interval_since;
+  }
   const pg_missing_const_i* get_shard_missing(pg_shard_t shard) const {
     if (shard == pg_whoami)
       return &get_local_missing();
@@ -632,6 +699,7 @@ public:
        return &it->second;
     }
   }
+  interruptible_future<std::tuple<bool, int>> already_complete(const osd_reqid_t& reqid);
   int get_recovery_op_priority() const {
     int64_t pri = 0;
     get_pool().info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
@@ -642,6 +710,13 @@ public:
     return seastar::now();
   }
 
+  bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) const;
+
+  template <typename MsgType>
+  bool can_discard_replica_op(const MsgType& m) const {
+    return can_discard_replica_op(m, m.map_epoch);
+  }
+
 private:
   // instead of seastar::gate, we use a boolean flag to indicate
   // whether the system is shutting down, as we don't need to track
@@ -668,18 +743,20 @@ private:
 
   friend std::ostream& operator<<(std::ostream&, const PG& pg);
   friend class ClientRequest;
+  friend struct CommonClientRequest;
   friend class PGAdvanceMap;
   friend class PeeringEvent;
   friend class RepRequest;
   friend class BackfillRecovery;
   friend struct PGFacade;
+  friend class InternalClientRequest;
+  friend class WatchTimeoutRequest;
 private:
   seastar::future<bool> find_unfound() {
     return seastar::make_ready_future<bool>(true);
   }
 
-  template <typename MsgType>
-  bool can_discard_replica_op(const MsgType& m) const;
+  bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const;
   bool can_discard_op(const MOSDOp& m) const;
   bool is_missing_object(const hobject_t& soid) const {
     return peering_state.get_pg_log().get_missing().get_items().count(soid);
@@ -691,12 +768,41 @@ private:
        oid, get_actingset(), v);
   }
   bool is_degraded_or_backfilling_object(const hobject_t& soid) const;
-  const set<pg_shard_t> &get_actingset() const {
+  const std::set<pg_shard_t> &get_actingset() const {
     return peering_state.get_actingset();
   }
 
 private:
   BackfillRecovery::BackfillRecoveryPipeline backfill_pipeline;
+
+  friend class IOInterruptCondition;
+};
+
+struct PG::do_osd_ops_params_t {
+  crimson::net::ConnectionRef get_connection() const {
+    return nullptr;
+  }
+  osd_reqid_t get_reqid() const {
+    return reqid;
+  }
+  utime_t get_mtime() const {
+    return mtime;
+  };
+  epoch_t get_map_epoch() const {
+    return map_epoch;
+  }
+  entity_inst_t get_orig_source_inst() const {
+    return orig_source_inst;
+  }
+  uint64_t get_features() const {
+    return features;
+  }
+  crimson::net::ConnectionRef conn;
+  osd_reqid_t reqid;
+  utime_t mtime;
+  epoch_t map_epoch;
+  entity_inst_t orig_source_inst;
+  uint64_t features;
 };
 
 std::ostream& operator<<(std::ostream&, const PG& pg);