git.proxmox.com Git - ceph.git/blobdiff - ceph/src/crimson/os/seastore/seastore.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / seastore.h
index 798442c34888198e89dfc9d361b718522982439d..d1c325432d7ff93b485f47e2611586d6c6e31f6d 100644 (file)
 #include <optional>
 #include <seastar/core/future.hh>
 
-#include "osd/osd_types.h"
 #include "include/uuid.h"
 
 #include "os/Transaction.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/futurized_collection.h"
 #include "crimson/os/futurized_store.h"
-#include "transaction.h"
+
+#include "crimson/os/seastore/transaction.h"
+#include "crimson/os/seastore/onode_manager.h"
+#include "crimson/os/seastore/omap_manager.h"
+#include "crimson/os/seastore/collection_manager.h"
 
 namespace crimson::os::seastore {
 
-class SeastoreCollection;
-class SegmentManager;
-class OnodeManager;
 class Onode;
 using OnodeRef = boost::intrusive_ptr<Onode>;
-class Journal;
-class LBAManager;
 class TransactionManager;
-class Cache;
 
-class SeaStore final : public FuturizedStore {
-  uuid_d osd_fsid;
+class SeastoreCollection final : public FuturizedCollection { // SeaStore's collection type: a FuturizedCollection plus one lock
+public:
+  template <typename... T>
+  SeastoreCollection(T&&... args) : // forwards every argument unchanged to the FuturizedCollection constructor
+    FuturizedCollection(std::forward<T>(args)...) {}
+
+  seastar::shared_mutex ordering_lock; ///< handed to take_collection_lock() in SeaStore::repeat_with_internal_context
+};
 
+class SeaStore final : public FuturizedStore {
 public:
+  class MDStore {
+  public:
+    using base_iertr = crimson::errorator<
+      crimson::ct_error::input_output_error
+    >;
+
+    using write_meta_ertr = base_iertr;
+    using write_meta_ret = write_meta_ertr::future<>;
+    virtual write_meta_ret write_meta(
+      const std::string &key,
+      const std::string &val
+    ) = 0;
+
+    using read_meta_ertr = base_iertr;
+    using read_meta_ret = write_meta_ertr::future<std::optional<std::string>>;
+    virtual read_meta_ret read_meta(const std::string &key) = 0;
 
-  SeaStore(const std::string& path);
-  ~SeaStore() final;
+    virtual ~MDStore() {}
+  };
+  using MDStoreRef = std::unique_ptr<MDStore>;
 
+  SeaStore( // overload that is handed an MDStore explicitly
+    const std::string& root,
+    MDStoreRef mdstore,
+    SegmentManagerRef sm,
+    TransactionManagerRef tm,
+    CollectionManagerRef cm,
+    OnodeManagerRef om);
+  SeaStore( // overload without an MDStore argument; which MDStore it ends up with is decided in the .cc (not visible here)
+    const std::string& root,
+    SegmentManagerRef sm,
+    TransactionManagerRef tm,
+    CollectionManagerRef cm,
+    OnodeManagerRef om);
+  ~SeaStore(); // defined out of line
+    
   seastar::future<> stop() final;
-  seastar::future<> mount() final;
+  mount_ertr::future<> mount() final; // return type changes from seastar::future<> to mount_ertr::future<>
   seastar::future<> umount() final;
 
-  seastar::future<> mkfs(uuid_d new_osd_fsid) final;
+  mkfs_ertr::future<> mkfs(uuid_d new_osd_fsid) final; // return type changes from seastar::future<> to mkfs_ertr::future<>
   seastar::future<store_statfs_t> stat() const final;
 
   read_errorator::future<ceph::bufferlist> read(
@@ -58,7 +94,7 @@ public:
     const ghobject_t& oid,
     interval_set<uint64_t>& m,
     uint32_t op_flags = 0) final;
-  get_attr_errorator::future<ceph::bufferptr> get_attr(
+  get_attr_errorator::future<ceph::bufferlist> get_attr( // result payload is now a ceph::bufferlist (was ceph::bufferptr)
     CollectionRef c,
     const ghobject_t& oid,
     std::string_view name) const final;
@@ -76,7 +112,10 @@ public:
     const omap_keys_t& keys) final;
 
   /// Retrieves paged set of values > start (if present)
-  read_errorator::future<std::tuple<bool, omap_values_t>> omap_get_values(
+  using omap_get_values_ret_bare_t = std::tuple<bool, omap_values_t>;
+  using omap_get_values_ret_t = read_errorator::future<
+    omap_get_values_ret_bare_t>;
+  omap_get_values_ret_t omap_get_values(
     CollectionRef c,           ///< [in] collection
     const ghobject_t &oid,     ///< [in] oid
     const std::optional<std::string> &start ///< [in] start, empty for begin
@@ -117,65 +156,239 @@ public:
   unsigned get_max_attr_name_length() const final {
     return 256; // fixed constant; no state is consulted
   }
+  enum class op_type_t : uint8_t { // operation kinds; the numeric value is used as an index into stats.op_lat
+    TRANSACTION = 0,
+    READ,
+    WRITE,
+    GET_ATTR,
+    GET_ATTRS,
+    STAT,
+    OMAP_GET_VALUES,
+    OMAP_LIST,
+    MAX ///< count sentinel, not an operation; LAT_MAX is derived from it
+  };
 
 private:
-  std::unique_ptr<SegmentManager> segment_manager;
-  std::unique_ptr<SegmentCleaner> segment_cleaner;
-  std::unique_ptr<Cache> cache;
-  std::unique_ptr<Journal> journal;
-  std::unique_ptr<LBAManager> lba_manager;
-  std::unique_ptr<TransactionManager> transaction_manager;
-  std::unique_ptr<OnodeManager> onode_manager;
-
-
-  using write_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error>;
-  write_ertr::future<> _do_transaction_step(
-    TransactionRef &trans,
+  struct internal_context_t { // per-operation state: the caller's transaction plus the internal TransactionRef that carries it out
+    CollectionRef ch; ///< collection handle; its ordering_lock is taken in repeat_with_internal_context
+    ceph::os::Transaction ext_transaction; ///< the caller's transaction, moved in and owned here
+
+    internal_context_t(
+      CollectionRef ch,
+      ceph::os::Transaction &&_ext_transaction,
+      TransactionRef &&transaction)
+      : ch(ch), ext_transaction(std::move(_ext_transaction)),
+       transaction(std::move(transaction)), // argument is the constructor parameter, which shadows the member
+       iter(ext_transaction.begin()) {} // here ext_transaction is the member (the parameter is _ext_transaction), initialised above
+
+    TransactionRef transaction; ///< internal transaction; its handle provides take_collection_lock()
+
+    ceph::os::Transaction::iterator iter; ///< cursor over ext_transaction; re-seated by reset_preserve_handle()
+    std::chrono::steady_clock::time_point begin_timestamp = std::chrono::steady_clock::now(); ///< captured at construction; start of the latency sample
+
+    void reset_preserve_handle(TransactionManager &tm) { // prepare for another attempt
+      tm.reset_transaction_preserve_handle(*transaction);
+      iter = ext_transaction.begin(); // restart iteration from the first op
+    }
+  };
+
+  static void on_error(ceph::os::Transaction &t); // called from repeat_with_internal_context's all_same_way error handler with the caller's transaction
+
+  template <typename F> // F is invoked as std::invoke(f, ctx) with an internal_context_t&
+  auto repeat_with_internal_context(
+    CollectionRef ch,
+    ceph::os::Transaction &&t,
+    Transaction::src_t src,
+    const char* tname,
+    op_type_t op_type,
+    F &&f) {
+    return seastar::do_with(
+      internal_context_t( // context and f are moved into do_with and handed to the lambda by reference
+       ch, std::move(t),
+       transaction_manager->create_transaction(src, tname)),
+      std::forward<F>(f),
+      [this, op_type](auto &ctx, auto &f) {
+       return ctx.transaction->get_handle().take_collection_lock( // step 1: take the collection's ordering_lock
+         static_cast<SeastoreCollection&>(*(ctx.ch)).ordering_lock
+       ).then([&, this] {
+         return repeat_eagain([&, this] { // step 2: run f, wrapped in repeat_eagain
+           ctx.reset_preserve_handle(*transaction_manager); // every attempt starts from a reset transaction and iterator
+           return std::invoke(f, ctx);
+         }).handle_error(
+           crimson::ct_error::eagain::pass_further{}, // eagain is passed on unchanged
+           crimson::ct_error::all_same_way([&ctx](auto e) { // any other error: hand the caller's transaction to on_error
+             on_error(ctx.ext_transaction);
+           })
+         );
+       }).then([this, op_type, &ctx] { // step 3: record time elapsed since ctx was constructed
+         add_latency_sample(op_type,
+             std::chrono::steady_clock::now() - ctx.begin_timestamp);
+       });
+      }
+    );
+  }
+
+  template <typename Ret, typename F>
+  auto repeat_with_onode(
+    CollectionRef ch,
+    const ghobject_t &oid,
+    Transaction::src_t src,
+    const char* tname,
+    op_type_t op_type,
+    F &&f) const {
+    auto begin_time = std::chrono::steady_clock::now();
+    return seastar::do_with(
+      oid, Ret{}, std::forward<F>(f),
+      [this, src, op_type, begin_time, tname
+      ](auto &oid, auto &ret, auto &f)
+    {
+      return repeat_eagain([&, this, src, tname] {
+        return transaction_manager->with_transaction_intr(
+          src,
+          tname,
+          [&, this](auto& t)
+        {
+          return onode_manager->get_onode(t, oid
+          ).si_then([&](auto onode) {
+            return seastar::do_with(std::move(onode), [&](auto& onode) {
+              return f(t, *onode);
+            });
+          }).si_then([&ret](auto _ret) {
+            ret = _ret;
+          });
+        });
+      }).safe_then([&ret, op_type, begin_time, this] {
+        const_cast<SeaStore*>(this)->add_latency_sample(op_type,
+                   std::chrono::steady_clock::now() - begin_time);
+        return seastar::make_ready_future<Ret>(ret);
+      });
+    });
+  }
+
+  using _omap_get_value_iertr = OMapManager::base_iertr::extend< // OMapManager's base error set plus enodata
+    crimson::ct_error::enodata
+    >;
+  using _omap_get_value_ret = _omap_get_value_iertr::future<ceph::bufferlist>;
+  _omap_get_value_ret _omap_get_value( // single-key lookup; yields a bufferlist
+    Transaction &t,
+    omap_root_t &&root,
+    std::string_view key) const;
+
+  using _omap_get_values_iertr = OMapManager::base_iertr; // no extra errors beyond OMapManager's base set
+  using _omap_get_values_ret = _omap_get_values_iertr::future<omap_values_t>;
+  _omap_get_values_ret _omap_get_values( // multi-key variant; yields an omap_values_t
+    Transaction &t,
+    omap_root_t &&root,
+    const omap_keys_t &keys) const;
+
+  using _omap_list_bare_ret = OMapManager::omap_list_bare_ret; // aliases of the OMapManager types
+  using _omap_list_ret = OMapManager::omap_list_ret;
+  _omap_list_ret _omap_list( // returns OMapManager's list result type unchanged
+    Onode &onode,
+    const omap_root_le_t& omap_root,
+    Transaction& t,
+    const std::optional<std::string>& start,
+    OMapManager::omap_list_config_t config) const;
+
+  friend class SeaStoreOmapIterator;
+  omap_get_values_ret_t omap_list( // private and non-const; shares its return type with the public omap_get_values
+    CollectionRef ch,
+    const ghobject_t &oid,
+    const std::optional<std::string> &_start,
+    OMapManager::omap_list_config_t config);
+
+  std::string root; ///< NOTE(review): presumably the constructors' `root` argument - confirm in the .cc
+  MDStoreRef mdstore;
+  SegmentManagerRef segment_manager;
+  std::vector<SegmentManagerRef> secondaries; ///< additional segment managers; populated outside this header
+  TransactionManagerRef transaction_manager; ///< used by repeat_with_internal_context and repeat_with_onode
+  CollectionManagerRef collection_manager;
+  OnodeManagerRef onode_manager; ///< used by repeat_with_onode to fetch the onode
+  const uint32_t max_object_size = 0; ///< const with default initialiser 0; only a constructor mem-initialiser (not visible here) can give it another value
+
+  using tm_iertr = TransactionManager::base_iertr; // error set shared by all transaction step helpers below
+  using tm_ret = tm_iertr::future<>; // value-less future over tm_iertr
+  tm_ret _do_transaction_step( // first parameter is now the internal_context_t
+    internal_context_t &ctx,
     CollectionRef &col,
     std::vector<OnodeRef> &onodes,
     ceph::os::Transaction::iterator &i);
 
-  write_ertr::future<> _remove(
-    TransactionRef &trans,
+  tm_ret _remove( // all helpers below: return type becomes tm_ret, first parameter becomes the internal_context_t
+    internal_context_t &ctx,
     OnodeRef &onode);
-  write_ertr::future<> _touch(
-    TransactionRef &trans,
+  tm_ret _touch(
+    internal_context_t &ctx,
     OnodeRef &onode);
-  write_ertr::future<> _write(
-    TransactionRef &trans,
+  tm_ret _write(
+    internal_context_t &ctx,
     OnodeRef &onode,
-    uint64_t offset, size_t len, const ceph::bufferlist& bl,
+    uint64_t offset, size_t len,
+    ceph::bufferlist &&bl, // payload now taken by rvalue reference (was const reference)
     uint32_t fadvise_flags);
-  write_ertr::future<> _omap_set_values(
-    TransactionRef &trans,
+  tm_ret _omap_set_values(
+    internal_context_t &ctx,
     OnodeRef &onode,
     std::map<std::string, ceph::bufferlist> &&aset);
-  write_ertr::future<> _omap_set_header(
-    TransactionRef &trans,
+  tm_ret _omap_set_header(
+    internal_context_t &ctx,
     OnodeRef &onode,
-    const ceph::bufferlist &header);
-  write_ertr::future<> _omap_rmkeys(
-    TransactionRef &trans,
+    ceph::bufferlist &&header); // rvalue reference (was const reference)
+  tm_ret _omap_rmkeys(
+    internal_context_t &ctx,
     OnodeRef &onode,
-    const omap_keys_t& aset);
-  write_ertr::future<> _omap_rmkeyrange(
-    TransactionRef &trans,
+    omap_keys_t &&aset); // rvalue reference (was const reference)
+  tm_ret _omap_rmkeyrange(
+    internal_context_t &ctx,
     OnodeRef &onode,
-    const std::string &first,
-    const std::string &last);
-  write_ertr::future<> _truncate(
-    TransactionRef &trans,
+    std::string first, // both range bounds are now taken by value (were const references)
+    std::string last);
+  tm_ret _truncate(
+    internal_context_t &ctx,
     OnodeRef &onode, uint64_t size);
-  write_ertr::future<> _setattrs(
-    TransactionRef &trans,
+  tm_ret _setattrs(
+    internal_context_t &ctx,
     OnodeRef &onode,
-    std::map<std::string,bufferptr>& aset);
-  write_ertr::future<> _create_collection(
-    TransactionRef &trans,
+    std::map<std::string,bufferlist>&& aset); // values are bufferlist (were bufferptr); map taken by rvalue reference
+  tm_ret _create_collection(
+    internal_context_t &ctx,
     const coll_t& cid, int bits);
+  tm_ret _remove_collection( // newly added helper
+    internal_context_t &ctx,
+    const coll_t& cid);
+  using omap_set_kvs_ret = tm_iertr::future<>; // same type as tm_ret
+  omap_set_kvs_ret _omap_set_kvs( // takes a Transaction& directly instead of an internal_context_t
+    OnodeRef &onode,
+    const omap_root_le_t& omap_root,
+    Transaction& t,
+    omap_root_le_t& mutable_omap_root,
+    std::map<std::string, ceph::bufferlist>&& kvs);
 
   boost::intrusive_ptr<SeastoreCollection> _get_collection(const coll_t& cid); // returns an intrusive_ptr to a SeastoreCollection for cid
+
+  static constexpr auto LAT_MAX = static_cast<std::size_t>(op_type_t::MAX); // number of op_type_t values before the MAX sentinel
+  struct {
+    std::array<seastar::metrics::histogram, LAT_MAX> op_lat; ///< one histogram per op_type_t, indexed by its numeric value
+  } stats;
+
+  seastar::metrics::histogram& get_latency(
+      op_type_t op_type) {
+    const auto slot = static_cast<std::size_t>(op_type); // the enum value doubles as the array index
+    assert(slot < stats.op_lat.size());
+    return stats.op_lat[slot];
+  }
+
+  void add_latency_sample(op_type_t op_type,
+       std::chrono::steady_clock::duration dur) {
+    auto& hist = get_latency(op_type);
+    // Only the running count and sum are updated; the duration is recorded in whole milliseconds.
+    const auto whole_ms = std::chrono::duration_cast<std::chrono::milliseconds>(dur);
+    ++hist.sample_count;
+    hist.sample_sum += whole_ms.count();
+  }
+  seastar::metrics::metric_group metrics; ///< metric group owned by this store
+  void register_metrics(); // defined out of line
+  seastar::future<> write_fsid(uuid_d new_osd_fsid); // defined out of line; takes the same uuid_d argument type as mkfs
 };
 
+seastar::future<std::unique_ptr<SeaStore>> make_seastore( // free factory: future of an owning SeaStore pointer
+  const std::string &device,
+  const ConfigValues &config);
 }