git.proxmox.com Git - ceph.git/blobdiff - ceph/src/librbd/ImageCtx.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / librbd / ImageCtx.cc
index 8375d1a6390647ff63d4e552c5a70861b69612b2..4026fb934595af9651644dc871979d6ee0ec50dd 100644 (file)
@@ -24,7 +24,6 @@
 #include "librbd/operation/ResizeRequest.h"
 #include "librbd/Types.h"
 #include "librbd/Utils.h"
-#include "librbd/LibrbdWriteback.h"
 #include "librbd/exclusive_lock/AutomaticPolicy.h"
 #include "librbd/exclusive_lock/StandardPolicy.h"
 #include "librbd/io/AioCompletion.h"
@@ -77,15 +76,14 @@ public:
 
 class SafeTimerSingleton : public SafeTimer {
 public:
-  Mutex lock;
+  ceph::mutex lock = ceph::make_mutex("librbd::Journal::SafeTimerSingleton::lock");
 
   explicit SafeTimerSingleton(CephContext *cct)
-      : SafeTimer(cct, lock, true),
-        lock("librbd::Journal::SafeTimerSingleton::lock") {
+      : SafeTimer(cct, lock, true) {
     init();
   }
   ~SafeTimerSingleton() {
-    Mutex::Locker locker(lock);
+    std::lock_guard locker{lock};
     shutdown();
   }
 };
@@ -102,19 +100,16 @@ public:
       snap_id(CEPH_NOSNAP),
       snap_exists(true),
       read_only(ro),
+      read_only_flags(ro ? IMAGE_READ_ONLY_FLAG_USER : 0U),
       exclusive_locked(false),
       name(image_name),
       image_watcher(NULL),
       journal(NULL),
-      owner_lock(util::unique_lock_name("librbd::ImageCtx::owner_lock", this)),
-      md_lock(util::unique_lock_name("librbd::ImageCtx::md_lock", this)),
-      snap_lock(util::unique_lock_name("librbd::ImageCtx::snap_lock", this)),
-      timestamp_lock(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this)),
-      parent_lock(util::unique_lock_name("librbd::ImageCtx::parent_lock", this)),
-      object_map_lock(util::unique_lock_name("librbd::ImageCtx::object_map_lock", this)),
-      async_ops_lock(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this)),
-      copyup_list_lock(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this)),
-      completed_reqs_lock(util::unique_lock_name("librbd::ImageCtx::completed_reqs_lock", this)),
+      owner_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::owner_lock", this))),
+      image_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::image_lock", this))),
+      timestamp_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this))),
+      async_ops_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this))),
+      copyup_list_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this))),
       extra_read_flags(0),
       old_format(false),
       order(0), size(0), features(0),
@@ -127,6 +122,8 @@ public:
       operations(new Operations<>(*this)),
       exclusive_lock(nullptr), object_map(nullptr),
       io_work_queue(nullptr), op_work_queue(nullptr),
+      external_callback_completions(32),
+      event_socket_completions(32),
       asok_hook(nullptr),
       trace_endpoint("librbd")
   {
@@ -313,7 +310,11 @@ public:
   }
 
   int ImageCtx::get_read_flags(snap_t snap_id) {
-    int flags = librados::OPERATION_NOFLAG | extra_read_flags;
+    int flags = librados::OPERATION_NOFLAG | read_flags;
+    if (flags != 0)
+      return flags;
+
+    flags = librados::OPERATION_NOFLAG | extra_read_flags;
     if (snap_id == LIBRADOS_SNAP_HEAD)
       return flags;
 
@@ -325,7 +326,7 @@ public:
   }
 
   int ImageCtx::snap_set(uint64_t in_snap_id) {
-    ceph_assert(snap_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(image_lock));
     auto it = snap_info.find(in_snap_id);
     if (in_snap_id != CEPH_NOSNAP && it != snap_info.end()) {
       snap_id = in_snap_id;
@@ -342,7 +343,7 @@ public:
 
   void ImageCtx::snap_unset()
   {
-    ceph_assert(snap_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(image_lock));
     snap_id = CEPH_NOSNAP;
     snap_namespace = {};
     snap_name = "";
@@ -355,7 +356,7 @@ public:
   snap_t ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace& in_snap_namespace,
                                const string& in_snap_name) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     auto it = snap_ids.find({in_snap_namespace, in_snap_name});
     if (it != snap_ids.end()) {
       return it->second;
@@ -365,7 +366,7 @@ public:
 
   const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     map<snap_t, SnapInfo>::const_iterator it =
       snap_info.find(in_snap_id);
     if (it != snap_info.end())
@@ -376,7 +377,7 @@ public:
   int ImageCtx::get_snap_name(snap_t in_snap_id,
                              string *out_snap_name) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     const SnapInfo *info = get_snap_info(in_snap_id);
     if (info) {
       *out_snap_name = info->name;
@@ -388,7 +389,7 @@ public:
   int ImageCtx::get_snap_namespace(snap_t in_snap_id,
                                   cls::rbd::SnapshotNamespace *out_snap_namespace) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     const SnapInfo *info = get_snap_info(in_snap_id);
     if (info) {
       *out_snap_namespace = info->snap_namespace;
@@ -410,7 +411,7 @@ public:
 
   uint64_t ImageCtx::get_current_size() const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     return size;
   }
 
@@ -420,9 +421,7 @@ public:
   }
 
   string ImageCtx::get_object_name(uint64_t num) const {
-    char buf[object_prefix.length() + 32];
-    snprintf(buf, sizeof(buf), format_string, num);
-    return string(buf);
+    return util::data_object_name(this, num);
   }
 
   uint64_t ImageCtx::get_stripe_unit() const
@@ -457,20 +456,20 @@ public:
 
   void ImageCtx::set_access_timestamp(utime_t at)
   {
-    ceph_assert(timestamp_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(timestamp_lock));
     access_timestamp = at;
   }
 
   void ImageCtx::set_modify_timestamp(utime_t mt)
   {
-    ceph_assert(timestamp_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(timestamp_lock));
     modify_timestamp = mt;
   }
 
   int ImageCtx::is_snap_protected(snap_t in_snap_id,
                                  bool *is_protected) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     const SnapInfo *info = get_snap_info(in_snap_id);
     if (info) {
       *is_protected =
@@ -483,7 +482,7 @@ public:
   int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
                                    bool *is_unprotected) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     const SnapInfo *info = get_snap_info(in_snap_id);
     if (info) {
       *is_unprotected =
@@ -500,7 +499,7 @@ public:
                           uint8_t protection_status, uint64_t flags,
                           utime_t timestamp)
   {
-    ceph_assert(snap_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(image_lock));
     snaps.push_back(id);
     SnapInfo info(in_snap_name, in_snap_namespace,
                  in_size, parent, protection_status, flags, timestamp);
@@ -512,7 +511,7 @@ public:
                         string in_snap_name,
                         snap_t id)
   {
-    ceph_assert(snap_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(image_lock));
     snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
     snap_info.erase(id);
     snap_ids.erase({in_snap_namespace, in_snap_name});
@@ -520,7 +519,7 @@ public:
 
   uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     if (in_snap_id == CEPH_NOSNAP) {
       if (!resize_reqs.empty() &&
           resize_reqs.front()->shrinking()) {
@@ -537,40 +536,40 @@ public:
   }
 
   uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     uint64_t image_size = get_image_size(in_snap_id);
     return Striper::get_num_objects(layout, image_size);
   }
 
   bool ImageCtx::test_features(uint64_t features) const
   {
-    RWLock::RLocker l(snap_lock);
-    return test_features(features, snap_lock);
+    std::shared_lock l{image_lock};
+    return test_features(features, image_lock);
   }
 
   bool ImageCtx::test_features(uint64_t in_features,
-                               const RWLock &in_snap_lock) const
+                               const ceph::shared_mutex &in_image_lock) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     return ((features & in_features) == in_features);
   }
 
   bool ImageCtx::test_op_features(uint64_t in_op_features) const
   {
-    RWLock::RLocker snap_locker(snap_lock);
-    return test_op_features(in_op_features, snap_lock);
+    std::shared_lock l{image_lock};
+    return test_op_features(in_op_features, image_lock);
   }
 
   bool ImageCtx::test_op_features(uint64_t in_op_features,
-                                  const RWLock &in_snap_lock) const
+                                  const ceph::shared_mutex &in_image_lock) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     return ((op_features & in_op_features) == in_op_features);
   }
 
   int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     if (_snap_id == CEPH_NOSNAP) {
       *_flags = flags;
       return 0;
@@ -586,15 +585,16 @@ public:
   int ImageCtx::test_flags(librados::snap_t in_snap_id,
                            uint64_t flags, bool *flags_set) const
   {
-    RWLock::RLocker l(snap_lock);
-    return test_flags(in_snap_id, flags, snap_lock, flags_set);
+    std::shared_lock l{image_lock};
+    return test_flags(in_snap_id, flags, image_lock, flags_set);
   }
 
   int ImageCtx::test_flags(librados::snap_t in_snap_id,
-                           uint64_t flags, const RWLock &in_snap_lock,
+                           uint64_t flags,
+                           const ceph::shared_mutex &in_image_lock,
                            bool *flags_set) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     uint64_t snap_flags;
     int r = get_flags(in_snap_id, &snap_flags);
     if (r < 0) {
@@ -606,7 +606,7 @@ public:
 
   int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
   {
-    ceph_assert(snap_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(image_lock));
     uint64_t *_flags;
     if (in_snap_id == CEPH_NOSNAP) {
       _flags = &flags;
@@ -628,8 +628,7 @@ public:
 
   const ParentImageInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
   {
-    ceph_assert(snap_lock.is_locked());
-    ceph_assert(parent_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     if (in_snap_id == CEPH_NOSNAP)
       return &parent_md;
     const SnapInfo *info = get_snap_info(in_snap_id);
@@ -664,7 +663,7 @@ public:
 
   int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
   {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     const auto info = get_parent_info(in_snap_id);
     if (info) {
       *overlap = info->overlap;
@@ -708,7 +707,7 @@ public:
 
   void ImageCtx::cancel_async_requests(Context *on_finish) {
     {
-      Mutex::Locker async_ops_locker(async_ops_lock);
+      std::lock_guard async_ops_locker{async_ops_lock};
       if (!async_requests.empty()) {
         ldout(cct, 10) << "canceling async requests: count="
                        << async_requests.size() << dendl;
@@ -724,13 +723,6 @@ public:
     on_finish->complete(0);
   }
 
-  void ImageCtx::clear_pending_completions() {
-    Mutex::Locker l(completed_reqs_lock);
-    ldout(cct, 10) << "clear pending AioCompletion: count="
-                   << completed_reqs.size() << dendl;
-    completed_reqs.clear();
-  }
-
   void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta,
                                 bool thread_safe) {
     ldout(cct, 20) << __func__ << dendl;
@@ -779,8 +771,6 @@ public:
     bool skip_partial_discard = true;
     ASSIGN_OPTION(non_blocking_aio, bool);
     ASSIGN_OPTION(cache, bool);
-    ASSIGN_OPTION(cache_writethrough_until_flush, bool);
-    ASSIGN_OPTION(cache_max_dirty, Option::size_t);
     ASSIGN_OPTION(sparse_read_threshold_bytes, Option::size_t);
     ASSIGN_OPTION(readahead_max_bytes, Option::size_t);
     ASSIGN_OPTION(readahead_disable_after_bytes, Option::size_t);
@@ -810,6 +800,19 @@ public:
       alloc_hint_flags |= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE;
     }
 
+    librados::Rados rados(md_ctx);
+    int8_t require_osd_release;
+    int r = rados.get_min_compatible_osd(&require_osd_release);
+    if (r == 0 && require_osd_release >= CEPH_RELEASE_OCTOPUS) {
+      read_flags = 0;
+      auto read_policy = config.get_val<std::string>("rbd_read_from_replica_policy");
+      if (read_policy == "balance") {
+        read_flags |= CEPH_OSD_FLAG_BALANCE_READS;
+      } else if (read_policy == "localize") {
+        read_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
+      }
+    }
+
     io_work_queue->apply_qos_schedule_tick_min(
       config.get_val<uint64_t>("rbd_qos_schedule_tick_min"));
 
@@ -837,6 +840,12 @@ public:
       RBD_QOS_WRITE_BPS_THROTTLE,
       config.get_val<uint64_t>("rbd_qos_write_bps_limit"),
       config.get_val<uint64_t>("rbd_qos_write_bps_burst"));
+
+    if (!disable_zero_copy &&
+        config.get_val<bool>("rbd_disable_zero_copy_writes")) {
+      ldout(cct, 5) << this << ": disabling zero-copy writes" << dendl;
+      disable_zero_copy = true;
+    }
   }
 
   ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
@@ -853,8 +862,8 @@ public:
 
   void ImageCtx::set_image_name(const std::string &image_name) {
     // update the name so rename can be invoked repeatedly
-    RWLock::RLocker owner_locker(owner_lock);
-    RWLock::WLocker snap_locker(snap_lock);
+    std::shared_lock owner_locker{owner_lock};
+    std::unique_lock image_locker{image_lock};
     name = image_name;
     if (old_format) {
       header_oid = util::old_header_name(image_name);
@@ -872,35 +881,31 @@ public:
   }
 
   exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
-    ceph_assert(owner_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(owner_lock));
     ceph_assert(exclusive_lock_policy != nullptr);
     return exclusive_lock_policy;
   }
 
   void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
-    ceph_assert(owner_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(owner_lock));
     ceph_assert(policy != nullptr);
     delete exclusive_lock_policy;
     exclusive_lock_policy = policy;
   }
 
   journal::Policy *ImageCtx::get_journal_policy() const {
-    ceph_assert(snap_lock.is_locked());
+    ceph_assert(ceph_mutex_is_locked(image_lock));
     ceph_assert(journal_policy != nullptr);
     return journal_policy;
   }
 
   void ImageCtx::set_journal_policy(journal::Policy *policy) {
-    ceph_assert(snap_lock.is_wlocked());
+    ceph_assert(ceph_mutex_is_wlocked(image_lock));
     ceph_assert(policy != nullptr);
     delete journal_policy;
     journal_policy = policy;
   }
 
-  bool ImageCtx::is_writeback_cache_enabled() const {
-    return (cache && cache_max_dirty > 0);
-  }
-
   void ImageCtx::get_thread_pool_instance(CephContext *cct,
                                           ThreadPool **thread_pool,
                                           ContextWQ **op_work_queue) {
@@ -912,7 +917,7 @@ public:
   }
 
   void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
-                                    Mutex **timer_lock) {
+                                    ceph::mutex **timer_lock) {
     auto safe_timer_singleton =
       &cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
        "librbd::journal::safe_timer", false, cct);