git.proxmox.com Git - ceph.git/blobdiff - ceph/src/librbd/api/DiffIterate.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / librbd / api / DiffIterate.cc
index 527193b90533c15b9308aad641d1f017168e979c..dcd07ea56a3dde924aeec10b48c5a5dcf2fe514f 100644 (file)
@@ -9,7 +9,6 @@
 #include "librbd/internal.h"
 #include "librbd/io/AioCompletion.h"
 #include "librbd/io/ImageDispatchSpec.h"
-#include "librbd/io/ImageRequestWQ.h"
 #include "librbd/object_map/DiffRequest.h"
 #include "include/rados/librados.hpp"
 #include "include/interval_set.h"
@@ -17,7 +16,6 @@
 #include "common/Cond.h"
 #include "common/Throttle.h"
 #include "osdc/Striper.h"
-#include "librados/snap_set_diff.h"
 #include <boost/tuple/tuple.hpp>
 #include <list>
 #include <map>
@@ -36,6 +34,7 @@ struct DiffContext {
   DiffIterate<>::Callback callback;
   void *callback_arg;
   bool whole_object;
+  bool include_parent;
   uint64_t from_snap_id;
   uint64_t end_snap_id;
   interval_set<uint64_t> parent_diff;
@@ -43,37 +42,38 @@ struct DiffContext {
 
   template <typename I>
   DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
-              void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
-              uint64_t _end_snap_id)
+              void *callback_arg, bool _whole_object, bool _include_parent,
+              uint64_t _from_snap_id, uint64_t _end_snap_id)
     : callback(callback), callback_arg(callback_arg),
-      whole_object(_whole_object), from_snap_id(_from_snap_id),
-      end_snap_id(_end_snap_id),
+      whole_object(_whole_object), include_parent(_include_parent),
+      from_snap_id(_from_snap_id), end_snap_id(_end_snap_id),
       throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
   }
 };
 
+template <typename I>
 class C_DiffObject : public Context {
 public:
-  template <typename I>
-  C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
-               DiffContext &diff_context, const std::string &oid,
-               uint64_t offset, const std::vector<ObjectExtent> &object_extents)
-    : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
-      m_diff_context(diff_context), m_oid(oid), m_offset(offset),
-      m_object_extents(object_extents), m_snap_ret(0) {
+  C_DiffObject(I &image_ctx, DiffContext &diff_context, uint64_t image_offset,
+               uint64_t image_length)
+    : m_image_ctx(image_ctx), m_cct(image_ctx.cct),
+      m_diff_context(diff_context), m_image_offset(image_offset),
+      m_image_length(image_length) {
   }
 
   void send() {
-    C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
-    librados::AioCompletion *rados_completion =
-      util::create_rados_callback(ctx);
-
-    librados::ObjectReadOperation op;
-    op.list_snaps(&m_snap_set, &m_snap_ret);
-
-    int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
-    ceph_assert(r == 0);
-    rados_completion->release();
+    Context* ctx = m_diff_context.throttle.start_op(this);
+    auto aio_comp = io::AioCompletion::create_and_start(
+      ctx, util::get_image_ctx(&m_image_ctx), io::AIO_TYPE_GENERIC);
+    auto req = io::ImageDispatchSpec::create_list_snaps(
+      m_image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
+      aio_comp, {{m_image_offset, m_image_length}},
+      {m_diff_context.from_snap_id, m_diff_context.end_snap_id},
+      (m_diff_context.include_parent ?
+        0 : io::LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT) |
+      (m_diff_context.whole_object ? io::LIST_SNAPS_FLAG_WHOLE_OBJECT : 0),
+      &m_snapshot_delta, {});
+    req->send();
   }
 
 protected:
@@ -82,144 +82,70 @@ protected:
 
   void finish(int r) override {
     CephContext *cct = m_cct;
-    if (r == 0 && m_snap_ret < 0) {
-      r = m_snap_ret;
-    }
 
-    Diffs diffs;
-    if (r == 0) {
-      ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
-      compute_diffs(&diffs);
-    } else if (r == -ENOENT) {
-      ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
-                     << dendl;
-      r = 0;
-      compute_parent_overlap(&diffs);
-    } else {
-      ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
-                     << cpp_strerror(r) << dendl;
+    if (r < 0) {
+      ldout(cct, 20) << "list_snaps failed: " << m_image_offset << "~"
+                     << m_image_length << ": " << cpp_strerror(r) << dendl;
     }
 
-    if (r == 0) {
-      for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
-        r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
-                                    m_diff_context.callback_arg);
-        if (r < 0) {
-          break;
-        }
+    Diffs diffs;
+    ldout(cct, 20) << "image extent " << m_image_offset << "~"
+                     << m_image_length << ": list_snaps complete" << dendl;
+
+    compute_diffs(&diffs);
+    for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
+      r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
+                                  m_diff_context.callback_arg);
+      if (r < 0) {
+        break;
       }
     }
     m_diff_context.throttle.end_op(r);
   }
 
 private:
+  I& m_image_ctx;
   CephContext *m_cct;
-  librados::IoCtx &m_head_ctx;
   DiffContext &m_diff_context;
-  std::string m_oid;
-  uint64_t m_offset;
-  std::vector<ObjectExtent> m_object_extents;
+  uint64_t m_image_offset;
+  uint64_t m_image_length;
 
-  librados::snap_set_t m_snap_set;
-  int m_snap_ret;
+  io::SnapshotDelta m_snapshot_delta;
 
   void compute_diffs(Diffs *diffs) {
     CephContext *cct = m_cct;
 
-    // calc diff from from_snap_id -> to_snap_id
-    interval_set<uint64_t> diff;
-    uint64_t end_size;
-    bool end_exists;
-    librados::snap_t clone_end_snap_id;
-    bool whole_object;
-    calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
-                       m_diff_context.end_snap_id, &diff, &end_size,
-                       &end_exists, &clone_end_snap_id, &whole_object);
-    if (whole_object) {
-      ldout(cct, 1) << "object " << m_oid << ": need to provide full object"
-                    << dendl;
-    }
-    ldout(cct, 20) << "  diff " << diff << " end_exists=" << end_exists
-                   << dendl;
-    if (diff.empty() && !whole_object) {
-      if (m_diff_context.from_snap_id == 0 && !end_exists) {
-        compute_parent_overlap(diffs);
-      }
-      return;
-    } else if (m_diff_context.whole_object || whole_object) {
-      // provide the full object extents to the callback
-      for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
-           q != m_object_extents.end(); ++q) {
-        diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
-                                           end_exists));
-      }
-      return;
-    }
-
-    for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
-         q != m_object_extents.end(); ++q) {
-      ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
-                     << q->offset << "~" << q->length << " from "
-                     << q->buffer_extents << dendl;
-      uint64_t opos = q->offset;
-      for (vector<pair<uint64_t,uint64_t> >::iterator r =
-             q->buffer_extents.begin();
-           r != q->buffer_extents.end(); ++r) {
-        interval_set<uint64_t> overlap;  // object extents
-        overlap.insert(opos, r->second);
-        overlap.intersection_of(diff);
-        ldout(cct, 20) << " opos " << opos
-                       << " buf " << r->first << "~" << r->second
-                       << " overlap " << overlap << dendl;
-        for (interval_set<uint64_t>::iterator s = overlap.begin();
-              s != overlap.end(); ++s) {
-          uint64_t su_off = s.get_start() - opos;
-          uint64_t logical_off = m_offset + r->first + su_off;
-          ldout(cct, 20) << "   overlap extent " << s.get_start() << "~"
-                         << s.get_len() << " logical " << logical_off << "~"
-                         << s.get_len() << dendl;
-          diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
-                           end_exists));
+    // merge per-snapshot deltas into an aggregate
+    io::SparseExtents aggregate_snapshot_extents;
+    for (auto& [key, snapshot_extents] : m_snapshot_delta) {
+      for (auto& snapshot_extent : snapshot_extents) {
+        auto state = snapshot_extent.get_val().state;
+
+        // ignore DNE object (and parent)
+        if ((state == io::SPARSE_EXTENT_STATE_DNE) ||
+            (key == io::INITIAL_WRITE_READ_SNAP_IDS &&
+             state == io::SPARSE_EXTENT_STATE_ZEROED)) {
+          continue;
         }
-        opos += r->second;
+
+        aggregate_snapshot_extents.insert(
+          snapshot_extent.get_off(), snapshot_extent.get_len(),
+          {state, snapshot_extent.get_len()});
       }
-      ceph_assert(opos == q->offset + q->length);
     }
-  }
 
-  void compute_parent_overlap(Diffs *diffs) {
-    if (m_diff_context.from_snap_id == 0 &&
-        !m_diff_context.parent_diff.empty()) {
-      // report parent diff instead
-      for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
-           q != m_object_extents.end(); ++q) {
-        for (vector<pair<uint64_t,uint64_t> >::iterator r =
-               q->buffer_extents.begin();
-             r != q->buffer_extents.end(); ++r) {
-          interval_set<uint64_t> o;
-          o.insert(m_offset + r->first, r->second);
-          o.intersection_of(m_diff_context.parent_diff);
-          ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
-          for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
-               ++s) {
-            diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
-                             true));
-          }
-        }
-      }
+    // build delta callback set
+    for (auto& snapshot_extent : aggregate_snapshot_extents) {
+      ldout(cct, 20) << "off=" << snapshot_extent.get_off() << ", "
+                     << "len=" << snapshot_extent.get_len() << ", "
+                     << "state=" << snapshot_extent.get_val().state << dendl;
+      diffs->emplace_back(
+        snapshot_extent.get_off(), snapshot_extent.get_len(),
+        snapshot_extent.get_val().state == io::SPARSE_EXTENT_STATE_DATA);
     }
   }
 };
 
-int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
-  // it's possible for a discard to create a hole in the parent image -- ignore
-  if (exists) {
-    interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
-    diff->insert(off, len);
-  }
-  return 0;
-}
-
 } // anonymous namespace
 
 template <typename I>
@@ -244,10 +170,10 @@ int DiffIterate<I>::diff_iterate(I *ictx,
     std::shared_lock owner_locker{ictx->owner_lock};
     auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
                                                         io::AIO_TYPE_FLUSH);
-    auto req = io::ImageDispatchSpec<I>::create_flush_request(
-      *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
+    auto req = io::ImageDispatchSpec::create_flush(
+      *ictx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
+      aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
     req->send();
-    delete req;
   }
   int r = flush_ctx.wait();
   if (r < 0) {
@@ -278,16 +204,15 @@ int DiffIterate<I>::execute() {
 
   ceph_assert(m_image_ctx.data_ctx.is_valid());
 
-  librados::IoCtx head_ctx;
   librados::snap_t from_snap_id = 0;
   librados::snap_t end_snap_id;
   uint64_t from_size = 0;
   uint64_t end_size;
   {
     std::shared_lock image_locker{m_image_ctx.image_lock};
-    head_ctx.dup(m_image_ctx.data_ctx);
     if (m_from_snap_name) {
-      from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
+      from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace,
+                                             m_from_snap_name);
       from_size = m_image_ctx.get_image_size(from_snap_id);
     }
     end_snap_id = m_image_ctx.snap_id;
@@ -324,34 +249,13 @@ int DiffIterate<I>::execute() {
     }
   }
 
-  // we must list snaps via the head, not end snap
-  head_ctx.snap_set_read(CEPH_SNAPDIR);
-
   ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
                 << end_snap_id << " size from " << from_size
                 << " to " << end_size << dendl;
-
-  // check parent overlap only if we are comparing to the beginning of time
   DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
-                           m_whole_object, from_snap_id, end_snap_id);
-  if (m_include_parent && from_snap_id == 0) {
-    std::shared_lock image_locker{m_image_ctx.image_lock};
-    uint64_t overlap = 0;
-    m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
-    r = 0;
-    if (m_image_ctx.parent && overlap > 0) {
-      ldout(cct, 10) << " first getting parent diff" << dendl;
-      DiffIterate diff_parent(*m_image_ctx.parent, {},
-                             nullptr, 0, overlap,
-                              m_include_parent, m_whole_object,
-                              &simple_diff_cb,
-                              &diff_context.parent_diff);
-      r = diff_parent.execute();
-    }
-    if (r < 0) {
-      return r;
-    }
-  }
+                           m_whole_object,
+                           m_include_parent && from_snap_id == 0, from_snap_id,
+                           end_snap_id);
 
   uint64_t period = m_image_ctx.get_stripe_period();
   uint64_t off = m_offset;
@@ -361,25 +265,23 @@ int DiffIterate<I>::execute() {
     uint64_t period_off = off - (off % period);
     uint64_t read_len = min(period_off + period - off, left);
 
-    // map to extents
-    map<object_t,vector<ObjectExtent> > object_extents;
-    Striper::file_to_extents(cct, m_image_ctx.format_string,
-                             &m_image_ctx.layout, off, read_len, 0,
-                             object_extents, 0);
+    if (fast_diff_enabled) {
+      // map to extents
+      map<object_t,vector<ObjectExtent> > object_extents;
+      Striper::file_to_extents(cct, m_image_ctx.format_string,
+                               &m_image_ctx.layout, off, read_len, 0,
+                               object_extents, 0);
 
-    // get snap info for each object
-    for (map<object_t,vector<ObjectExtent> >::iterator p =
-           object_extents.begin();
-         p != object_extents.end(); ++p) {
-      ldout(cct, 20) << "object " << p->first << dendl;
+      // get snap info for each object
+      for (auto& [object, extents] : object_extents) {
+        ldout(cct, 20) << "object " << object << dendl;
 
-      if (fast_diff_enabled) {
-        const uint64_t object_no = p->second.front().objectno;
+        const uint64_t object_no = extents.front().objectno;
         uint8_t diff_state = object_diff_state[object_no];
         if (diff_state == object_map::DIFF_STATE_HOLE &&
             from_snap_id == 0 && !diff_context.parent_diff.empty()) {
           // no data in child object -- report parent diff instead
-          for (auto& oe : p->second) {
+          for (auto& oe : extents) {
             for (auto& be : oe.buffer_extents) {
               interval_set<uint64_t> o;
               o.insert(off + be.first, be.second);
@@ -397,25 +299,22 @@ int DiffIterate<I>::execute() {
         } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ||
                    diff_state == object_map::DIFF_STATE_DATA_UPDATED) {
           bool updated = (diff_state == object_map::DIFF_STATE_DATA_UPDATED);
-          for (std::vector<ObjectExtent>::iterator q = p->second.begin();
-               q != p->second.end(); ++q) {
-            r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
+          for (auto& oe : extents) {
+            r = m_callback(off + oe.offset, oe.length, updated, m_callback_arg);
             if (r < 0) {
               return r;
             }
           }
         }
-      } else {
-        C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
-                                                     diff_context,
-                                                     p->first.name, off,
-                                                     p->second);
-        diff_object->send();
-
-        if (diff_context.throttle.pending_error()) {
-          r = diff_context.throttle.wait_for_ret();
-          return r;
-        }
+      }
+    }  else {
+      auto diff_object = new C_DiffObject<I>(m_image_ctx, diff_context, off,
+                                             read_len);
+      diff_object->send();
+
+      if (diff_context.throttle.pending_error()) {
+        r = diff_context.throttle.wait_for_ret();
+        return r;
       }
     }