]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/osd/ReplicatedBackend.cc
update sources to 12.2.7
[ceph.git] / ceph / src / osd / ReplicatedBackend.cc
index 081204a033fd91f4885f18d57b5ead56129a7d20..602a0f08f76b1338de66995acd2ce87a971612f0 100644 (file)
@@ -703,103 +703,140 @@ void ReplicatedBackend::do_repop_reply(OpRequestRef op)
   }
 }
 
-void ReplicatedBackend::be_deep_scrub(
+int ReplicatedBackend::be_deep_scrub(
   const hobject_t &poid,
-  uint32_t seed,
-  ScrubMap::object &o,
-  ThreadPool::TPHandle &handle)
+  ScrubMap &map,
+  ScrubMapBuilder &pos,
+  ScrubMap::object &o)
 {
-  dout(10) << __func__ << " " << poid << " seed " 
-          << std::hex << seed << std::dec << dendl;
-  bufferhash h(seed), oh(seed);
-  bufferlist bl, hdrbl;
+  dout(10) << __func__ << " " << poid << " pos " << pos << dendl;
   int r;
-  __u64 pos = 0;
+  uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+                           CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
 
-  uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
-
-  while (true) {
-    handle.reset_tp_timeout();
-    r = store->read(
-         ch,
-         ghobject_t(
-           poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
-         pos,
-         cct->_conf->osd_deep_scrub_stride, bl,
-         fadvise_flags);
-    if (r <= 0)
-      break;
-
-    h << bl;
-    pos += bl.length();
-    bl.clear();
+  utime_t sleeptime;
+  sleeptime.set_from_double(cct->_conf->osd_debug_deep_scrub_sleep);
+  if (sleeptime != utime_t()) {
+    lgeneric_derr(cct) << __func__ << " sleeping for " << sleeptime << dendl;
+    sleeptime.sleep();
   }
-  if (r == -EIO) {
-    dout(25) << __func__ << "  " << poid << " got "
-            << r << " on read, read_error" << dendl;
-    o.read_error = true;
-    return;
-  }
-  o.digest = h.digest();
-  o.digest_present = true;
 
-  bl.clear();
-  r = store->omap_get_header(
-    coll,
-    ghobject_t(
-      poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
-    &hdrbl, true);
-  // NOTE: bobtail to giant, we would crc the head as (len, head).
-  // that changes at the same time we start using a non-zero seed.
-  if (r == 0 && hdrbl.length()) {
-    dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
-             << dendl;
-    if (seed == 0) {
-      // legacy
-      bufferlist bl;
-      ::encode(hdrbl, bl);
-      oh << bl;
-    } else {
-      oh << hdrbl;
+  assert(poid == pos.ls[pos.pos]);
+  if (!pos.data_done()) {
+    if (pos.data_pos == 0) {
+      pos.data_hash = bufferhash(-1);
+    }
+
+    bufferlist bl;
+    r = store->read(
+      ch,
+      ghobject_t(
+       poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
+      pos.data_pos,
+      cct->_conf->osd_deep_scrub_stride, bl,
+      fadvise_flags);
+    if (r < 0) {
+      dout(20) << __func__ << "  " << poid << " got "
+              << r << " on read, read_error" << dendl;
+      o.read_error = true;
+      return 0;
+    }
+    if (r > 0) {
+      pos.data_hash << bl;
+    }
+    pos.data_pos += r;
+    if (r == cct->_conf->osd_deep_scrub_stride) {
+      dout(20) << __func__ << "  " << poid << " more data, digest so far 0x"
+              << std::hex << pos.data_hash.digest() << std::dec << dendl;
+      return -EINPROGRESS;
+    }
+    // done with bytes
+    pos.data_pos = -1;
+    o.digest = pos.data_hash.digest();
+    o.digest_present = true;
+    dout(20) << __func__ << "  " << poid << " done with data, digest 0x"
+            << std::hex << o.digest << std::dec << dendl;
+  }
+
+  // omap header
+  if (pos.omap_pos.empty()) {
+    pos.omap_hash = bufferhash(-1);
+
+    bufferlist hdrbl;
+    r = store->omap_get_header(
+      coll,
+      ghobject_t(
+       poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
+      &hdrbl, true);
+    if (r == -EIO) {
+      dout(20) << __func__ << "  " << poid << " got "
+              << r << " on omap header read, read_error" << dendl;
+      o.read_error = true;
+      return 0;
+    }
+    if (r == 0 && hdrbl.length()) {
+      dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
+              << dendl;
+      pos.omap_hash << hdrbl;
     }
-  } else if (r == -EIO) {
-    dout(25) << __func__ << "  " << poid << " got "
-            << r << " on omap header read, read_error" << dendl;
-    o.read_error = true;
-    return;
   }
 
+  // omap
   ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(
     coll,
     ghobject_t(
       poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
   assert(iter);
-  for (iter->seek_to_first(); iter->status() == 0 && iter->valid();
-    iter->next(false)) {
-    handle.reset_tp_timeout();
-
-    dout(25) << "CRC key " << iter->key() << " value:\n";
-    iter->value().hexdump(*_dout);
-    *_dout << dendl;
-
+  if (pos.omap_pos.length()) {
+    iter->lower_bound(pos.omap_pos);
+  } else {
+    iter->seek_to_first();
+  }
+  int max = g_conf->osd_deep_scrub_keys;
+  while (iter->status() == 0 && iter->valid()) {
+    pos.omap_bytes += iter->value().length();
+    ++pos.omap_keys;
+    --max;
+    // fixme: we can do this more efficiently.
+    bufferlist bl;
     ::encode(iter->key(), bl);
     ::encode(iter->value(), bl);
-    oh << bl;
-    bl.clear();
+    pos.omap_hash << bl;
+
+    iter->next();
+
+    if (iter->valid() && max == 0) {
+      pos.omap_pos = iter->key();
+      return -EINPROGRESS;
+    }
+    if (iter->status() < 0) {
+      dout(25) << __func__ << "  " << poid
+              << " on omap scan, db status error" << dendl;
+      o.read_error = true;
+      return 0;
+    }
   }
 
-  if (iter->status() < 0) {
-    dout(25) << __func__ << "  " << poid
-             << " on omap scan, db status error" << dendl;
-    o.read_error = true;
-    return;
+  if (pos.omap_keys > cct->_conf->
+       osd_deep_scrub_large_omap_object_key_threshold ||
+      pos.omap_bytes > cct->_conf->
+       osd_deep_scrub_large_omap_object_value_sum_threshold) {
+    dout(25) << __func__ << " " << poid
+            << " large omap object detected. Object has " << pos.omap_keys
+            << " keys and size " << pos.omap_bytes << " bytes" << dendl;
+    o.large_omap_object_found = true;
+    o.large_omap_object_key_count = pos.omap_keys;
+    o.large_omap_object_value_size = pos.omap_bytes;
+    map.has_large_omap_object_errors = true;
   }
 
-  //Store final calculated CRC32 of omap header & key/values
-  o.omap_digest = oh.digest();
+  o.omap_digest = pos.omap_hash.digest();
   o.omap_digest_present = true;
-  dout(20) << __func__ << "  " << poid << " omap_digest "
+  dout(20) << __func__ << " done with " << poid << " omap_digest "
           << std::hex << o.omap_digest << std::dec << dendl;
+
+  // done!
+  return 0;
 }
 
 void ReplicatedBackend::_do_push(OpRequestRef op)
@@ -1094,6 +1131,8 @@ void ReplicatedBackend::do_repop(OpRequestRef op)
   // we better not be missing this.
   assert(!parent->get_log().get_missing().is_missing(soid));
 
+  parent->maybe_preempt_replica_scrub(soid);
+
   int ackerosd = m->get_source().num();
 
   op->mark_started();