X-Git-Url: https://git.proxmox.com/?p=ceph.git;a=blobdiff_plain;f=ceph%2Fsrc%2Fosd%2FReplicatedBackend.cc;h=602a0f08f76b1338de66995acd2ce87a971612f0;hp=081204a033fd91f4885f18d57b5ead56129a7d20;hb=28e407b858acd3bddc89f68583571f771bb42e46;hpb=dfcb7b53b2e4fcd2a5af0240d4975adc711ab96e diff --git a/ceph/src/osd/ReplicatedBackend.cc b/ceph/src/osd/ReplicatedBackend.cc index 081204a03..602a0f08f 100644 --- a/ceph/src/osd/ReplicatedBackend.cc +++ b/ceph/src/osd/ReplicatedBackend.cc @@ -703,103 +703,140 @@ void ReplicatedBackend::do_repop_reply(OpRequestRef op) } } -void ReplicatedBackend::be_deep_scrub( +int ReplicatedBackend::be_deep_scrub( const hobject_t &poid, - uint32_t seed, - ScrubMap::object &o, - ThreadPool::TPHandle &handle) + ScrubMap &map, + ScrubMapBuilder &pos, + ScrubMap::object &o) { - dout(10) << __func__ << " " << poid << " seed " - << std::hex << seed << std::dec << dendl; - bufferhash h(seed), oh(seed); - bufferlist bl, hdrbl; + dout(10) << __func__ << " " << poid << " pos " << pos << dendl; int r; - __u64 pos = 0; + uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | + CEPH_OSD_OP_FLAG_FADVISE_DONTNEED; - uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | CEPH_OSD_OP_FLAG_FADVISE_DONTNEED; - - while (true) { - handle.reset_tp_timeout(); - r = store->read( - ch, - ghobject_t( - poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - pos, - cct->_conf->osd_deep_scrub_stride, bl, - fadvise_flags); - if (r <= 0) - break; - - h << bl; - pos += bl.length(); - bl.clear(); + utime_t sleeptime; + sleeptime.set_from_double(cct->_conf->osd_debug_deep_scrub_sleep); + if (sleeptime != utime_t()) { + lgeneric_derr(cct) << __func__ << " sleeping for " << sleeptime << dendl; + sleeptime.sleep(); } - if (r == -EIO) { - dout(25) << __func__ << " " << poid << " got " - << r << " on read, read_error" << dendl; - o.read_error = true; - return; - } - o.digest = h.digest(); - o.digest_present = true; - bl.clear(); - r = store->omap_get_header( - coll, - ghobject_t( - poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - &hdrbl, true); - // NOTE: bobtail to giant, we would crc the head as (len, head). - // that changes at the same time we start using a non-zero seed. - if (r == 0 && hdrbl.length()) { - dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length()) - << dendl; - if (seed == 0) { - // legacy - bufferlist bl; - ::encode(hdrbl, bl); - oh << bl; - } else { - oh << hdrbl; + assert(poid == pos.ls[pos.pos]); + if (!pos.data_done()) { + if (pos.data_pos == 0) { + pos.data_hash = bufferhash(-1); + } + + bufferlist bl; + r = store->read( + ch, + ghobject_t( + poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), + pos.data_pos, + cct->_conf->osd_deep_scrub_stride, bl, + fadvise_flags); + if (r < 0) { + dout(20) << __func__ << " " << poid << " got " + << r << " on read, read_error" << dendl; + o.read_error = true; + return 0; + } + if (r > 0) { + pos.data_hash << bl; + } + pos.data_pos += r; + if (r == cct->_conf->osd_deep_scrub_stride) { + dout(20) << __func__ << " " << poid << " more data, digest so far 0x" + << std::hex << pos.data_hash.digest() << std::dec << dendl; + return -EINPROGRESS; + } + // done with bytes + pos.data_pos = -1; + o.digest = pos.data_hash.digest(); + o.digest_present = true; + dout(20) << __func__ << " " << poid << " done with data, digest 0x" + << std::hex << o.digest << std::dec << dendl; + } + + // omap header + if (pos.omap_pos.empty()) { + pos.omap_hash = bufferhash(-1); + + bufferlist hdrbl; + r = store->omap_get_header( + coll, + ghobject_t( + poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), + &hdrbl, true); + if (r == -EIO) { + dout(20) << __func__ << " " << poid << " got " + << r << " on omap header read, read_error" << dendl; + o.read_error = true; + return 0; + } + if (r == 0 && hdrbl.length()) { + dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length()) + << dendl; + pos.omap_hash << hdrbl; } - } else if (r == -EIO) { - dout(25) << __func__ << " " << poid << " got " - << r << " on omap header read, read_error" << dendl; - o.read_error = true; - return; } + // omap ObjectMap::ObjectMapIterator iter = store->get_omap_iterator( coll, ghobject_t( poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); assert(iter); - for (iter->seek_to_first(); iter->status() == 0 && iter->valid(); - iter->next(false)) { - handle.reset_tp_timeout(); - - dout(25) << "CRC key " << iter->key() << " value:\n"; - iter->value().hexdump(*_dout); - *_dout << dendl; - + if (pos.omap_pos.length()) { + iter->lower_bound(pos.omap_pos); + } else { + iter->seek_to_first(); + } + int max = g_conf->osd_deep_scrub_keys; + while (iter->status() == 0 && iter->valid()) { + pos.omap_bytes += iter->value().length(); + ++pos.omap_keys; + --max; + // fixme: we can do this more efficiently. + bufferlist bl; ::encode(iter->key(), bl); ::encode(iter->value(), bl); - oh << bl; - bl.clear(); + pos.omap_hash << bl; + + iter->next(); + + if (iter->valid() && max == 0) { + pos.omap_pos = iter->key(); + return -EINPROGRESS; + } + if (iter->status() < 0) { + dout(25) << __func__ << " " << poid + << " on omap scan, db status error" << dendl; + o.read_error = true; + return 0; + } } - if (iter->status() < 0) { - dout(25) << __func__ << " " << poid - << " on omap scan, db status error" << dendl; - o.read_error = true; - return; + if (pos.omap_keys > cct->_conf-> + osd_deep_scrub_large_omap_object_key_threshold || + pos.omap_bytes > cct->_conf-> + osd_deep_scrub_large_omap_object_value_sum_threshold) { + dout(25) << __func__ << " " << poid + << " large omap object detected. Object has " << pos.omap_keys + << " keys and size " << pos.omap_bytes << " bytes" << dendl; + o.large_omap_object_found = true; + o.large_omap_object_key_count = pos.omap_keys; + o.large_omap_object_value_size = pos.omap_bytes; + map.has_large_omap_object_errors = true; } - //Store final calculated CRC32 of omap header & key/values - o.omap_digest = oh.digest(); + o.omap_digest = pos.omap_hash.digest(); o.omap_digest_present = true; - dout(20) << __func__ << " " << poid << " omap_digest " + dout(20) << __func__ << " done with " << poid << " omap_digest " << std::hex << o.omap_digest << std::dec << dendl; + + // done! + return 0; } void ReplicatedBackend::_do_push(OpRequestRef op) @@ -1094,6 +1131,8 @@ void ReplicatedBackend::do_repop(OpRequestRef op) // we better not be missing this. assert(!parent->get_log().get_missing().is_missing(soid)); + parent->maybe_preempt_replica_scrub(soid); + int ackerosd = m->get_source().num(); op->mark_started();