X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fsrc%2Fosd%2FPG.cc;h=05ad63c69c121e55205399aeb2a9a65721dd5af9;hb=224ce89bce8186937e77bdbda572a650953f8c23;hp=e753310534bd86bac4155092a9de1d1f30451655;hpb=06dae762bfef1ba116dc58483c7b68b733d853e0;p=ceph.git

diff --git a/ceph/src/osd/PG.cc b/ceph/src/osd/PG.cc
index e75331053..05ad63c69 100644
--- a/ceph/src/osd/PG.cc
+++ b/ceph/src/osd/PG.cc
@@ -994,7 +994,7 @@ void PG::clear_primary_state()
 PG::Scrubber::Scrubber()
  : reserved(false), reserve_failed(false),
    epoch_start(0),
-   active(false), queue_snap_trim(false),
+   active(false),
    waiting_on(0), shallow_errors(0), deep_errors(0), fixed(0),
    must_scrub(false), must_deep_scrub(false), must_repair(false),
    auto_repair(false),
@@ -1677,7 +1677,7 @@ void PG::activate(ObjectStore::Transaction& t,
 	 * behind.
 	 */
 	// backfill
-	osd->clog->info() << info.pgid << " starting backfill to osd." << peer
+	osd->clog->debug() << info.pgid << " starting backfill to osd." << peer
 	  << " from (" << pi.log_tail << "," << pi.last_update
 	  << "] " << pi.last_backfill
 	  << " to " << info.last_update;
@@ -2003,27 +2003,16 @@ struct C_PG_FinishRecovery : public Context {
 
 void PG::mark_clean()
 {
-  // only mark CLEAN if we have the desired number of replicas AND we
-  //  are not remapped.
-  if (actingset.size() == get_osdmap()->get_pg_size(info.pgid.pgid) &&
-      up == acting)
+  if (actingset.size() == get_osdmap()->get_pg_size(info.pgid.pgid)) {
     state_set(PG_STATE_CLEAN);
-
-  // NOTE: this is actually a bit premature: we haven't purged the
-  // strays yet.
-  info.history.last_epoch_clean = get_osdmap()->get_epoch();
-  info.history.last_interval_clean = info.history.same_interval_since;
-
-  past_intervals.clear();
-  dirty_big_info = true;
-
-  if (is_active()) {
-    /* The check is needed because if we are below min_size we're not
-     * actually active */
-    kick_snap_trim();
+    info.history.last_epoch_clean = get_osdmap()->get_epoch();
+    info.history.last_interval_clean = info.history.same_interval_since;
+    past_intervals.clear();
+    dirty_big_info = true;
+    dirty_info = true;
   }
 
-  dirty_info = true;
+  kick_snap_trim();
 }
 
 unsigned PG::get_recovery_priority()
@@ -4034,6 +4023,52 @@ void PG::_scan_snaps(ScrubMap &smap)
   }
 }
 
+void PG::_repair_oinfo_oid(ScrubMap &smap)
+{
+  for (map<hobject_t, ScrubMap::object>::reverse_iterator i = smap.objects.rbegin();
+       i != smap.objects.rend();
+       ++i) {
+    const hobject_t &hoid = i->first;
+    ScrubMap::object &o = i->second;
+
+    bufferlist bl;
+    if (o.attrs.find(OI_ATTR) == o.attrs.end()) {
+      continue;
+    }
+    bl.push_back(o.attrs[OI_ATTR]);
+    object_info_t oi;
+    try {
+      oi.decode(bl);
+    } catch(...) {
+      continue;
+    }
+    if (oi.soid != hoid) {
+      ObjectStore::Transaction t;
+      OSDriver::OSTransaction _t(osdriver.get_transaction(&t));
+      osd->clog->error() << "osd." << osd->whoami
+                         << " found object info error on pg "
+                         << info.pgid
+                         << " oid " << hoid << " oid in object info: "
+                         << oi.soid
+                         << "...repaired";
+      // Fix object info
+      oi.soid = hoid;
+      bl.clear();
+      ::encode(oi, bl, get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr));
+
+      bufferptr bp(bl.c_str(), bl.length());
+      o.attrs[OI_ATTR] = bp;
+
+      t.setattr(coll, ghobject_t(hoid), OI_ATTR, bl);
+      int r = osd->store->apply_transaction(osr.get(), std::move(t));
+      if (r != 0) {
+        derr << __func__ << ": apply_transaction got " << cpp_strerror(r)
+             << dendl;
+      }
+    }
+  }
+}
+
 /*
  * build a scrub map over a chunk without releasing the lock
  * only used by chunky scrub
@@ -4066,6 +4101,7 @@ int PG::build_scrub_map_chunk(
   get_pgbackend()->be_scan_list(map, ls, deep, seed, handle);
   _scan_rollback_obs(rollback_obs, handle);
   _scan_snaps(map);
+  _repair_oinfo_oid(map);
 
   dout(20) << __func__ << " done" << dendl;
   return 0;
@@ -4100,7 +4136,14 @@ void PG::repair_object(
   eversion_t v;
   bufferlist bv;
   bv.push_back(po.attrs[OI_ATTR]);
-  object_info_t oi(bv);
+  object_info_t oi;
+  try {
+    bufferlist::iterator bliter = bv.begin();
+    ::decode(oi, bliter);
+  } catch (...) {
+    dout(0) << __func__ << ": Need version of replica, bad object_info_t: " << soid << dendl;
+    assert(0);
+  }
   if (bad_peer != primary) {
     peer_missing[bad_peer].add(soid, oi.version, eversion_t());
   } else {
@@ -4611,6 +4654,11 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
         scrubber.state = PG::Scrubber::INACTIVE;
         done = true;
 
+        if (!snap_trimq.empty()) {
+          dout(10) << "scrub finished, requeuing snap_trimmer" << dendl;
+          snap_trimmer_scrub_complete();
+        }
+
         break;
 
       default:
@@ -4635,11 +4683,6 @@ void PG::scrub_clear_state()
 
   requeue_ops(waiting_for_scrub);
 
-  if (scrubber.queue_snap_trim) {
-    dout(10) << "scrub finished, requeuing snap_trimmer" << dendl;
-    snap_trimmer_scrub_complete();
-  }
-
   scrubber.reset();
 
   // type-specific state clear
@@ -6266,6 +6309,37 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
   pg->publish_stats_to_osd();
 }
 
+boost::statechart::result
+PG::RecoveryState::Backfilling::react(const CancelBackfill &)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+  // XXX: Add a new pg state so user can see why backfill isn't proceeding
+  // Can't use PG_STATE_BACKFILL_WAIT since it means waiting for reservations
+  //pg->state_set(PG_STATE_BACKFILL_STALLED????);
+
+  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
+       it != pg->backfill_targets.end();
+       ++it) {
+    assert(*it != pg->pg_whoami);
+    ConnectionRef con = pg->osd->get_con_osd_cluster(
+      it->osd, pg->get_osdmap()->get_epoch());
+    if (con) {
+      pg->osd->send_message_osd_cluster(
+        new MBackfillReserve(
+          MBackfillReserve::REJECT,
+          spg_t(pg->info.pgid.pgid, it->shard),
+          pg->get_osdmap()->get_epoch()),
+        con.get());
+    }
+  }
+
+  pg->waiting_on_backfill.clear();
+
+  pg->schedule_backfill_full_retry();
+  return transit<NotBackfilling>();
+}
+
 boost::statechart::result
 PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
 {
@@ -6736,10 +6810,10 @@ PG::RecoveryState::Recovering::Recovering(my_context ctx)
   pg->queue_recovery();
 }
 
-void PG::RecoveryState::Recovering::release_reservations()
+void PG::RecoveryState::Recovering::release_reservations(bool cancel)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  assert(!pg->pg_log.get_missing().have_missing());
+  assert(cancel || !pg->pg_log.get_missing().have_missing());
 
   // release remote reservations
   for (set<pg_shard_t>::const_iterator i =
@@ -6779,6 +6853,17 @@ PG::RecoveryState::Recovering::react(const RequestBackfill &evt)
   return transit<WaitRemoteBackfillReserved>();
 }
 
+boost::statechart::result
+PG::RecoveryState::Recovering::react(const CancelRecovery &evt)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->state_clear(PG_STATE_RECOVERING);
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+  release_reservations(true);
+  pg->schedule_recovery_full_retry();
+  return transit<NotRecovering>();
+}
+
 void PG::RecoveryState::Recovering::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);
@@ -6842,11 +6927,14 @@ PG::RecoveryState::Clean::Clean(my_context ctx)
     ceph_abort();
   }
   pg->finish_recovery(*context< RecoveryMachine >().get_on_safe_context_list());
-  pg->mark_clean();
+
+  if (pg->is_active()) {
+    pg->mark_clean();
+  }
 
   pg->share_pg_info();
   pg->publish_stats_to_osd();
-
+  pg->requeue_ops(pg->waiting_for_clean_to_primary_repair);
 }
 
 void PG::RecoveryState::Clean::exit()