dout(10) << __func__ << ": canceling recovery op for obj " << hoid
<< dendl;
assert(recovery_ops.count(hoid));
+ eversion_t v = recovery_ops[hoid].v;
recovery_ops.erase(hoid);
list<pg_shard_t> fl;
fl.push_back(i.first);
}
get_parent()->failed_push(fl, hoid);
+ get_parent()->backfill_add_missing(hoid, v);
+ get_parent()->finish_degraded_object(hoid);
}
struct OnRecoveryReadComplete :
op.soid,
op.recovery_info,
recovery_ops[op.soid].obc,
+ false,
&m->t);
} else {
get_parent()->on_local_recover(
op.soid,
op.recovery_info,
ObjectContextRef(),
+ false,
&m->t);
}
}
stat.num_bytes_recovered = op.recovery_info.size;
stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ?
stat.num_objects_recovered = 1;
- get_parent()->on_global_recover(op.hoid, stat);
+ get_parent()->on_global_recover(op.hoid, stat, false);
dout(10) << __func__ << ": WRITING return " << op << dendl;
recovery_ops.erase(op.hoid);
return;
RecoveryOp &op = recovery_ops.insert(make_pair(i->hoid, *i)).first->second;
continue_recovery_op(op, &m);
}
+
dispatch_recovery_messages(m, priority);
+ send_recovery_deletes(priority, h->deletes);
delete _h;
}
-void ECBackend::recover_object(
+int ECBackend::recover_object(
const hobject_t &hoid,
eversion_t v,
ObjectContextRef head,
}
}
dout(10) << __func__ << ": built op " << h->ops.back() << dendl;
+ return 0;
}
bool ECBackend::can_handle_while_inactive(
return false;
}
-bool ECBackend::handle_message(
+bool ECBackend::_handle_message(
OpRequestRef _op)
{
dout(10) << __func__ << ": " << *_op->get_req() << dendl;
hinfo = get_hash_info(i->first);
if (!hinfo) {
r = -EIO;
- get_parent()->clog_error() << __func__ << ": No hinfo for " << i->first;
+ get_parent()->clog_error() << "Corruption detected: object " << i->first
+ << " is missing hash_info";
dout(5) << __func__ << ": No hinfo for " << i->first << dendl;
goto error;
}
ghobject_t(i->first, ghobject_t::NO_GEN, shard),
j->get<0>(),
j->get<1>(),
- bl, j->get<2>(),
- true); // Allow EIO return
+ bl, j->get<2>());
if (r < 0) {
- get_parent()->clog_error() << __func__
- << ": Error " << r
- << " reading "
+ get_parent()->clog_error() << "Error " << r
+ << " reading object "
<< i->first;
dout(5) << __func__ << ": Error " << r
<< " reading " << i->first << dendl;
bufferhash h(-1);
h << bl;
if (h.digest() != hinfo->get_chunk_hash(shard)) {
- get_parent()->clog_error() << __func__ << ": Bad hash for " << i->first << " digest 0x"
+ get_parent()->clog_error() << "Bad hash for " << i->first << " digest 0x"
<< hex << h.digest() << " expected 0x" << hinfo->get_chunk_hash(shard) << dec;
dout(5) << __func__ << ": Bad hash for " << i->first << " digest 0x"
<< hex << h.digest() << " expected 0x" << hinfo->get_chunk_hash(shard) << dec << dendl;
unsigned is_complete = 0;
// For redundant reads check for completion as each shard comes in,
// or in a non-recovery read check for completion once all the shards read.
- // TODO: It would be nice if recovery could send more reads too
- if (rop.do_redundant_reads || (!rop.for_recovery && rop.in_progress.empty())) {
+ if (rop.do_redundant_reads || rop.in_progress.empty()) {
for (map<hobject_t, read_result_t>::const_iterator iter =
rop.complete.begin();
iter != rop.complete.end();
}
// Couldn't read any additional shards so handle as completed with errors
}
- if (rop.complete[iter->first].errors.empty()) {
- dout(20) << __func__ << " simply not enough copies err=" << err << dendl;
- } else {
- // Grab the first error
- err = rop.complete[iter->first].errors.begin()->second;
- dout(20) << __func__ << ": Use one of the shard errors err=" << err << dendl;
- }
+ // We don't want to confuse clients / RBD with objectstore error
+ // values, in particular ENOENT. Different shards may return
+ // different errors, so we return the minimum_to_decode() error
+ // (usually EIO) to the reader. An error here is most likely due
+ // to a damaged pg.
rop.complete[iter->first].r = err;
++is_complete;
}
err = rop.complete[iter->first].errors.begin()->second;
rop.complete[iter->first].r = err;
} else {
- get_parent()->clog_error() << __func__ << ": Error(s) ignored for "
+ get_parent()->clog_warn() << "Error(s) ignored for "
<< iter->first << " enough copies available";
dout(10) << __func__ << " Error(s) ignored for " << iter->first
<< " enough copies available" << dendl;
}
}
-int ECBackend::get_min_avail_to_read_shards(
+void ECBackend::get_all_avail_shards(
const hobject_t &hoid,
- const set<int> &want,
- bool for_recovery,
- bool do_redundant_reads,
- set<pg_shard_t> *to_read)
+ set<int> &have,
+ map<shard_id_t, pg_shard_t> &shards,
+ bool for_recovery)
{
- // Make sure we don't do redundant reads for recovery
- assert(!for_recovery || !do_redundant_reads);
-
- set<int> have;
- map<shard_id_t, pg_shard_t> shards;
-
for (set<pg_shard_t>::const_iterator i =
get_parent()->get_acting_shards().begin();
i != get_parent()->get_acting_shards().end();
}
}
}
+}
+
+int ECBackend::get_min_avail_to_read_shards(
+ const hobject_t &hoid,
+ const set<int> &want,
+ bool for_recovery,
+ bool do_redundant_reads,
+ set<pg_shard_t> *to_read)
+{
+ // Make sure we don't do redundant reads for recovery
+ assert(!for_recovery || !do_redundant_reads);
+
+ set<int> have;
+ map<shard_id_t, pg_shard_t> shards;
+
+ get_all_avail_shards(hoid, have, shards, for_recovery);
set<int> need;
int r = ec_impl->minimum_to_decode(want, have, &need);
int ECBackend::get_remaining_shards(
const hobject_t &hoid,
const set<int> &avail,
- set<pg_shard_t> *to_read)
+ set<pg_shard_t> *to_read,
+ bool for_recovery)
{
- set<int> need;
- map<shard_id_t, pg_shard_t> shards;
+ assert(to_read);
- for (set<pg_shard_t>::const_iterator i =
- get_parent()->get_acting_shards().begin();
- i != get_parent()->get_acting_shards().end();
- ++i) {
- dout(10) << __func__ << ": checking acting " << *i << dendl;
- const pg_missing_t &missing = get_parent()->get_shard_missing(*i);
- if (!missing.is_missing(hoid)) {
- assert(!need.count(i->shard));
- need.insert(i->shard);
- assert(!shards.count(i->shard));
- shards.insert(make_pair(i->shard, *i));
- }
- }
+ set<int> have;
+ map<shard_id_t, pg_shard_t> shards;
- if (!to_read)
- return 0;
+ get_all_avail_shards(hoid, have, shards, for_recovery);
- for (set<int>::iterator i = need.begin();
- i != need.end();
+ for (set<int>::iterator i = have.begin();
+ i != have.end();
++i) {
assert(shards.count(shard_id_t(*i)));
if (avail.find(*i) == avail.end())
}
if (bl.length() > 0) {
bufferlist::iterator bp = bl.begin();
- ::decode(hinfo, bp);
+ try {
+ ::decode(hinfo, bp);
+ } catch(...) {
+ dout(0) << __func__ << ": Can't decode hinfo for " << hoid << dendl;
+ return ECUtil::HashInfoRef();
+ }
if (checks && hinfo.get_total_chunk_size() != (uint64_t)st.st_size) {
dout(0) << __func__ << ": Mismatch of total_chunk_size "
<< hinfo.get_total_chunk_size() << dendl;
already_read.insert(i->shard);
dout(10) << __func__ << " have/error shards=" << already_read << dendl;
set<pg_shard_t> shards;
- int r = get_remaining_shards(hoid, already_read, &shards);
+ int r = get_remaining_shards(hoid, already_read, &shards, rop.for_recovery);
if (r)
return r;
if (shards.empty())
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
pos,
stride, bl,
- fadvise_flags, true);
+ fadvise_flags);
if (r < 0)
break;
if (bl.length() % sinfo.get_chunk_size()) {