eversion_t auth_version;
bufferlist auth_bl;
- map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+ // Create list of shards with the primary last so that it will be the auth
+ // copy, all other things being equal.
+ list<pg_shard_t> shards;
for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
j != maps.end();
++j) {
+ if (j->first == get_parent()->whoami_shard())
+ continue;
+ shards.push_back(j->first);
+ }
+ shards.push_back(get_parent()->whoami_shard());
+
+ map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+ for (auto &l : shards) {
+ map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
map<hobject_t, ScrubMap::object>::iterator i =
j->second->objects.find(obj);
if (i == j->second->objects.end()) {
object_info_t oi;
bufferlist bl;
map<string, bufferptr>::iterator k;
+ SnapSet ss;
+ bufferlist ss_bl;
if (i->second.stat_error) {
shard_info.set_stat_error();
goto out;
}
+ if (oi.soid != obj) {
+ shard_info.set_oi_attr_corrupted();
+ error_string += " oi_attr_corrupted";
+ goto out;
+ }
+
if (auth_version != eversion_t()) {
if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) {
object_error.set_object_info_inconsistency();
if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
goto out;
+ // We don't set errors here for snapset, but we won't pick an auth copy if the
+ // snapset is missing or won't decode.
+ if (obj.is_head() || obj.is_snapdir()) {
+ k = i->second.attrs.find(SS_ATTR);
+ if (k == i->second.attrs.end()) {
+ goto out;
+ }
+ ss_bl.push_back(k->second);
+ try {
+ bufferlist::iterator bliter = ss_bl.begin();
+ ::decode(ss, bliter);
+ } catch (...) {
+ // invalid snapset, probably corrupt
+ goto out;
+ }
+ }
+
if (auth_version == eversion_t() || oi.version > auth_version ||
(oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
auth = j;
be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);
list<pg_shard_t> auth_list;
+ set<pg_shard_t> object_errors;
if (auth == maps.end()) {
object_error.set_version(0);
object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors);
if (found)
errorstream << pgid << " shard " << j->first << ": soid " << *k
<< " " << ss.str() << "\n";
+ } else if (found) {
+ // Track possible shard to use as authoritative, if needed
+ // There are errors, without identifying the shard
+ object_errors.insert(j->first);
} else {
// XXX: The auth shard might get here that we don't know
// that it has the "correct" data.
}
if (auth_list.empty()) {
- errorstream << pgid.pgid << " soid " << *k
+ if (object_errors.empty()) {
+ errorstream << pgid.pgid << " soid " << *k
<< ": failed to pick suitable auth object\n";
- goto out;
+ goto out;
+ }
+ // Object errors exist and nothing in auth_list
+ // Prefer the auth shard otherwise take first from list.
+ pg_shard_t shard;
+ if (object_errors.count(auth->first)) {
+ shard = auth->first;
+ } else {
+ shard = *(object_errors.begin());
+ }
+ auth_list.push_back(shard);
+ object_errors.erase(shard);
}
+ // At this point auth_list is populated, so we add the shards remaining in
+ // object_errors as inconsistent.
+ cur_inconsistent.insert(object_errors.begin(), object_errors.end());
if (!cur_missing.empty()) {
missing[*k] = cur_missing;
}