return error == FOUND_ERROR;
}
-static int dcount(const object_info_t &oi)
+static int dcount(const object_info_t &oi, bool prioritize)
{
int count = 0;
+ // Prioritize bluestore objects when osd_distrust_data_digest is set
+ if (prioritize)
+ count += 1000;
if (oi.is_data_digest())
count++;
if (oi.is_omap_digest())
const map<pg_shard_t,ScrubMap*> &maps,
object_info_t *auth_oi,
map<pg_shard_t, shard_info_wrapper> &shard_map,
- inconsistent_obj_wrapper &object_error)
+ inconsistent_obj_wrapper &object_error,
+ bool &digest_match)
{
eversion_t auth_version;
+ bool auth_prio = false;
bufferlist first_oi_bl, first_ss_bl, first_hk_bl;
// Create list of shards with primary first so it will be auth copy all
shards.push_front(get_parent()->whoami_shard());
map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+ digest_match = true;
for (auto &l : shards) {
+ bool oi_prio = false;
map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
map<hobject_t, ScrubMap::object>::iterator i =
j->second->objects.find(obj);
error_string += " obj_size_info_mismatch";
}
+ // digest_match will only be true if computed digests are the same
+ if (auth_version != eversion_t()
+ && auth->second->objects[obj].digest_present
+ && i->second.digest_present
+ && auth->second->objects[obj].digest != i->second.digest) {
+ digest_match = false;
+ dout(10) << __func__ << " digest_match = false, " << obj << " data_digest 0x" << std::hex << i->second.digest
+ << " != data_digest 0x" << auth->second->objects[obj].digest << std::dec
+ << dendl;
+ }
+
// Don't use this particular shard due to previous errors
// XXX: For now we can't pick one shard for repair and another's object info or snapset
if (shard_info.errors)
goto out;
+ // XXX: Do I want replicated only?
+ if (parent->get_pool().is_replicated() && cct->_conf->osd_distrust_data_digest) {
+ // This is a boost::optional<bool> so see if option set AND it has the value true
+ // We give priority to a replica where the ObjectStore like BlueStore has builtin checksum
+ if (j->second->has_builtin_csum && j->second->has_builtin_csum == true) {
+ oi_prio = true;
+ }
+ }
+
if (auth_version == eversion_t() || oi.version > auth_version ||
- (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
+ (oi.version == auth_version && dcount(oi, oi_prio) > dcount(*auth_oi, auth_prio))) {
auth = j;
*auth_oi = oi;
auth_version = oi.version;
+ auth_prio = oi_prio;
}
out:
inconsistent_obj_wrapper object_error{*k};
+ bool digest_match;
map<pg_shard_t, ScrubMap *>::const_iterator auth =
- be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);
+ be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error,
+ digest_match);
list<pg_shard_t> auth_list;
set<pg_shard_t> object_errors;
ScrubMap::object& auth_object = auth->second->objects[*k];
set<pg_shard_t> cur_missing;
set<pg_shard_t> cur_inconsistent;
+ bool fix_digest = false;
for (auto j = maps.cbegin(); j != maps.cend(); ++j) {
if (j == auth)
shard_map[j->first],
object_error,
ss);
+
+ dout(20) << __func__ << (repair ? " repair " : " ") << (parent->get_pool().is_replicated() ? "replicated " : "")
+ << (j == auth ? "auth " : "") << "shards " << shard_map.size() << (digest_match ? " digest_match " : " ")
+ << (shard_map[j->first].has_data_digest_mismatch_info() ? "info_mismatch " : "")
+ << (shard_map[j->first].only_data_digest_mismatch_info() ? "only" : "")
+ << dendl;
+
+ if (cct->_conf->osd_distrust_data_digest) {
+ if (digest_match && parent->get_pool().is_replicated()
+ && shard_map[j->first].has_data_digest_mismatch_info()) {
+ fix_digest = true;
+ }
+ shard_map[j->first].clear_data_digest_mismatch_info();
+ // If all replicas match, but they don't match object_info we can
+ // repair it by using missing_digest mechanism
+ } else if (repair && parent->get_pool().is_replicated() && j == auth && shard_map.size() > 1
+ && digest_match && shard_map[j->first].only_data_digest_mismatch_info()
+ && auth_object.digest_present) {
+ // Set in missing_digests
+ fix_digest = true;
+ // Clear the error
+ shard_map[j->first].clear_data_digest_mismatch_info();
+ errorstream << pgid << " : soid " << *k << " repairing object info data_digest" << "\n";
+ }
// Some errors might have already been set in be_select_auth_object()
if (shard_map[j->first].errors != 0) {
cur_inconsistent.insert(j->first);
if (found)
errorstream << pgid << " shard " << j->first << ": soid " << *k
<< " " << ss.str() << "\n";
- } else if (found) {
+ } else if (object_error.errors != 0) {
// Track possible shard to use as authoritative, if needed
// There are errors, without identifying the shard
object_errors.insert(j->first);
- errorstream << pgid << " : soid " << *k << " " << ss.str() << "\n";
+ if (found)
+ errorstream << pgid << " : soid " << *k << " " << ss.str() << "\n";
} else {
// XXX: The auth shard might get here that we don't know
// that it has the "correct" data.
if (!cur_inconsistent.empty()) {
inconsistent[*k] = cur_inconsistent;
}
+
+ if (fix_digest) {
+ boost::optional<uint32_t> data_digest, omap_digest;
+ assert(auth_object.digest_present);
+ data_digest = auth_object.digest;
+ if (auth_object.omap_digest_present) {
+ omap_digest = auth_object.omap_digest;
+ }
+ missing_digest[*k] = make_pair(data_digest, omap_digest);
+ }
+ // Special handling of this particular type of inconsistency
+ // This can over-ride a data_digest or set an omap_digest
+ // when all replicas match but the object info is wrong.
if (!cur_inconsistent.empty() || !cur_missing.empty()) {
authoritative[*k] = auth_list;
- } else if (parent->get_pool().is_replicated()) {
+ } else if (!fix_digest && parent->get_pool().is_replicated()) {
enum {
NO = 0,
MAYBE = 1,
// recorded digest != actual digest?
if (auth_oi.is_data_digest() && auth_object.digest_present &&
auth_oi.data_digest != auth_object.digest) {
- assert(shard_map[auth->first].has_data_digest_mismatch_info());
+ assert(cct->_conf->osd_distrust_data_digest
+ || shard_map[auth->first].has_data_digest_mismatch_info());
errorstream << pgid << " recorded data digest 0x"
<< std::hex << auth_oi.data_digest << " != on disk 0x"
<< auth_object.digest << std::dec << " on " << auth_oi.soid