]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/osd/PGBackend.cc
update sources to 12.2.8
[ceph.git] / ceph / src / osd / PGBackend.cc
index 7c508adf396d5ddb102f6fa2b9111404f3efddc4..f77b8bfad933203c9c3d3e6520f988f90c3f069e 100644 (file)
@@ -742,9 +742,12 @@ bool PGBackend::be_compare_scrub_objects(
   return error == FOUND_ERROR;
 }
 
-static int dcount(const object_info_t &oi)
+static int dcount(const object_info_t &oi, bool prioritize)
 {
   int count = 0;
+  // Prioritize bluestore objects when osd_distrust_data_digest is set
+  if (prioritize)
+    count += 1000;
   if (oi.is_data_digest())
     count++;
   if (oi.is_omap_digest())
@@ -758,9 +761,11 @@ map<pg_shard_t, ScrubMap *>::const_iterator
   const map<pg_shard_t,ScrubMap*> &maps,
   object_info_t *auth_oi,
   map<pg_shard_t, shard_info_wrapper> &shard_map,
-  inconsistent_obj_wrapper &object_error)
+  inconsistent_obj_wrapper &object_error,
+  bool &digest_match)
 {
   eversion_t auth_version;
+  bool auth_prio = false;
   bufferlist first_oi_bl, first_ss_bl, first_hk_bl;
 
   // Create list of shards with primary first so it will be auth copy all
@@ -776,7 +781,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
   shards.push_front(get_parent()->whoami_shard());
 
   map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+  digest_match = true;
   for (auto &l : shards) {
+    bool oi_prio = false;
     map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
     map<hobject_t, ScrubMap::object>::iterator i =
       j->second->objects.find(obj);
@@ -898,16 +905,37 @@ map<pg_shard_t, ScrubMap *>::const_iterator
       error_string += " obj_size_info_mismatch";
     }
 
+    // digest_match will only be true if computed digests are the same
+    if (auth_version != eversion_t()
+        && auth->second->objects[obj].digest_present
+        && i->second.digest_present
+        && auth->second->objects[obj].digest != i->second.digest) {
+      digest_match = false;
+      dout(10) << __func__ << " digest_match = false, " << obj << " data_digest 0x" << std::hex << i->second.digest
+                   << " != data_digest 0x" << auth->second->objects[obj].digest << std::dec
+                   << dendl;
+    }
+
     // Don't use this particular shard due to previous errors
     // XXX: For now we can't pick one shard for repair and another's object info or snapset
     if (shard_info.errors)
       goto out;
 
+    // XXX: Do I want replicated only?
+    if (parent->get_pool().is_replicated() && cct->_conf->osd_distrust_data_digest) {
+      // This is a boost::optional<bool> so see if option set AND it has the value true
+      // We give priority to a replica where the ObjectStore like BlueStore has builtin checksum
+      if (j->second->has_builtin_csum && j->second->has_builtin_csum == true) {
+        oi_prio = true;
+      }
+    }
+
     if (auth_version == eversion_t() || oi.version > auth_version ||
-        (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
+        (oi.version == auth_version && dcount(oi, oi_prio) > dcount(*auth_oi, auth_prio))) {
       auth = j;
       *auth_oi = oi;
       auth_version = oi.version;
+      auth_prio = oi_prio;
     }
 
 out:
@@ -953,8 +981,10 @@ void PGBackend::be_compare_scrubmaps(
 
     inconsistent_obj_wrapper object_error{*k};
 
+    bool digest_match;
     map<pg_shard_t, ScrubMap *>::const_iterator auth =
-      be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);
+      be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error,
+                            digest_match);
 
     list<pg_shard_t> auth_list;
     set<pg_shard_t> object_errors;
@@ -975,6 +1005,7 @@ void PGBackend::be_compare_scrubmaps(
     ScrubMap::object& auth_object = auth->second->objects[*k];
     set<pg_shard_t> cur_missing;
     set<pg_shard_t> cur_inconsistent;
+    bool fix_digest = false;
 
     for (auto  j = maps.cbegin(); j != maps.cend(); ++j) {
       if (j == auth)
@@ -990,6 +1021,30 @@ void PGBackend::be_compare_scrubmaps(
                                   shard_map[j->first],
                                   object_error,
                                   ss);
+
+       dout(20) << __func__ << (repair ? " repair " : " ") << (parent->get_pool().is_replicated() ? "replicated " : "")
+         << (j == auth ? "auth " : "") << "shards " << shard_map.size() << (digest_match ? " digest_match " : " ")
+         << (shard_map[j->first].has_data_digest_mismatch_info() ? "info_mismatch " : "")
+         << (shard_map[j->first].only_data_digest_mismatch_info() ? "only" : "")
+         << dendl;
+
+        if (cct->_conf->osd_distrust_data_digest) {
+         if (digest_match && parent->get_pool().is_replicated()
+              && shard_map[j->first].has_data_digest_mismatch_info()) {
+           fix_digest = true;
+         }
+         shard_map[j->first].clear_data_digest_mismatch_info();
+       // If all replicas match, but they don't match object_info we can
+       // repair it by using missing_digest mechanism
+       } else if (repair && parent->get_pool().is_replicated() && j == auth && shard_map.size() > 1
+           && digest_match && shard_map[j->first].only_data_digest_mismatch_info()
+           && auth_object.digest_present) {
+         // Set in missing_digests
+         fix_digest = true;
+         // Clear the error
+         shard_map[j->first].clear_data_digest_mismatch_info();
+         errorstream << pgid << " : soid " << *k << " repairing object info data_digest" << "\n";
+       }
        // Some errors might have already been set in be_select_auth_object()
        if (shard_map[j->first].errors != 0) {
          cur_inconsistent.insert(j->first);
@@ -1002,11 +1057,12 @@ void PGBackend::be_compare_scrubmaps(
          if (found)
            errorstream << pgid << " shard " << j->first << ": soid " << *k
                      << " " << ss.str() << "\n";
-       } else if (found) {
+       } else if (object_error.errors != 0) {
          // Track possible shard to use as authoritative, if needed
          // There are errors, without identifying the shard
          object_errors.insert(j->first);
-         errorstream << pgid << " : soid " << *k << " " << ss.str() << "\n";
+         if (found)
+           errorstream << pgid << " : soid " << *k << " " << ss.str() << "\n";
        } else {
          // XXX: The auth shard might get here that we don't know
          // that it has the "correct" data.
@@ -1050,9 +1106,22 @@ void PGBackend::be_compare_scrubmaps(
     if (!cur_inconsistent.empty()) {
       inconsistent[*k] = cur_inconsistent;
     }
+
+    if (fix_digest) {
+      boost::optional<uint32_t> data_digest, omap_digest;
+      assert(auth_object.digest_present);
+      data_digest = auth_object.digest;
+      if (auth_object.omap_digest_present) {
+        omap_digest = auth_object.omap_digest;
+      }
+      missing_digest[*k] = make_pair(data_digest, omap_digest);
+    }
+    // Special handling of this particular type of inconsistency
+    // This can over-ride a data_digest or set an omap_digest
+    // when all replicas match but the object info is wrong.
     if (!cur_inconsistent.empty() || !cur_missing.empty()) {
       authoritative[*k] = auth_list;
-    } else if (parent->get_pool().is_replicated()) {
+    } else if (!fix_digest && parent->get_pool().is_replicated()) {
       enum {
        NO = 0,
        MAYBE = 1,
@@ -1071,7 +1140,8 @@ void PGBackend::be_compare_scrubmaps(
       // recorded digest != actual digest?
       if (auth_oi.is_data_digest() && auth_object.digest_present &&
          auth_oi.data_digest != auth_object.digest) {
-        assert(shard_map[auth->first].has_data_digest_mismatch_info());
+       assert(cct->_conf->osd_distrust_data_digest
+              || shard_map[auth->first].has_data_digest_mismatch_info());
        errorstream << pgid << " recorded data digest 0x"
                    << std::hex << auth_oi.data_digest << " != on disk 0x"
                    << auth_object.digest << std::dec << " on " << auth_oi.soid