]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/PGBackend.cc
Import ceph 15.2.8
[ceph.git] / ceph / src / osd / PGBackend.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013,2014 Inktank Storage, Inc.
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18
19#include "common/errno.h"
20#include "common/scrub_types.h"
21#include "ReplicatedBackend.h"
22#include "ScrubStore.h"
23#include "ECBackend.h"
24#include "PGBackend.h"
25#include "OSD.h"
26#include "erasure-code/ErasureCodePlugin.h"
27#include "OSDMap.h"
28#include "PGLog.h"
29#include "common/LogClient.h"
c07f9fc5
FG
30#include "messages/MOSDPGRecoveryDelete.h"
31#include "messages/MOSDPGRecoveryDeleteReply.h"
7c673cae
FG
32
33#define dout_context cct
34#define dout_subsys ceph_subsys_osd
35#define DOUT_PREFIX_ARGS this
36#undef dout_prefix
37#define dout_prefix _prefix(_dout, this)
38static ostream& _prefix(std::ostream *_dout, PGBackend *pgb) {
11fdf7f2 39 return pgb->get_parent()->gen_dbg_prefix(*_dout);
7c673cae
FG
40}
41
c07f9fc5
FG
42void PGBackend::recover_delete_object(const hobject_t &oid, eversion_t v,
43 RecoveryHandle *h)
44{
11fdf7f2
TL
45 ceph_assert(get_parent()->get_acting_recovery_backfill_shards().size() > 0);
46 for (const auto& shard : get_parent()->get_acting_recovery_backfill_shards()) {
c07f9fc5
FG
47 if (shard == get_parent()->whoami_shard())
48 continue;
49 if (get_parent()->get_shard_missing(shard).is_missing(oid)) {
50 dout(20) << __func__ << " will remove " << oid << " " << v << " from "
51 << shard << dendl;
52 h->deletes[shard].push_back(make_pair(oid, v));
53 get_parent()->begin_peer_recover(shard, oid);
54 }
55 }
56}
57
58void PGBackend::send_recovery_deletes(int prio,
59 const map<pg_shard_t, vector<pair<hobject_t, eversion_t> > > &deletes)
60{
61 epoch_t min_epoch = get_parent()->get_last_peering_reset_epoch();
62 for (const auto& p : deletes) {
63 const auto& shard = p.first;
64 const auto& objects = p.second;
65 ConnectionRef con = get_parent()->get_con_osd_cluster(
66 shard.osd,
11fdf7f2 67 get_osdmap_epoch());
c07f9fc5
FG
68 if (!con)
69 continue;
70 auto it = objects.begin();
71 while (it != objects.end()) {
72 uint64_t cost = 0;
73 uint64_t deletes = 0;
74 spg_t target_pg = spg_t(get_parent()->get_info().pgid.pgid, shard.shard);
75 MOSDPGRecoveryDelete *msg =
76 new MOSDPGRecoveryDelete(get_parent()->whoami_shard(),
77 target_pg,
11fdf7f2 78 get_osdmap_epoch(),
c07f9fc5
FG
79 min_epoch);
80 msg->set_priority(prio);
81
82 while (it != objects.end() &&
83 cost < cct->_conf->osd_max_push_cost &&
84 deletes < cct->_conf->osd_max_push_objects) {
85 dout(20) << __func__ << ": sending recovery delete << " << it->first
86 << " " << it->second << " to osd." << shard << dendl;
87 msg->objects.push_back(*it);
88 cost += cct->_conf->osd_push_per_object_cost;
89 ++deletes;
90 ++it;
91 }
92
93 msg->set_cost(cost);
94 get_parent()->send_message_osd_cluster(msg, con);
95 }
96 }
97}
98
99bool PGBackend::handle_message(OpRequestRef op)
100{
101 switch (op->get_req()->get_type()) {
102 case MSG_OSD_PG_RECOVERY_DELETE:
103 handle_recovery_delete(op);
104 return true;
105
106 case MSG_OSD_PG_RECOVERY_DELETE_REPLY:
107 handle_recovery_delete_reply(op);
108 return true;
109
110 default:
111 break;
112 }
113
114 return _handle_message(op);
115}
116
117void PGBackend::handle_recovery_delete(OpRequestRef op)
118{
9f95a23c 119 auto m = op->get_req<MOSDPGRecoveryDelete>();
11fdf7f2 120 ceph_assert(m->get_type() == MSG_OSD_PG_RECOVERY_DELETE);
c07f9fc5
FG
121 dout(20) << __func__ << " " << op << dendl;
122
123 op->mark_started();
124
125 C_GatherBuilder gather(cct);
126 for (const auto &p : m->objects) {
127 get_parent()->remove_missing_object(p.first, p.second, gather.new_sub());
128 }
129
130 MOSDPGRecoveryDeleteReply *reply = new MOSDPGRecoveryDeleteReply;
131 reply->from = get_parent()->whoami_shard();
132 reply->set_priority(m->get_priority());
133 reply->pgid = spg_t(get_parent()->get_info().pgid.pgid, m->from.shard);
134 reply->map_epoch = m->map_epoch;
135 reply->min_epoch = m->min_epoch;
136 reply->objects = m->objects;
137 ConnectionRef conn = m->get_connection();
138
9f95a23c 139 gather.set_finisher(new LambdaContext(
c07f9fc5
FG
140 [=](int r) {
141 if (r != -EAGAIN) {
142 get_parent()->send_message_osd_cluster(reply, conn.get());
b5b8bbf5
FG
143 } else {
144 reply->put();
c07f9fc5
FG
145 }
146 }));
147 gather.activate();
148}
149
150void PGBackend::handle_recovery_delete_reply(OpRequestRef op)
151{
9f95a23c 152 auto m = op->get_req<MOSDPGRecoveryDeleteReply>();
11fdf7f2 153 ceph_assert(m->get_type() == MSG_OSD_PG_RECOVERY_DELETE_REPLY);
c07f9fc5
FG
154 dout(20) << __func__ << " " << op << dendl;
155
156 for (const auto &p : m->objects) {
157 ObjectRecoveryInfo recovery_info;
158 hobject_t oid = p.first;
159 recovery_info.version = p.second;
160 get_parent()->on_peer_recover(m->from, oid, recovery_info);
161 bool peers_recovered = true;
11fdf7f2 162 for (const auto& shard : get_parent()->get_acting_recovery_backfill_shards()) {
c07f9fc5
FG
163 if (shard == get_parent()->whoami_shard())
164 continue;
165 if (get_parent()->get_shard_missing(shard).is_missing(oid)) {
166 dout(20) << __func__ << " " << oid << " still missing on at least "
167 << shard << dendl;
168 peers_recovered = false;
169 break;
170 }
171 }
172 if (peers_recovered && !get_parent()->get_local_missing().is_missing(oid)) {
173 dout(20) << __func__ << " completed recovery, local_missing = "
174 << get_parent()->get_local_missing() << dendl;
175 object_stat_sum_t stat_diff;
176 stat_diff.num_objects_recovered = 1;
177 get_parent()->on_global_recover(p.first, stat_diff, true);
178 }
179 }
180}
181
7c673cae
FG
182void PGBackend::rollback(
183 const pg_log_entry_t &entry,
184 ObjectStore::Transaction *t)
185{
186
187 struct RollbackVisitor : public ObjectModDesc::Visitor {
188 const hobject_t &hoid;
189 PGBackend *pg;
190 ObjectStore::Transaction t;
191 RollbackVisitor(
192 const hobject_t &hoid,
193 PGBackend *pg) : hoid(hoid), pg(pg) {}
194 void append(uint64_t old_size) override {
195 ObjectStore::Transaction temp;
196 pg->rollback_append(hoid, old_size, &temp);
197 temp.append(t);
198 temp.swap(t);
199 }
9f95a23c 200 void setattrs(map<string, std::optional<bufferlist> > &attrs) override {
7c673cae
FG
201 ObjectStore::Transaction temp;
202 pg->rollback_setattrs(hoid, attrs, &temp);
203 temp.append(t);
204 temp.swap(t);
205 }
206 void rmobject(version_t old_version) override {
207 ObjectStore::Transaction temp;
208 pg->rollback_stash(hoid, old_version, &temp);
209 temp.append(t);
210 temp.swap(t);
211 }
212 void try_rmobject(version_t old_version) override {
213 ObjectStore::Transaction temp;
214 pg->rollback_try_stash(hoid, old_version, &temp);
215 temp.append(t);
216 temp.swap(t);
217 }
218 void create() override {
219 ObjectStore::Transaction temp;
220 pg->rollback_create(hoid, &temp);
221 temp.append(t);
222 temp.swap(t);
223 }
224 void update_snaps(const set<snapid_t> &snaps) override {
225 ObjectStore::Transaction temp;
226 pg->get_parent()->pgb_set_object_snap_mapping(hoid, snaps, &temp);
227 temp.append(t);
228 temp.swap(t);
229 }
230 void rollback_extents(
231 version_t gen,
232 const vector<pair<uint64_t, uint64_t> > &extents) override {
233 ObjectStore::Transaction temp;
234 pg->rollback_extents(gen, extents, hoid, &temp);
235 temp.append(t);
236 temp.swap(t);
237 }
238 };
239
11fdf7f2 240 ceph_assert(entry.mod_desc.can_rollback());
7c673cae
FG
241 RollbackVisitor vis(entry.soid, this);
242 entry.mod_desc.visit(&vis);
243 t->append(vis.t);
244}
245
246struct Trimmer : public ObjectModDesc::Visitor {
247 const hobject_t &soid;
248 PGBackend *pg;
249 ObjectStore::Transaction *t;
250 Trimmer(
251 const hobject_t &soid,
252 PGBackend *pg,
253 ObjectStore::Transaction *t)
254 : soid(soid), pg(pg), t(t) {}
255 void rmobject(version_t old_version) override {
256 pg->trim_rollback_object(
257 soid,
258 old_version,
259 t);
260 }
261 // try_rmobject defaults to rmobject
262 void rollback_extents(
263 version_t gen,
264 const vector<pair<uint64_t, uint64_t> > &extents) override {
265 pg->trim_rollback_object(
266 soid,
267 gen,
268 t);
269 }
270};
271
272void PGBackend::rollforward(
273 const pg_log_entry_t &entry,
274 ObjectStore::Transaction *t)
275{
276 auto dpp = get_parent()->get_dpp();
277 ldpp_dout(dpp, 20) << __func__ << ": entry=" << entry << dendl;
278 if (!entry.can_rollback())
279 return;
280 Trimmer trimmer(entry.soid, this, t);
281 entry.mod_desc.visit(&trimmer);
282}
283
284void PGBackend::trim(
285 const pg_log_entry_t &entry,
286 ObjectStore::Transaction *t)
287{
288 if (!entry.can_rollback())
289 return;
290 Trimmer trimmer(entry.soid, this, t);
291 entry.mod_desc.visit(&trimmer);
292}
293
294void PGBackend::try_stash(
295 const hobject_t &hoid,
296 version_t v,
297 ObjectStore::Transaction *t)
298{
299 t->try_rename(
300 coll,
301 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
302 ghobject_t(hoid, v, get_parent()->whoami_shard().shard));
303}
304
305void PGBackend::remove(
306 const hobject_t &hoid,
307 ObjectStore::Transaction *t) {
11fdf7f2 308 ceph_assert(!hoid.is_temp());
7c673cae
FG
309 t->remove(
310 coll,
311 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
312 get_parent()->pgb_clear_object_snap_mapping(hoid, t);
313}
314
315void PGBackend::on_change_cleanup(ObjectStore::Transaction *t)
316{
317 dout(10) << __func__ << dendl;
318 // clear temp
319 for (set<hobject_t>::iterator i = temp_contents.begin();
320 i != temp_contents.end();
321 ++i) {
322 dout(10) << __func__ << ": Removing oid "
323 << *i << " from the temp collection" << dendl;
324 t->remove(
325 coll,
326 ghobject_t(*i, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
327 }
328 temp_contents.clear();
329}
330
331int PGBackend::objects_list_partial(
332 const hobject_t &begin,
333 int min,
334 int max,
335 vector<hobject_t> *ls,
336 hobject_t *next)
337{
11fdf7f2 338 ceph_assert(ls);
7c673cae
FG
339 // Starts with the smallest generation to make sure the result list
340 // has the marker object (it might have multiple generations
341 // though, which would be filtered).
342 ghobject_t _next;
343 if (!begin.is_min())
344 _next = ghobject_t(begin, 0, get_parent()->whoami_shard().shard);
345 ls->reserve(max);
346 int r = 0;
347
348 if (min > max)
349 min = max;
350
351 while (!_next.is_max() && ls->size() < (unsigned)min) {
352 vector<ghobject_t> objects;
f91f0fd5
TL
353 if (HAVE_FEATURE(parent->min_upacting_features(),
354 OSD_FIXED_COLLECTION_LIST)) {
355 r = store->collection_list(
356 ch,
357 _next,
358 ghobject_t::get_max(),
359 max - ls->size(),
360 &objects,
361 &_next);
362 } else {
363 r = store->collection_list_legacy(
364 ch,
365 _next,
366 ghobject_t::get_max(),
367 max - ls->size(),
368 &objects,
369 &_next);
370 }
7c673cae
FG
371 if (r != 0) {
372 derr << __func__ << " list collection " << ch << " got: " << cpp_strerror(r) << dendl;
373 break;
374 }
375 for (vector<ghobject_t>::iterator i = objects.begin();
376 i != objects.end();
377 ++i) {
378 if (i->is_pgmeta() || i->hobj.is_temp()) {
379 continue;
380 }
381 if (i->is_no_gen()) {
382 ls->push_back(i->hobj);
383 }
384 }
385 }
386 if (r == 0)
387 *next = _next.hobj;
388 return r;
389}
390
391int PGBackend::objects_list_range(
392 const hobject_t &start,
393 const hobject_t &end,
7c673cae
FG
394 vector<hobject_t> *ls,
395 vector<ghobject_t> *gen_obs)
396{
11fdf7f2 397 ceph_assert(ls);
7c673cae 398 vector<ghobject_t> objects;
f91f0fd5
TL
399 int r;
400 if (HAVE_FEATURE(parent->min_upacting_features(),
401 OSD_FIXED_COLLECTION_LIST)) {
402 r = store->collection_list(
403 ch,
404 ghobject_t(start, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
405 ghobject_t(end, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
406 INT_MAX,
407 &objects,
408 NULL);
409 } else {
410 r = store->collection_list_legacy(
411 ch,
412 ghobject_t(start, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
413 ghobject_t(end, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
414 INT_MAX,
415 &objects,
416 NULL);
417 }
7c673cae
FG
418 ls->reserve(objects.size());
419 for (vector<ghobject_t>::iterator i = objects.begin();
420 i != objects.end();
421 ++i) {
422 if (i->is_pgmeta() || i->hobj.is_temp()) {
423 continue;
424 }
425 if (i->is_no_gen()) {
426 ls->push_back(i->hobj);
427 } else if (gen_obs) {
428 gen_obs->push_back(*i);
429 }
430 }
431 return r;
432}
433
434int PGBackend::objects_get_attr(
435 const hobject_t &hoid,
436 const string &attr,
437 bufferlist *out)
438{
439 bufferptr bp;
440 int r = store->getattr(
441 ch,
442 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
443 attr.c_str(),
444 bp);
445 if (r >= 0 && out) {
446 out->clear();
447 out->push_back(std::move(bp));
448 }
449 return r;
450}
451
452int PGBackend::objects_get_attrs(
453 const hobject_t &hoid,
454 map<string, bufferlist> *out)
455{
456 return store->getattrs(
457 ch,
458 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
459 *out);
460}
461
462void PGBackend::rollback_setattrs(
463 const hobject_t &hoid,
9f95a23c 464 map<string, std::optional<bufferlist> > &old_attrs,
7c673cae
FG
465 ObjectStore::Transaction *t) {
466 map<string, bufferlist> to_set;
11fdf7f2 467 ceph_assert(!hoid.is_temp());
9f95a23c 468 for (map<string, std::optional<bufferlist> >::iterator i = old_attrs.begin();
7c673cae
FG
469 i != old_attrs.end();
470 ++i) {
471 if (i->second) {
9f95a23c 472 to_set[i->first] = *(i->second);
7c673cae
FG
473 } else {
474 t->rmattr(
475 coll,
476 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
477 i->first);
478 }
479 }
480 t->setattrs(
481 coll,
482 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
483 to_set);
484}
485
486void PGBackend::rollback_append(
487 const hobject_t &hoid,
488 uint64_t old_size,
489 ObjectStore::Transaction *t) {
11fdf7f2 490 ceph_assert(!hoid.is_temp());
7c673cae
FG
491 t->truncate(
492 coll,
493 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
494 old_size);
495}
496
497void PGBackend::rollback_stash(
498 const hobject_t &hoid,
499 version_t old_version,
500 ObjectStore::Transaction *t) {
11fdf7f2 501 ceph_assert(!hoid.is_temp());
7c673cae
FG
502 t->remove(
503 coll,
504 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
505 t->collection_move_rename(
506 coll,
507 ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard),
508 coll,
509 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
510}
511
512void PGBackend::rollback_try_stash(
513 const hobject_t &hoid,
514 version_t old_version,
515 ObjectStore::Transaction *t) {
11fdf7f2 516 ceph_assert(!hoid.is_temp());
7c673cae
FG
517 t->remove(
518 coll,
519 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
520 t->try_rename(
521 coll,
522 ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard),
523 ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
524}
525
526void PGBackend::rollback_extents(
527 version_t gen,
528 const vector<pair<uint64_t, uint64_t> > &extents,
529 const hobject_t &hoid,
530 ObjectStore::Transaction *t) {
531 auto shard = get_parent()->whoami_shard().shard;
532 for (auto &&extent: extents) {
533 t->clone_range(
534 coll,
535 ghobject_t(hoid, gen, shard),
536 ghobject_t(hoid, ghobject_t::NO_GEN, shard),
537 extent.first,
538 extent.second,
539 extent.first);
540 }
541 t->remove(
542 coll,
543 ghobject_t(hoid, gen, shard));
544}
545
546void PGBackend::trim_rollback_object(
547 const hobject_t &hoid,
548 version_t old_version,
549 ObjectStore::Transaction *t) {
11fdf7f2 550 ceph_assert(!hoid.is_temp());
7c673cae
FG
551 t->remove(
552 coll, ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard));
553}
554
555PGBackend *PGBackend::build_pg_backend(
556 const pg_pool_t &pool,
11fdf7f2 557 const map<string,string>& profile,
7c673cae
FG
558 Listener *l,
559 coll_t coll,
560 ObjectStore::CollectionHandle &ch,
561 ObjectStore *store,
562 CephContext *cct)
563{
11fdf7f2 564 ErasureCodeProfile ec_profile = profile;
7c673cae
FG
565 switch (pool.type) {
566 case pg_pool_t::TYPE_REPLICATED: {
567 return new ReplicatedBackend(l, coll, ch, store, cct);
568 }
569 case pg_pool_t::TYPE_ERASURE: {
570 ErasureCodeInterfaceRef ec_impl;
7c673cae
FG
571 stringstream ss;
572 ceph::ErasureCodePluginRegistry::instance().factory(
573 profile.find("plugin")->second,
11fdf7f2
TL
574 cct->_conf.get_val<std::string>("erasure_code_dir"),
575 ec_profile,
7c673cae
FG
576 &ec_impl,
577 &ss);
11fdf7f2 578 ceph_assert(ec_impl);
7c673cae
FG
579 return new ECBackend(
580 l,
581 coll,
582 ch,
583 store,
584 cct,
585 ec_impl,
586 pool.stripe_width);
587 }
588 default:
589 ceph_abort();
590 return NULL;
591 }
592}
593
28e407b8
AA
594int PGBackend::be_scan_list(
595 ScrubMap &map,
596 ScrubMapBuilder &pos)
7c673cae 597{
28e407b8 598 dout(10) << __func__ << " " << pos << dendl;
11fdf7f2
TL
599 ceph_assert(!pos.done());
600 ceph_assert(pos.pos < pos.ls.size());
28e407b8
AA
601 hobject_t& poid = pos.ls[pos.pos];
602
603 struct stat st;
604 int r = store->stat(
605 ch,
606 ghobject_t(
607 poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
608 &st,
609 true);
610 if (r == 0) {
611 ScrubMap::object &o = map.objects[poid];
612 o.size = st.st_size;
11fdf7f2 613 ceph_assert(!o.negative);
28e407b8 614 store->getattrs(
7c673cae
FG
615 ch,
616 ghobject_t(
617 poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
28e407b8 618 o.attrs);
7c673cae 619
28e407b8
AA
620 if (pos.deep) {
621 r = be_deep_scrub(poid, map, pos, o);
7c673cae 622 }
28e407b8
AA
623 dout(25) << __func__ << " " << poid << dendl;
624 } else if (r == -ENOENT) {
625 dout(25) << __func__ << " " << poid << " got " << r
626 << ", skipping" << dendl;
627 } else if (r == -EIO) {
628 dout(25) << __func__ << " " << poid << " got " << r
629 << ", stat_error" << dendl;
630 ScrubMap::object &o = map.objects[poid];
631 o.stat_error = true;
632 } else {
633 derr << __func__ << " got: " << cpp_strerror(r) << dendl;
634 ceph_abort();
635 }
636 if (r == -EINPROGRESS) {
637 return -EINPROGRESS;
7c673cae 638 }
28e407b8
AA
639 pos.next_object();
640 return 0;
7c673cae
FG
641}
642
643bool PGBackend::be_compare_scrub_objects(
644 pg_shard_t auth_shard,
645 const ScrubMap::object &auth,
646 const object_info_t& auth_oi,
647 const ScrubMap::object &candidate,
648 shard_info_wrapper &shard_result,
649 inconsistent_obj_wrapper &obj_result,
91327a77
AA
650 ostream &errorstream,
651 bool has_snapset)
7c673cae
FG
652{
653 enum { CLEAN, FOUND_ERROR } error = CLEAN;
7c673cae
FG
654 if (auth.digest_present && candidate.digest_present) {
655 if (auth.digest != candidate.digest) {
656 if (error != CLEAN)
657 errorstream << ", ";
658 error = FOUND_ERROR;
659 errorstream << "data_digest 0x" << std::hex << candidate.digest
660 << " != data_digest 0x" << auth.digest << std::dec
661 << " from shard " << auth_shard;
662 obj_result.set_data_digest_mismatch();
663 }
664 }
665 if (auth.omap_digest_present && candidate.omap_digest_present) {
666 if (auth.omap_digest != candidate.omap_digest) {
667 if (error != CLEAN)
668 errorstream << ", ";
669 error = FOUND_ERROR;
670 errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest
671 << " != omap_digest 0x" << auth.omap_digest << std::dec
672 << " from shard " << auth_shard;
673 obj_result.set_omap_digest_mismatch();
674 }
675 }
676 if (parent->get_pool().is_replicated()) {
677 if (auth_oi.is_data_digest() && candidate.digest_present) {
678 if (auth_oi.data_digest != candidate.digest) {
679 if (error != CLEAN)
680 errorstream << ", ";
681 error = FOUND_ERROR;
682 errorstream << "data_digest 0x" << std::hex << candidate.digest
683 << " != data_digest 0x" << auth_oi.data_digest << std::dec
684 << " from auth oi " << auth_oi;
94b18763 685 shard_result.set_data_digest_mismatch_info();
7c673cae
FG
686 }
687 }
688 if (auth_oi.is_omap_digest() && candidate.omap_digest_present) {
689 if (auth_oi.omap_digest != candidate.omap_digest) {
690 if (error != CLEAN)
691 errorstream << ", ";
692 error = FOUND_ERROR;
693 errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest
694 << " != omap_digest 0x" << auth_oi.omap_digest << std::dec
695 << " from auth oi " << auth_oi;
94b18763 696 shard_result.set_omap_digest_mismatch_info();
7c673cae
FG
697 }
698 }
699 }
700 if (candidate.stat_error)
701 return error == FOUND_ERROR;
91327a77
AA
702 if (!shard_result.has_info_missing()
703 && !shard_result.has_info_corrupted()) {
704 bufferlist can_bl, auth_bl;
705 auto can_attr = candidate.attrs.find(OI_ATTR);
706 auto auth_attr = auth.attrs.find(OI_ATTR);
707
11fdf7f2
TL
708 ceph_assert(auth_attr != auth.attrs.end());
709 ceph_assert(can_attr != candidate.attrs.end());
91327a77
AA
710
711 can_bl.push_back(can_attr->second);
712 auth_bl.push_back(auth_attr->second);
713 if (!can_bl.contents_equal(auth_bl)) {
714 if (error != CLEAN)
715 errorstream << ", ";
716 error = FOUND_ERROR;
717 obj_result.set_object_info_inconsistency();
718 errorstream << "object info inconsistent ";
719 }
720 }
721 if (has_snapset) {
722 if (!shard_result.has_snapset_missing()
723 && !shard_result.has_snapset_corrupted()) {
724 bufferlist can_bl, auth_bl;
725 auto can_attr = candidate.attrs.find(SS_ATTR);
726 auto auth_attr = auth.attrs.find(SS_ATTR);
727
11fdf7f2
TL
728 ceph_assert(auth_attr != auth.attrs.end());
729 ceph_assert(can_attr != candidate.attrs.end());
91327a77
AA
730
731 can_bl.push_back(can_attr->second);
732 auth_bl.push_back(auth_attr->second);
733 if (!can_bl.contents_equal(auth_bl)) {
734 if (error != CLEAN)
735 errorstream << ", ";
736 error = FOUND_ERROR;
737 obj_result.set_snapset_inconsistency();
738 errorstream << "snapset inconsistent ";
739 }
740 }
741 }
742 if (parent->get_pool().is_erasure()) {
743 if (!shard_result.has_hinfo_missing()
744 && !shard_result.has_hinfo_corrupted()) {
745 bufferlist can_bl, auth_bl;
746 auto can_hi = candidate.attrs.find(ECUtil::get_hinfo_key());
747 auto auth_hi = auth.attrs.find(ECUtil::get_hinfo_key());
748
11fdf7f2
TL
749 ceph_assert(auth_hi != auth.attrs.end());
750 ceph_assert(can_hi != candidate.attrs.end());
91327a77
AA
751
752 can_bl.push_back(can_hi->second);
753 auth_bl.push_back(auth_hi->second);
754 if (!can_bl.contents_equal(auth_bl)) {
755 if (error != CLEAN)
756 errorstream << ", ";
757 error = FOUND_ERROR;
758 obj_result.set_hinfo_inconsistency();
759 errorstream << "hinfo inconsistent ";
760 }
761 }
762 }
7c673cae
FG
763 uint64_t oi_size = be_get_ondisk_size(auth_oi.size);
764 if (oi_size != candidate.size) {
765 if (error != CLEAN)
766 errorstream << ", ";
767 error = FOUND_ERROR;
768 errorstream << "size " << candidate.size
769 << " != size " << oi_size
770 << " from auth oi " << auth_oi;
94b18763 771 shard_result.set_size_mismatch_info();
7c673cae
FG
772 }
773 if (auth.size != candidate.size) {
774 if (error != CLEAN)
775 errorstream << ", ";
776 error = FOUND_ERROR;
777 errorstream << "size " << candidate.size
778 << " != size " << auth.size
779 << " from shard " << auth_shard;
780 obj_result.set_size_mismatch();
781 }
eafe8130
TL
782 // If the replica is too large and we didn't already count it for this object
783 //
784 if (candidate.size > cct->_conf->osd_max_object_size
785 && !obj_result.has_size_too_large()) {
786 if (error != CLEAN)
787 errorstream << ", ";
788 error = FOUND_ERROR;
789 errorstream << "size " << candidate.size
790 << " > " << cct->_conf->osd_max_object_size
791 << " is too large";
792 obj_result.set_size_too_large();
793 }
7c673cae
FG
794 for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
795 i != auth.attrs.end();
796 ++i) {
b5b8bbf5 797 // We check system keys seperately
94b18763 798 if (i->first == OI_ATTR || i->first[0] != '_')
b5b8bbf5 799 continue;
7c673cae
FG
800 if (!candidate.attrs.count(i->first)) {
801 if (error != CLEAN)
802 errorstream << ", ";
803 error = FOUND_ERROR;
804 errorstream << "attr name mismatch '" << i->first << "'";
805 obj_result.set_attr_name_mismatch();
806 } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
807 if (error != CLEAN)
808 errorstream << ", ";
809 error = FOUND_ERROR;
810 errorstream << "attr value mismatch '" << i->first << "'";
811 obj_result.set_attr_value_mismatch();
812 }
813 }
814 for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
815 i != candidate.attrs.end();
816 ++i) {
b5b8bbf5 817 // We check system keys seperately
94b18763 818 if (i->first == OI_ATTR || i->first[0] != '_')
b5b8bbf5 819 continue;
7c673cae
FG
820 if (!auth.attrs.count(i->first)) {
821 if (error != CLEAN)
822 errorstream << ", ";
823 error = FOUND_ERROR;
824 errorstream << "attr name mismatch '" << i->first << "'";
825 obj_result.set_attr_name_mismatch();
826 }
827 }
828 return error == FOUND_ERROR;
829}
830
11fdf7f2 831static int dcount(const object_info_t &oi)
7c673cae
FG
832{
833 int count = 0;
834 if (oi.is_data_digest())
835 count++;
836 if (oi.is_omap_digest())
837 count++;
838 return count;
839}
840
841map<pg_shard_t, ScrubMap *>::const_iterator
842 PGBackend::be_select_auth_object(
843 const hobject_t &obj,
844 const map<pg_shard_t,ScrubMap*> &maps,
845 object_info_t *auth_oi,
846 map<pg_shard_t, shard_info_wrapper> &shard_map,
91327a77
AA
847 bool &digest_match,
848 spg_t pgid,
849 ostream &errorstream)
7c673cae
FG
850{
851 eversion_t auth_version;
7c673cae 852
b32b8144 853 // Create list of shards with primary first so it will be auth copy all
31f18b77
FG
854 // other things being equal.
855 list<pg_shard_t> shards;
7c673cae
FG
856 for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
857 j != maps.end();
858 ++j) {
31f18b77
FG
859 if (j->first == get_parent()->whoami_shard())
860 continue;
861 shards.push_back(j->first);
862 }
b32b8144 863 shards.push_front(get_parent()->whoami_shard());
31f18b77
FG
864
865 map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
1adf2230 866 digest_match = true;
31f18b77 867 for (auto &l : shards) {
91327a77
AA
868 ostringstream shard_errorstream;
869 bool error = false;
31f18b77 870 map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
7c673cae
FG
871 map<hobject_t, ScrubMap::object>::iterator i =
872 j->second->objects.find(obj);
873 if (i == j->second->objects.end()) {
874 continue;
875 }
7c673cae 876 auto& shard_info = shard_map[j->first];
b5b8bbf5
FG
877 if (j->first == get_parent()->whoami_shard())
878 shard_info.primary = true;
7c673cae
FG
879 if (i->second.read_error) {
880 shard_info.set_read_error();
91327a77
AA
881 if (error)
882 shard_errorstream << ", ";
883 error = true;
884 shard_errorstream << "candidate had a read error";
7c673cae
FG
885 }
886 if (i->second.ec_hash_mismatch) {
887 shard_info.set_ec_hash_mismatch();
91327a77
AA
888 if (error)
889 shard_errorstream << ", ";
890 error = true;
891 shard_errorstream << "candidate had an ec hash mismatch";
7c673cae
FG
892 }
893 if (i->second.ec_size_mismatch) {
894 shard_info.set_ec_size_mismatch();
91327a77
AA
895 if (error)
896 shard_errorstream << ", ";
897 error = true;
898 shard_errorstream << "candidate had an ec size mismatch";
7c673cae
FG
899 }
900
901 object_info_t oi;
902 bufferlist bl;
903 map<string, bufferptr>::iterator k;
31f18b77 904 SnapSet ss;
94b18763 905 bufferlist ss_bl, hk_bl;
7c673cae
FG
906
907 if (i->second.stat_error) {
908 shard_info.set_stat_error();
91327a77
AA
909 if (error)
910 shard_errorstream << ", ";
911 error = true;
912 shard_errorstream << "candidate had a stat error";
7c673cae
FG
913 // With stat_error no further checking
914 // We don't need to also see a missing_object_info_attr
915 goto out;
916 }
917
b5b8bbf5 918 // We won't pick an auth copy if the snapset is missing or won't decode.
11fdf7f2
TL
919 ceph_assert(!obj.is_snapdir());
920 if (obj.is_head()) {
b5b8bbf5
FG
921 k = i->second.attrs.find(SS_ATTR);
922 if (k == i->second.attrs.end()) {
94b18763 923 shard_info.set_snapset_missing();
91327a77
AA
924 if (error)
925 shard_errorstream << ", ";
926 error = true;
927 shard_errorstream << "candidate had a missing snapset key";
b5b8bbf5
FG
928 } else {
929 ss_bl.push_back(k->second);
930 try {
11fdf7f2
TL
931 auto bliter = ss_bl.cbegin();
932 decode(ss, bliter);
b5b8bbf5
FG
933 } catch (...) {
934 // invalid snapset, probably corrupt
94b18763 935 shard_info.set_snapset_corrupted();
91327a77
AA
936 if (error)
937 shard_errorstream << ", ";
938 error = true;
939 shard_errorstream << "candidate had a corrupt snapset";
94b18763
FG
940 }
941 }
942 }
943
944 if (parent->get_pool().is_erasure()) {
945 ECUtil::HashInfo hi;
946 k = i->second.attrs.find(ECUtil::get_hinfo_key());
947 if (k == i->second.attrs.end()) {
948 shard_info.set_hinfo_missing();
91327a77
AA
949 if (error)
950 shard_errorstream << ", ";
951 error = true;
952 shard_errorstream << "candidate had a missing hinfo key";
94b18763
FG
953 } else {
954 hk_bl.push_back(k->second);
955 try {
11fdf7f2 956 auto bliter = hk_bl.cbegin();
94b18763 957 decode(hi, bliter);
94b18763
FG
958 } catch (...) {
959 // invalid snapset, probably corrupt
960 shard_info.set_hinfo_corrupted();
91327a77
AA
961 if (error)
962 shard_errorstream << ", ";
963 error = true;
964 shard_errorstream << "candidate had a corrupt hinfo";
b5b8bbf5
FG
965 }
966 }
967 }
968
7c673cae
FG
969 k = i->second.attrs.find(OI_ATTR);
970 if (k == i->second.attrs.end()) {
971 // no object info on object, probably corrupt
94b18763 972 shard_info.set_info_missing();
91327a77
AA
973 if (error)
974 shard_errorstream << ", ";
975 error = true;
976 shard_errorstream << "candidate had a missing info key";
7c673cae
FG
977 goto out;
978 }
979 bl.push_back(k->second);
980 try {
11fdf7f2
TL
981 auto bliter = bl.cbegin();
982 decode(oi, bliter);
7c673cae
FG
983 } catch (...) {
984 // invalid object info, probably corrupt
94b18763 985 shard_info.set_info_corrupted();
91327a77
AA
986 if (error)
987 shard_errorstream << ", ";
988 error = true;
989 shard_errorstream << "candidate had a corrupt info";
7c673cae
FG
990 goto out;
991 }
992
b5b8bbf5 993 // This is automatically corrected in PG::_repair_oinfo_oid()
11fdf7f2 994 ceph_assert(oi.soid == obj);
b5b8bbf5 995
b5b8bbf5 996 if (i->second.size != be_get_ondisk_size(oi.size)) {
94b18763 997 shard_info.set_obj_size_info_mismatch();
91327a77
AA
998 if (error)
999 shard_errorstream << ", ";
1000 error = true;
1001 shard_errorstream << "candidate size " << i->second.size << " info size "
1002 << oi.size << " mismatch";
7c673cae
FG
1003 }
1004
1adf2230
AA
1005 // digest_match will only be true if computed digests are the same
1006 if (auth_version != eversion_t()
1007 && auth->second->objects[obj].digest_present
1008 && i->second.digest_present
1009 && auth->second->objects[obj].digest != i->second.digest) {
1010 digest_match = false;
1011 dout(10) << __func__ << " digest_match = false, " << obj << " data_digest 0x" << std::hex << i->second.digest
1012 << " != data_digest 0x" << auth->second->objects[obj].digest << std::dec
1013 << dendl;
1014 }
1015
b5b8bbf5
FG
1016 // Don't use this particular shard due to previous errors
1017 // XXX: For now we can't pick one shard for repair and another's object info or snapset
1018 if (shard_info.errors)
7c673cae
FG
1019 goto out;
1020
1021 if (auth_version == eversion_t() || oi.version > auth_version ||
11fdf7f2 1022 (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
7c673cae
FG
1023 auth = j;
1024 *auth_oi = oi;
1025 auth_version = oi.version;
7c673cae
FG
1026 }
1027
1028out:
91327a77
AA
1029 if (error)
1030 errorstream << pgid.pgid << " shard " << l << " soid " << obj
1031 << " : " << shard_errorstream.str() << "\n";
7c673cae
FG
1032 // Keep scanning other shards
1033 }
1034 dout(10) << __func__ << ": selecting osd " << auth->first
1035 << " for obj " << obj
1036 << " with oi " << *auth_oi
1037 << dendl;
1038 return auth;
1039}
1040
// Compare the per-shard scrub maps object by object.
//
// For every object in master_set an authoritative copy is chosen with
// be_select_auth_object(); each shard's entry is then checked against it
// with be_compare_scrub_objects().
//
// Outputs filled in by this function:
//   missing[obj]        shards whose scrub map has no entry for obj
//   inconsistent[obj]   shards whose entry carries errors
//   authoritative[obj]  shards usable as a repair source; only written when
//                       obj has a missing or inconsistent shard
//   missing_digest[obj] data/omap digests that should be recorded for obj
//   shallow_errors, deep_errors  incremented as errors are found
//   store               receives the per-object error record
//   errorstream         receives one text line per reported problem
//
// repair enables the object-info digest repair path and forces digest updates
// on a recorded-vs-on-disk mismatch.
// NOTE(review): acting is not referenced anywhere in this function body.
1041void PGBackend::be_compare_scrubmaps(
1042 const map<pg_shard_t,ScrubMap*> &maps,
28e407b8 1043 const set<hobject_t> &master_set,
7c673cae
FG
1044 bool repair,
1045 map<hobject_t, set<pg_shard_t>> &missing,
1046 map<hobject_t, set<pg_shard_t>> &inconsistent,
1047 map<hobject_t, list<pg_shard_t>> &authoritative,
9f95a23c
TL
1048 map<hobject_t, pair<std::optional<uint32_t>,
1049 std::optional<uint32_t>>> &missing_digest,
7c673cae
FG
1050 int &shallow_errors, int &deep_errors,
1051 Scrub::Store *store,
1052 const spg_t& pgid,
1053 const vector<int> &acting,
1054 ostream &errorstream)
1055{
7c673cae
FG
// Sampled once; used further down to compute an object's age from
// auth_oi.local_mtime.
1056 utime_t now = ceph_clock_now();
1057
7c673cae
FG
1058 // Check maps against master set and each other
1059 for (set<hobject_t>::const_iterator k = master_set.begin();
1060 k != master_set.end();
1061 ++k) {
1062 object_info_t auth_oi;
1063 map<pg_shard_t, shard_info_wrapper> shard_map;
1064
// Per-object error record; handed to store->add_object_error() when it
// ends up carrying errors.
1065 inconsistent_obj_wrapper object_error{*k};
1066
// Not initialized here; it is passed to be_select_auth_object() below,
// which presumably sets it before it is read -- TODO confirm.
1adf2230 1067 bool digest_match;
7c673cae 1068 map<pg_shard_t, ScrubMap *>::const_iterator auth =
91327a77
AA
1069 be_select_auth_object(*k, maps, &auth_oi, shard_map, digest_match,
1070 pgid, errorstream);
7c673cae
FG
1071
// auth_list: shards that compared clean against the selected copy.
// object_errors: shards where the comparison reported a problem without
// setting any per-shard error bits.
1072 list<pg_shard_t> auth_list;
31f18b77 1073 set<pg_shard_t> object_errors;
7c673cae
FG
// maps.end() means no shard could be selected: record the failure for
// this object and move on to the next one.
1074 if (auth == maps.end()) {
1075 object_error.set_version(0);
b5b8bbf5
FG
1076 object_error.set_auth_missing(*k, maps, shard_map, shallow_errors,
1077 deep_errors, get_parent()->whoami_shard());
7c673cae
FG
1078 if (object_error.has_deep_errors())
1079 ++deep_errors;
1080 else if (object_error.has_shallow_errors())
1081 ++shallow_errors;
1082 store->add_object_error(k->pool, object_error);
1083 errorstream << pgid.pgid << " soid " << *k
91327a77 1084 << " : failed to pick suitable object info\n";
7c673cae
FG
1085 continue;
1086 }
1087 object_error.set_version(auth_oi.user_version);
// Scrub entry of the selected shard; every shard (including the selected
// one) is compared against it in the loop below.
1088 ScrubMap::object& auth_object = auth->second->objects[*k];
1089 set<pg_shard_t> cur_missing;
1090 set<pg_shard_t> cur_inconsistent;
// Becomes true when the object info's data digest is to be rewritten
// through missing_digest (see the repair case inside the loop).
1adf2230 1091 bool fix_digest = false;
7c673cae 1092
11fdf7f2 1093 for (auto j = maps.cbegin(); j != maps.cend(); ++j) {
7c673cae
FG
// Flag the shard whose object info was selected.
1094 if (j == auth)
1095 shard_map[auth->first].selected_oi = true;
1096 if (j->second->objects.count(*k)) {
1097 shard_map[j->first].set_object(j->second->objects[*k]);
1098 // Compare
// ss receives the description of whatever the comparison reports for
// this shard; it is forwarded to errorstream below when found is true.
1099 stringstream ss;
1100 bool found = be_compare_scrub_objects(auth->first,
1101 auth_object,
1102 auth_oi,
1103 j->second->objects[*k],
1104 shard_map[j->first],
1105 object_error,
91327a77
AA
1106 ss,
1107 k->has_snapset());
1adf2230
AA
1108
1109 dout(20) << __func__ << (repair ? " repair " : " ") << (parent->get_pool().is_replicated() ? "replicated " : "")
11fdf7f2
TL
1110 << (j == auth ? "auth" : "") << "shards " << shard_map.size() << (digest_match ? " digest_match " : " ")
1111 << (shard_map[j->first].only_data_digest_mismatch_info() ? "'info mismatch info'" : "")
1112 << dendl;
1adf2230
AA
1113 // If all replicas match, but they don't match object_info we can
1114 // repair it by using missing_digest mechanism
11fdf7f2 1115 if (repair && parent->get_pool().is_replicated() && j == auth && shard_map.size() > 1
1adf2230
AA
1116 && digest_match && shard_map[j->first].only_data_digest_mismatch_info()
1117 && auth_object.digest_present) {
1118 // Set in missing_digests
1119 fix_digest = true;
1120 // Clear the error
1121 shard_map[j->first].clear_data_digest_mismatch_info();
91327a77 1122 errorstream << pgid << " soid " << *k << " : repairing object info data_digest" << "\n";
1adf2230 1123 }
7c673cae
FG
1124 // Some errors might have already been set in be_select_auth_object()
// Three outcomes for a shard that has the object:
//   per-shard error bits set -> inconsistent, one error counter bumped
//   only `found` set         -> remembered in object_errors
//   neither                  -> candidate for the authoritative list
1125 if (shard_map[j->first].errors != 0) {
1126 cur_inconsistent.insert(j->first);
1127 if (shard_map[j->first].has_deep_errors())
1128 ++deep_errors;
1129 else
1130 ++shallow_errors;
1131 // Only true if be_compare_scrub_objects() found errors and put something
1132 // in ss.
1133 if (found)
91327a77
AA
1134 errorstream << pgid << " shard " << j->first << " soid " << *k
1135 << " : " << ss.str() << "\n";
1136 } else if (found) {
31f18b77
FG
1137 // Track possible shard to use as authoritative, if needed
1138 // There are errors, without identifying the shard
1139 object_errors.insert(j->first);
91327a77 1140 errorstream << pgid << " soid " << *k << " : " << ss.str() << "\n";
7c673cae
FG
1141 } else {
1142 // XXX: The auth shard might get here that we don't know
1143 // that it has the "correct" data.
1144 auth_list.push_back(j->first);
1145 }
1146 } else {
// This shard's scrub map has no entry for the object at all.
1147 cur_missing.insert(j->first);
1148 shard_map[j->first].set_missing();
b5b8bbf5 1149 shard_map[j->first].primary = (j->first == get_parent()->whoami_shard());
7c673cae
FG
1150 // Can't have any other errors if there is no information available
1151 ++shallow_errors;
91327a77 1152 errorstream << pgid << " shard " << j->first << " " << *k << " : missing\n";
7c673cae
FG
1153 }
// Attach this shard's (possibly updated) info to the object record.
1154 object_error.add_shard(j->first, shard_map[j->first]);
1155 }
1156
// No shard compared clean. Without object-level errors there is nothing
// to promote, so give up on this object; otherwise pick one shard from
// object_errors to serve as the authoritative copy.
1157 if (auth_list.empty()) {
31f18b77
FG
1158 if (object_errors.empty()) {
1159 errorstream << pgid.pgid << " soid " << *k
91327a77 1160 << " : failed to pick suitable auth object\n";
31f18b77
FG
// Skips the missing/inconsistent/authoritative/digest bookkeeping; the
// code at the out: label still runs for this object.
1161 goto out;
1162 }
1163 // Object errors exist and nothing in auth_list
1164 // Prefer the auth shard otherwise take first from list.
1165 pg_shard_t shard;
1166 if (object_errors.count(auth->first)) {
1167 shard = auth->first;
1168 } else {
1169 shard = *(object_errors.begin());
1170 }
1171 auth_list.push_back(shard);
1172 object_errors.erase(shard);
7c673cae 1173 }
31f18b77
FG
1174 // At this point auth_list is populated, so we add the object errors shards
1175 // as inconsistent.
1176 cur_inconsistent.insert(object_errors.begin(), object_errors.end());
7c673cae
FG
// Publish per-object results only when non-empty, so the output maps
// contain entries solely for objects that need attention.
1177 if (!cur_missing.empty()) {
1178 missing[*k] = cur_missing;
1179 }
1180 if (!cur_inconsistent.empty()) {
1181 inconsistent[*k] = cur_inconsistent;
1182 }
1adf2230
AA
1183
// Repair case flagged in the loop: record the selected shard's computed
// digests so the object info can be updated with them.
1184 if (fix_digest) {
9f95a23c 1185 std::optional<uint32_t> data_digest, omap_digest;
11fdf7f2 1186 ceph_assert(auth_object.digest_present);
1adf2230
AA
1187 data_digest = auth_object.digest;
1188 if (auth_object.omap_digest_present) {
1189 omap_digest = auth_object.omap_digest;
1190 }
1191 missing_digest[*k] = make_pair(data_digest, omap_digest);
1192 }
7c673cae
FG
// Either some shard needs repair (publish the authoritative list), or,
// for replicated pools with no digest fix pending, consider refreshing
// the digests recorded in the object info.
1193 if (!cur_inconsistent.empty() || !cur_missing.empty()) {
1194 authoritative[*k] = auth_list;
1adf2230 1195 } else if (!fix_digest && parent->get_pool().is_replicated()) {
7c673cae
FG
// NO:    leave the recorded digests alone
// MAYBE: update only if the object's age exceeds
//        osd_deep_scrub_update_digest_min_age
// FORCE: update regardless of age
1196 enum {
1197 NO = 0,
1198 MAYBE = 1,
1199 FORCE = 2,
1200 } update = NO;
1201
28e407b8
AA
// A digest was computed by scrub but the object info does not record one.
1202 if (auth_object.digest_present && !auth_oi.is_data_digest()) {
1203 dout(20) << __func__ << " missing data digest on " << *k << dendl;
7c673cae
FG
1204 update = MAYBE;
1205 }
28e407b8
AA
1206 if (auth_object.omap_digest_present && !auth_oi.is_omap_digest()) {
1207 dout(20) << __func__ << " missing omap digest on " << *k << dendl;
7c673cae
FG
1208 update = MAYBE;
1209 }
1210
1211 // recorded digest != actual digest?
1212 if (auth_oi.is_data_digest() && auth_object.digest_present &&
1213 auth_oi.data_digest != auth_object.digest) {
11fdf7f2 1214 ceph_assert(shard_map[auth->first].has_data_digest_mismatch_info());
7c673cae
FG
1215 errorstream << pgid << " recorded data digest 0x"
1216 << std::hex << auth_oi.data_digest << " != on disk 0x"
1217 << auth_object.digest << std::dec << " on " << auth_oi.soid
1218 << "\n";
1219 if (repair)
1220 update = FORCE;
1221 }
1222 if (auth_oi.is_omap_digest() && auth_object.omap_digest_present &&
1223 auth_oi.omap_digest != auth_object.omap_digest) {
11fdf7f2 1224 ceph_assert(shard_map[auth->first].has_omap_digest_mismatch_info());
7c673cae
FG
1225 errorstream << pgid << " recorded omap digest 0x"
1226 << std::hex << auth_oi.omap_digest << " != on disk 0x"
1227 << auth_object.omap_digest << std::dec
1228 << " on " << auth_oi.soid << "\n";
1229 if (repair)
1230 update = FORCE;
1231 }
1232
1233 if (update != NO) {
// Age is measured from the object info's local_mtime to the timestamp
// taken at function entry.
1234 utime_t age = now - auth_oi.local_mtime;
1235 if (update == FORCE ||
1236 age > cct->_conf->osd_deep_scrub_update_digest_min_age) {
9f95a23c 1237 std::optional<uint32_t> data_digest, omap_digest;
28e407b8
AA
1238 if (auth_object.digest_present) {
1239 data_digest = auth_object.digest;
1240 dout(20) << __func__ << " will update data digest on " << *k << dendl;
1241 }
1242 if (auth_object.omap_digest_present) {
1243 omap_digest = auth_object.omap_digest;
1244 dout(20) << __func__ << " will update omap digest on " << *k << dendl;
1245 }
1246 missing_digest[*k] = make_pair(data_digest, omap_digest);
7c673cae
FG
1247 } else {
1248 dout(20) << __func__ << " missing digest but age " << age
1249 << " < " << cct->_conf->osd_deep_scrub_update_digest_min_age
1250 << " on " << *k << dendl;
1251 }
1252 }
1253 }
// Reached by fall-through and by the goto above: bump at most one counter
// for the object-level error and persist the record if it (or the union of
// its shards) carries any error.
1254out:
1255 if (object_error.has_deep_errors())
1256 ++deep_errors;
1257 else if (object_error.has_shallow_errors())
1258 ++shallow_errors;
1259 if (object_error.errors || object_error.union_shards.errors) {
1260 store->add_object_error(k->pool, object_error);
1261 }
1262 }
1263}
28e407b8 1264
11fdf7f2 1265void PGBackend::be_omap_checks(const map<pg_shard_t,ScrubMap*> &maps,
28e407b8 1266 const set<hobject_t> &master_set,
11fdf7f2 1267 omap_stat_t& omap_stats,
28e407b8
AA
1268 ostream &warnstream) const
1269{
11fdf7f2 1270 bool needs_omap_check = false;
28e407b8 1271 for (const auto& map : maps) {
11fdf7f2
TL
1272 if (map.second->has_large_omap_object_errors || map.second->has_omap_keys) {
1273 needs_omap_check = true;
28e407b8
AA
1274 break;
1275 }
1276 }
1277
11fdf7f2
TL
1278 if (!needs_omap_check) {
1279 return; // Nothing to do
28e407b8
AA
1280 }
1281
11fdf7f2 1282 // Iterate through objects and update omap stats
28e407b8
AA
1283 for (const auto& k : master_set) {
1284 for (const auto& map : maps) {
494da23a
TL
1285 if (map.first != get_parent()->primary_shard()) {
1286 // Only set omap stats for the primary
1287 continue;
1288 }
11fdf7f2
TL
1289 auto it = map.second->objects.find(k);
1290 if (it == map.second->objects.end())
1291 continue;
1292 ScrubMap::object& obj = it->second;
1293 omap_stats.omap_bytes += obj.object_omap_bytes;
1294 omap_stats.omap_keys += obj.object_omap_keys;
28e407b8 1295 if (obj.large_omap_object_found) {
eafe8130
TL
1296 pg_t pg;
1297 auto osdmap = get_osdmap();
1298 osdmap->map_to_pg(k.pool, k.oid.name, k.get_key(), k.nspace, &pg);
1299 pg_t mpg = osdmap->raw_pg_to_pg(pg);
11fdf7f2 1300 omap_stats.large_omap_objects++;
eafe8130
TL
1301 warnstream << "Large omap object found. Object: " << k
1302 << " PG: " << pg << " (" << mpg << ")"
1303 << " Key count: " << obj.large_omap_object_key_count
1304 << " Size (bytes): " << obj.large_omap_object_value_size
1305 << '\n';
28e407b8
AA
1306 break;
1307 }
1308 }
1309 }
1310}