// ceph/src/osd/PGBackend.cc
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2013,2014 Inktank Storage, Inc.
 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
 *
 * Author: Loic Dachary <loic@dachary.org>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */


#include "common/errno.h"
#include "common/scrub_types.h"
#include "ReplicatedBackend.h"
#include "ScrubStore.h"
#include "ECBackend.h"
#include "PGBackend.h"
#include "OSD.h"
#include "erasure-code/ErasureCodePlugin.h"
#include "OSDMap.h"
#include "PGLog.h"
#include "common/LogClient.h"

#define dout_context cct
#define dout_subsys ceph_subsys_osd
#define DOUT_PREFIX_ARGS this
#undef dout_prefix
#define dout_prefix _prefix(_dout, this)
static ostream& _prefix(std::ostream *_dout, PGBackend *pgb) {
  return *_dout << pgb->get_parent()->gen_dbg_prefix();
}

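// Build the transaction that undoes the object modification described by
// entry.mod_desc.  Each visitor callback below generates the inverse of one
// logged sub-operation; prepending each new piece (temp.append + swap) means
// the sub-operations are undone in reverse of the order they were recorded.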
void PGBackend::rollback(
  const pg_log_entry_t &entry,
  ObjectStore::Transaction *t)
{

  struct RollbackVisitor : public ObjectModDesc::Visitor {
    const hobject_t &hoid;
    PGBackend *pg;
    ObjectStore::Transaction t;
    RollbackVisitor(
      const hobject_t &hoid,
      PGBackend *pg) : hoid(hoid), pg(pg) {}
    void append(uint64_t old_size) override {
      ObjectStore::Transaction temp;
      pg->rollback_append(hoid, old_size, &temp);
      temp.append(t);
      temp.swap(t);
    }
    void setattrs(map<string, boost::optional<bufferlist> > &attrs) override {
      ObjectStore::Transaction temp;
      pg->rollback_setattrs(hoid, attrs, &temp);
      temp.append(t);
      temp.swap(t);
    }
    void rmobject(version_t old_version) override {
      ObjectStore::Transaction temp;
      pg->rollback_stash(hoid, old_version, &temp);
      temp.append(t);
      temp.swap(t);
    }
    void try_rmobject(version_t old_version) override {
      ObjectStore::Transaction temp;
      pg->rollback_try_stash(hoid, old_version, &temp);
      temp.append(t);
      temp.swap(t);
    }
    void create() override {
      ObjectStore::Transaction temp;
      pg->rollback_create(hoid, &temp);
      temp.append(t);
      temp.swap(t);
    }
    void update_snaps(const set<snapid_t> &snaps) override {
      ObjectStore::Transaction temp;
      pg->get_parent()->pgb_set_object_snap_mapping(hoid, snaps, &temp);
      temp.append(t);
      temp.swap(t);
    }
    void rollback_extents(
      version_t gen,
      const vector<pair<uint64_t, uint64_t> > &extents) override {
      ObjectStore::Transaction temp;
      pg->rollback_extents(gen, extents, hoid, &temp);
      temp.append(t);
      temp.swap(t);
    }
  };

  assert(entry.mod_desc.can_rollback());
  RollbackVisitor vis(entry.soid, this);
  entry.mod_desc.visit(&vis);
  t->append(vis.t);
}

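// Trimmer drops the on-disk rollback state (stashed object generations) kept
// for a log entry once that entry no longer needs to be rollbackable; it is
// used by rollforward() and trim() below.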
struct Trimmer : public ObjectModDesc::Visitor {
  const hobject_t &soid;
  PGBackend *pg;
  ObjectStore::Transaction *t;
  Trimmer(
    const hobject_t &soid,
    PGBackend *pg,
    ObjectStore::Transaction *t)
    : soid(soid), pg(pg), t(t) {}
  void rmobject(version_t old_version) override {
    pg->trim_rollback_object(
      soid,
      old_version,
      t);
  }
  // try_rmobject defaults to rmobject
  void rollback_extents(
    version_t gen,
    const vector<pair<uint64_t, uint64_t> > &extents) override {
    pg->trim_rollback_object(
      soid,
      gen,
      t);
  }
};

void PGBackend::rollforward(
  const pg_log_entry_t &entry,
  ObjectStore::Transaction *t)
{
  auto dpp = get_parent()->get_dpp();
  ldpp_dout(dpp, 20) << __func__ << ": entry=" << entry << dendl;
  if (!entry.can_rollback())
    return;
  Trimmer trimmer(entry.soid, this, t);
  entry.mod_desc.visit(&trimmer);
}

void PGBackend::trim(
  const pg_log_entry_t &entry,
  ObjectStore::Transaction *t)
{
  if (!entry.can_rollback())
    return;
  Trimmer trimmer(entry.soid, this, t);
  entry.mod_desc.visit(&trimmer);
}

void PGBackend::try_stash(
  const hobject_t &hoid,
  version_t v,
  ObjectStore::Transaction *t)
{
  t->try_rename(
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    ghobject_t(hoid, v, get_parent()->whoami_shard().shard));
}

void PGBackend::remove(
  const hobject_t &hoid,
  ObjectStore::Transaction *t) {
  assert(!hoid.is_temp());
  t->remove(
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
  get_parent()->pgb_clear_object_snap_mapping(hoid, t);
}

void PGBackend::on_change_cleanup(ObjectStore::Transaction *t)
{
  dout(10) << __func__ << dendl;
  // clear temp
  for (set<hobject_t>::iterator i = temp_contents.begin();
       i != temp_contents.end();
       ++i) {
    dout(10) << __func__ << ": Removing oid "
             << *i << " from the temp collection" << dendl;
    t->remove(
      coll,
      ghobject_t(*i, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
  }
  temp_contents.clear();
}

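// List up to `max` user-visible objects starting from `begin`, skipping
// pgmeta, temp, and rollback-generation entries.  The loop keeps asking the
// store for more until at least `min` objects are collected, the collection
// is exhausted, or an error occurs; `*next` receives the continuation point.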
int PGBackend::objects_list_partial(
  const hobject_t &begin,
  int min,
  int max,
  vector<hobject_t> *ls,
  hobject_t *next)
{
  assert(ls);
  // Starts with the smallest generation to make sure the result list
  // has the marker object (it might have multiple generations
  // though, which would be filtered).
  ghobject_t _next;
  if (!begin.is_min())
    _next = ghobject_t(begin, 0, get_parent()->whoami_shard().shard);
  ls->reserve(max);
  int r = 0;

  if (min > max)
    min = max;

  while (!_next.is_max() && ls->size() < (unsigned)min) {
    vector<ghobject_t> objects;
    r = store->collection_list(
      ch,
      _next,
      ghobject_t::get_max(),
      max - ls->size(),
      &objects,
      &_next);
    if (r != 0) {
      derr << __func__ << " list collection " << ch << " got: " << cpp_strerror(r) << dendl;
      break;
    }
    for (vector<ghobject_t>::iterator i = objects.begin();
         i != objects.end();
         ++i) {
      if (i->is_pgmeta() || i->hobj.is_temp()) {
        continue;
      }
      if (i->is_no_gen()) {
        ls->push_back(i->hobj);
      }
    }
  }
  if (r == 0)
    *next = _next.hobj;
  return r;
}

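// List every head object between `start` and `end`; generation (rollback)
// objects are reported separately through `gen_obs` when the caller
// provides it.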
int PGBackend::objects_list_range(
  const hobject_t &start,
  const hobject_t &end,
  snapid_t seq,
  vector<hobject_t> *ls,
  vector<ghobject_t> *gen_obs)
{
  assert(ls);
  vector<ghobject_t> objects;
  int r = store->collection_list(
    ch,
    ghobject_t(start, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    ghobject_t(end, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    INT_MAX,
    &objects,
    NULL);
  ls->reserve(objects.size());
  for (vector<ghobject_t>::iterator i = objects.begin();
       i != objects.end();
       ++i) {
    if (i->is_pgmeta() || i->hobj.is_temp()) {
      continue;
    }
    if (i->is_no_gen()) {
      ls->push_back(i->hobj);
    } else if (gen_obs) {
      gen_obs->push_back(*i);
    }
  }
  return r;
}

int PGBackend::objects_get_attr(
  const hobject_t &hoid,
  const string &attr,
  bufferlist *out)
{
  bufferptr bp;
  int r = store->getattr(
    ch,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    attr.c_str(),
    bp);
  if (r >= 0 && out) {
    out->clear();
    out->push_back(std::move(bp));
  }
  return r;
}

int PGBackend::objects_get_attrs(
  const hobject_t &hoid,
  map<string, bufferlist> *out)
{
  return store->getattrs(
    ch,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    *out);
}

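// The rollback_* helpers below each emit the store operations that undo one
// kind of logged change: restoring the saved attrs, truncating away an
// append, moving a stashed generation back into place, or copying saved
// extents back from the rollback generation of the object.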
void PGBackend::rollback_setattrs(
  const hobject_t &hoid,
  map<string, boost::optional<bufferlist> > &old_attrs,
  ObjectStore::Transaction *t) {
  map<string, bufferlist> to_set;
  assert(!hoid.is_temp());
  for (map<string, boost::optional<bufferlist> >::iterator i = old_attrs.begin();
       i != old_attrs.end();
       ++i) {
    if (i->second) {
      to_set[i->first] = i->second.get();
    } else {
      t->rmattr(
        coll,
        ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
        i->first);
    }
  }
  t->setattrs(
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    to_set);
}

void PGBackend::rollback_append(
  const hobject_t &hoid,
  uint64_t old_size,
  ObjectStore::Transaction *t) {
  assert(!hoid.is_temp());
  t->truncate(
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
    old_size);
}

void PGBackend::rollback_stash(
  const hobject_t &hoid,
  version_t old_version,
  ObjectStore::Transaction *t) {
  assert(!hoid.is_temp());
  t->remove(
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
  t->collection_move_rename(
    coll,
    ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard),
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
}

void PGBackend::rollback_try_stash(
  const hobject_t &hoid,
  version_t old_version,
  ObjectStore::Transaction *t) {
  assert(!hoid.is_temp());
  t->remove(
    coll,
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
  t->try_rename(
    coll,
    ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard),
    ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
}

void PGBackend::rollback_extents(
  version_t gen,
  const vector<pair<uint64_t, uint64_t> > &extents,
  const hobject_t &hoid,
  ObjectStore::Transaction *t) {
  auto shard = get_parent()->whoami_shard().shard;
  for (auto &&extent: extents) {
    t->clone_range(
      coll,
      ghobject_t(hoid, gen, shard),
      ghobject_t(hoid, ghobject_t::NO_GEN, shard),
      extent.first,
      extent.second,
      extent.first);
  }
  t->remove(
    coll,
    ghobject_t(hoid, gen, shard));
}

void PGBackend::trim_rollback_object(
  const hobject_t &hoid,
  version_t old_version,
  ObjectStore::Transaction *t) {
  assert(!hoid.is_temp());
  t->remove(
    coll, ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard));
}

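// Factory: pick the concrete backend for a pool.  Replicated pools get a
// ReplicatedBackend; erasure-coded pools load the erasure-code plugin named
// in the pool's profile and hand it to an ECBackend.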
PGBackend *PGBackend::build_pg_backend(
  const pg_pool_t &pool,
  const OSDMapRef curmap,
  Listener *l,
  coll_t coll,
  ObjectStore::CollectionHandle &ch,
  ObjectStore *store,
  CephContext *cct)
{
  switch (pool.type) {
  case pg_pool_t::TYPE_REPLICATED: {
    return new ReplicatedBackend(l, coll, ch, store, cct);
  }
  case pg_pool_t::TYPE_ERASURE: {
    ErasureCodeInterfaceRef ec_impl;
    ErasureCodeProfile profile = curmap->get_erasure_code_profile(pool.erasure_code_profile);
    assert(profile.count("plugin"));
    stringstream ss;
    ceph::ErasureCodePluginRegistry::instance().factory(
      profile.find("plugin")->second,
      cct->_conf->get_val<std::string>("erasure_code_dir"),
      profile,
      &ec_impl,
      &ss);
    assert(ec_impl);
    return new ECBackend(
      l,
      coll,
      ch,
      store,
      cct,
      ec_impl,
      pool.stripe_width);
  }
  default:
    ceph_abort();
    return NULL;
  }
}

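// Fill in `map` for the listed objects: stat each one, record size and
// xattrs, and on deep scrubs call be_deep_scrub() to compute checksums.
// ENOENT is silently skipped; EIO marks the entry with stat_error.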
/*
 * pg lock may or may not be held
 */
void PGBackend::be_scan_list(
  ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
  ThreadPool::TPHandle &handle)
{
  dout(10) << __func__ << " scanning " << ls.size() << " objects"
           << (deep ? " deeply" : "") << dendl;
  int i = 0;
  for (vector<hobject_t>::const_iterator p = ls.begin();
       p != ls.end();
       ++p, i++) {
    handle.reset_tp_timeout();
    hobject_t poid = *p;

    struct stat st;
    int r = store->stat(
      ch,
      ghobject_t(
        poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
      &st,
      true);
    if (r == 0) {
      ScrubMap::object &o = map.objects[poid];
      o.size = st.st_size;
      assert(!o.negative);
      store->getattrs(
        ch,
        ghobject_t(
          poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
        o.attrs);

      // calculate the CRC32 on deep scrubs
      if (deep) {
        be_deep_scrub(*p, seed, o, handle);
      }

      dout(25) << __func__ << " " << poid << dendl;
    } else if (r == -ENOENT) {
      dout(25) << __func__ << " " << poid << " got " << r
               << ", skipping" << dendl;
    } else if (r == -EIO) {
      dout(25) << __func__ << " " << poid << " got " << r
               << ", stat_error" << dendl;
      ScrubMap::object &o = map.objects[poid];
      o.stat_error = true;
    } else {
      derr << __func__ << " got: " << cpp_strerror(r) << dendl;
      ceph_abort();
    }
  }
}

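// Compare one shard's copy of an object against the chosen authoritative
// shard and against the authoritative object_info_t.  Discrepancies are
// recorded in shard_result / obj_result and described on errorstream;
// returns true iff anything was written to the stream.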
bool PGBackend::be_compare_scrub_objects(
  pg_shard_t auth_shard,
  const ScrubMap::object &auth,
  const object_info_t& auth_oi,
  const ScrubMap::object &candidate,
  shard_info_wrapper &shard_result,
  inconsistent_obj_wrapper &obj_result,
  ostream &errorstream)
{
  enum { CLEAN, FOUND_ERROR } error = CLEAN;
  if (candidate.stat_error) {
    assert(shard_result.has_stat_error());
    error = FOUND_ERROR;
    errorstream << "candidate had a stat error";
  }
  if (candidate.read_error || candidate.ec_hash_mismatch || candidate.ec_size_mismatch) {
    error = FOUND_ERROR;
    errorstream << "candidate had a read error";
  }
  if (auth.digest_present && candidate.digest_present) {
    if (auth.digest != candidate.digest) {
      if (error != CLEAN)
        errorstream << ", ";
      error = FOUND_ERROR;
      errorstream << "data_digest 0x" << std::hex << candidate.digest
                  << " != data_digest 0x" << auth.digest << std::dec
                  << " from shard " << auth_shard;
      obj_result.set_data_digest_mismatch();
    }
  }
  if (auth.omap_digest_present && candidate.omap_digest_present) {
    if (auth.omap_digest != candidate.omap_digest) {
      if (error != CLEAN)
        errorstream << ", ";
      error = FOUND_ERROR;
      errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest
                  << " != omap_digest 0x" << auth.omap_digest << std::dec
                  << " from shard " << auth_shard;
      obj_result.set_omap_digest_mismatch();
    }
  }
  if (parent->get_pool().is_replicated()) {
    if (auth_oi.is_data_digest() && candidate.digest_present) {
      if (auth_oi.data_digest != candidate.digest) {
        if (error != CLEAN)
          errorstream << ", ";
        error = FOUND_ERROR;
        errorstream << "data_digest 0x" << std::hex << candidate.digest
                    << " != data_digest 0x" << auth_oi.data_digest << std::dec
                    << " from auth oi " << auth_oi;
        shard_result.set_data_digest_mismatch_oi();
      }
    }
    if (auth_oi.is_omap_digest() && candidate.omap_digest_present) {
      if (auth_oi.omap_digest != candidate.omap_digest) {
        if (error != CLEAN)
          errorstream << ", ";
        error = FOUND_ERROR;
        errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest
                    << " != omap_digest 0x" << auth_oi.omap_digest << std::dec
                    << " from auth oi " << auth_oi;
        shard_result.set_omap_digest_mismatch_oi();
      }
    }
  }
  if (candidate.stat_error)
    return error == FOUND_ERROR;
  uint64_t oi_size = be_get_ondisk_size(auth_oi.size);
  if (oi_size != candidate.size) {
    if (error != CLEAN)
      errorstream << ", ";
    error = FOUND_ERROR;
    errorstream << "size " << candidate.size
                << " != size " << oi_size
                << " from auth oi " << auth_oi;
    shard_result.set_size_mismatch_oi();
  }
  if (auth.size != candidate.size) {
    if (error != CLEAN)
      errorstream << ", ";
    error = FOUND_ERROR;
    errorstream << "size " << candidate.size
                << " != size " << auth.size
                << " from shard " << auth_shard;
    obj_result.set_size_mismatch();
  }
  for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
       i != auth.attrs.end();
       ++i) {
    if (!candidate.attrs.count(i->first)) {
      if (error != CLEAN)
        errorstream << ", ";
      error = FOUND_ERROR;
      errorstream << "attr name mismatch '" << i->first << "'";
      obj_result.set_attr_name_mismatch();
    } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
      if (error != CLEAN)
        errorstream << ", ";
      error = FOUND_ERROR;
      errorstream << "attr value mismatch '" << i->first << "'";
      obj_result.set_attr_value_mismatch();
    }
  }
  for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
       i != candidate.attrs.end();
       ++i) {
    if (!auth.attrs.count(i->first)) {
      if (error != CLEAN)
        errorstream << ", ";
      error = FOUND_ERROR;
      errorstream << "attr name mismatch '" << i->first << "'";
      obj_result.set_attr_name_mismatch();
    }
  }
  return error == FOUND_ERROR;
}

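// Number of digests (data + omap) recorded in an object_info_t; used to
// prefer the candidate with more digest information when versions tie.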
static int dcount(const object_info_t &oi)
{
  int count = 0;
  if (oi.is_data_digest())
    count++;
  if (oi.is_omap_digest())
    count++;
  return count;
}

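// Pick the shard whose copy of `obj` will be treated as authoritative for
// scrub: scan every shard's ScrubMap entry, record per-shard errors
// (read/stat errors, EC hash/size mismatches, missing or undecodable
// object_info), and among the usable shards prefer the highest object_info
// version, breaking ties by the number of recorded digests.  Returns
// maps.end() if no shard had a usable object_info.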
map<pg_shard_t, ScrubMap *>::const_iterator
  PGBackend::be_select_auth_object(
  const hobject_t &obj,
  const map<pg_shard_t,ScrubMap*> &maps,
  object_info_t *auth_oi,
  map<pg_shard_t, shard_info_wrapper> &shard_map,
  inconsistent_obj_wrapper &object_error)
{
  eversion_t auth_version;
  bufferlist auth_bl;

  map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
  for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
       j != maps.end();
       ++j) {
    map<hobject_t, ScrubMap::object>::iterator i =
      j->second->objects.find(obj);
    if (i == j->second->objects.end()) {
      continue;
    }
    string error_string;
    auto& shard_info = shard_map[j->first];
    if (i->second.read_error) {
      shard_info.set_read_error();
      error_string += " read_error";
    }
    if (i->second.ec_hash_mismatch) {
      shard_info.set_ec_hash_mismatch();
      error_string += " ec_hash_mismatch";
    }
    if (i->second.ec_size_mismatch) {
      shard_info.set_ec_size_mismatch();
      error_string += " ec_size_mismatch";
    }

    object_info_t oi;
    bufferlist bl;
    map<string, bufferptr>::iterator k;

    if (i->second.stat_error) {
      shard_info.set_stat_error();
      error_string += " stat_error";
      // With a stat_error there is no further checking to do; we don't need
      // to also report a missing object_info attr for this shard.
      goto out;
    }

    k = i->second.attrs.find(OI_ATTR);
    if (k == i->second.attrs.end()) {
      // no object info on object, probably corrupt
      shard_info.set_oi_attr_missing();
      error_string += " oi_attr_missing";
      goto out;
    }
    bl.push_back(k->second);
    try {
      bufferlist::iterator bliter = bl.begin();
      ::decode(oi, bliter);
    } catch (...) {
      // invalid object info, probably corrupt
      shard_info.set_oi_attr_corrupted();
      error_string += " oi_attr_corrupted";
      goto out;
    }

    if (auth_version != eversion_t()) {
      if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) {
        object_error.set_object_info_inconsistency();
        error_string += " object_info_inconsistency";
      }
    }

    // Don't use this particular shard because it won't be able to repair data
    // XXX: For now we can't pick one shard for repair and another shard's
    // object info
    if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
      goto out;

    if (auth_version == eversion_t() || oi.version > auth_version ||
        (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
      auth = j;
      *auth_oi = oi;
      auth_version = oi.version;
      auth_bl.clear();
      auth_bl.append(bl);
    }

out:
    // Check error_string because some errors already generated messages
    if (error_string != "") {
      dout(10) << __func__ << ": error(s) osd " << j->first
               << " for obj " << obj
               << "," << error_string
               << dendl;
    }
    // Keep scanning other shards
  }
  dout(10) << __func__ << ": selecting osd " << auth->first
           << " for obj " << obj
           << " with oi " << *auth_oi
           << dendl;
  return auth;
}

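// Cross-check all shards' scrub maps.  For every object seen by any shard,
// pick an authoritative copy, compare the remaining shards against it, and
// accumulate the missing / inconsistent / authoritative sets plus error
// counters for the caller.  On replicated pools this is also where missing
// or stale on-disk digests are queued for update via missing_digest.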
void PGBackend::be_compare_scrubmaps(
  const map<pg_shard_t,ScrubMap*> &maps,
  bool repair,
  map<hobject_t, set<pg_shard_t>> &missing,
  map<hobject_t, set<pg_shard_t>> &inconsistent,
  map<hobject_t, list<pg_shard_t>> &authoritative,
  map<hobject_t, pair<uint32_t,uint32_t>> &missing_digest,
  int &shallow_errors, int &deep_errors,
  Scrub::Store *store,
  const spg_t& pgid,
  const vector<int> &acting,
  ostream &errorstream)
{
  map<hobject_t,ScrubMap::object>::const_iterator i;
  map<pg_shard_t, ScrubMap *>::const_iterator j;
  set<hobject_t> master_set;
  utime_t now = ceph_clock_now();

  // Construct master set
  for (j = maps.begin(); j != maps.end(); ++j) {
    for (i = j->second->objects.begin(); i != j->second->objects.end(); ++i) {
      master_set.insert(i->first);
    }
  }

  // Check maps against master set and each other
  for (set<hobject_t>::const_iterator k = master_set.begin();
       k != master_set.end();
       ++k) {
    object_info_t auth_oi;
    map<pg_shard_t, shard_info_wrapper> shard_map;

    inconsistent_obj_wrapper object_error{*k};

    map<pg_shard_t, ScrubMap *>::const_iterator auth =
      be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);

    list<pg_shard_t> auth_list;
    if (auth == maps.end()) {
      object_error.set_version(0);
      object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors);
      if (object_error.has_deep_errors())
        ++deep_errors;
      else if (object_error.has_shallow_errors())
        ++shallow_errors;
      store->add_object_error(k->pool, object_error);
      errorstream << pgid.pgid << " soid " << *k
                  << ": failed to pick suitable object info\n";
      continue;
    }
    object_error.set_version(auth_oi.user_version);
    ScrubMap::object& auth_object = auth->second->objects[*k];
    set<pg_shard_t> cur_missing;
    set<pg_shard_t> cur_inconsistent;

    for (j = maps.begin(); j != maps.end(); ++j) {
      if (j == auth)
        shard_map[auth->first].selected_oi = true;
      if (j->second->objects.count(*k)) {
        shard_map[j->first].set_object(j->second->objects[*k]);
        // Compare
        stringstream ss;
        bool found = be_compare_scrub_objects(auth->first,
                                              auth_object,
                                              auth_oi,
                                              j->second->objects[*k],
                                              shard_map[j->first],
                                              object_error,
                                              ss);
        // Some errors might have already been set in be_select_auth_object()
        if (shard_map[j->first].errors != 0) {
          cur_inconsistent.insert(j->first);
          if (shard_map[j->first].has_deep_errors())
            ++deep_errors;
          else
            ++shallow_errors;
          // Only true if be_compare_scrub_objects() found errors and put
          // something in ss.
          if (found)
            errorstream << pgid << " shard " << j->first << ": soid " << *k
                        << " " << ss.str() << "\n";
        } else {
          // XXX: The auth shard might get added here even though we don't
          // know that it has the "correct" data.
          auth_list.push_back(j->first);
        }
      } else {
        cur_missing.insert(j->first);
        shard_map[j->first].set_missing();
        // Can't have any other errors if there is no information available
        ++shallow_errors;
        errorstream << pgid << " shard " << j->first << " missing " << *k
                    << "\n";
      }
      object_error.add_shard(j->first, shard_map[j->first]);
    }

    if (auth_list.empty()) {
      errorstream << pgid.pgid << " soid " << *k
                  << ": failed to pick suitable auth object\n";
      goto out;
    }
    if (!cur_missing.empty()) {
      missing[*k] = cur_missing;
    }
    if (!cur_inconsistent.empty()) {
      inconsistent[*k] = cur_inconsistent;
    }
    if (!cur_inconsistent.empty() || !cur_missing.empty()) {
      authoritative[*k] = auth_list;
    } else if (parent->get_pool().is_replicated()) {
      enum {
        NO = 0,
        MAYBE = 1,
        FORCE = 2,
      } update = NO;

      if (auth_object.digest_present && auth_object.omap_digest_present &&
          (!auth_oi.is_data_digest() || !auth_oi.is_omap_digest())) {
        dout(20) << __func__ << " missing digest on " << *k << dendl;
        update = MAYBE;
      }
      if (auth_object.digest_present && auth_object.omap_digest_present &&
          cct->_conf->osd_debug_scrub_chance_rewrite_digest &&
          (((unsigned)rand() % 100) >
           cct->_conf->osd_debug_scrub_chance_rewrite_digest)) {
        dout(20) << __func__ << " randomly updating digest on " << *k << dendl;
        update = MAYBE;
      }

      // recorded digest != actual digest?
      if (auth_oi.is_data_digest() && auth_object.digest_present &&
          auth_oi.data_digest != auth_object.digest) {
        assert(shard_map[auth->first].has_data_digest_mismatch_oi());
        errorstream << pgid << " recorded data digest 0x"
                    << std::hex << auth_oi.data_digest << " != on disk 0x"
                    << auth_object.digest << std::dec << " on " << auth_oi.soid
                    << "\n";
        if (repair)
          update = FORCE;
      }
      if (auth_oi.is_omap_digest() && auth_object.omap_digest_present &&
          auth_oi.omap_digest != auth_object.omap_digest) {
        assert(shard_map[auth->first].has_omap_digest_mismatch_oi());
        errorstream << pgid << " recorded omap digest 0x"
                    << std::hex << auth_oi.omap_digest << " != on disk 0x"
                    << auth_object.omap_digest << std::dec
                    << " on " << auth_oi.soid << "\n";
        if (repair)
          update = FORCE;
      }

      if (update != NO) {
        utime_t age = now - auth_oi.local_mtime;
        if (update == FORCE ||
            age > cct->_conf->osd_deep_scrub_update_digest_min_age) {
          dout(20) << __func__ << " will update digest on " << *k << dendl;
          missing_digest[*k] = make_pair(auth_object.digest,
                                         auth_object.omap_digest);
        } else {
          dout(20) << __func__ << " missing digest but age " << age
                   << " < " << cct->_conf->osd_deep_scrub_update_digest_min_age
                   << " on " << *k << dendl;
        }
      }
    }
out:
    if (object_error.has_deep_errors())
      ++deep_errors;
    else if (object_error.has_shallow_errors())
      ++shallow_errors;
    if (object_error.errors || object_error.union_shards.errors) {
      store->add_object_error(k->pool, object_error);
    }
  }
}