]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/SnapMapper.cc
Import ceph 15.2.8
[ceph.git] / ceph / src / osd / SnapMapper.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "SnapMapper.h"
16
17 #define dout_context cct
18 #define dout_subsys ceph_subsys_osd
19 #undef dout_prefix
20 #define dout_prefix *_dout << "snap_mapper."
21
22 using std::string;
23
24 const string SnapMapper::LEGACY_MAPPING_PREFIX = "MAP_";
25 const string SnapMapper::MAPPING_PREFIX = "SNA_";
26 const string SnapMapper::OBJECT_PREFIX = "OBJ_";
27
28 const char *SnapMapper::PURGED_SNAP_PREFIX = "PSN_";
29
/*

  We have a bidirectional mapping, (1) from each snap+obj to object,
  sorted by snapshot, such that we can enumerate to identify all clones
  mapped to a particular snapshot, and (2) from object to snaps, so we
  can identify which reverse mappings exist for any given object (and,
  e.g., clean up on deletion).

  "MAP_"   (legacy layout)
  + ("%016x" % snapid)
  + "_"
  + (".%x" % shard_id)
  + "_"
  + hobject_t::to_str() ("%llx.%8x.%lx.name...." % pool, hash, snap)
  -> SnapMapper::Mapping { snap, hoid }

  "SNA_"
  + ("%lld" % poolid)
  + "_"
  + ("%016x" % snapid)
  + "_"
  + (".%x" % shard_id)
  + "_"
  + hobject_t::to_str() ("%llx.%8x.%lx.name...." % pool, hash, snap)
  -> SnapMapper::Mapping { snap, hoid }

  "OBJ_"
  + (".%x" % shard_id)
  + hobject_t::to_str()
  -> SnapMapper::object_snaps { oid, set<snapid_t> }

  */
62
63 int OSDriver::get_keys(
64 const std::set<std::string> &keys,
65 std::map<std::string, bufferlist> *out)
66 {
67 return os->omap_get_values(ch, hoid, keys, out);
68 }
69
70 int OSDriver::get_next(
71 const std::string &key,
72 pair<std::string, bufferlist> *next)
73 {
74 ObjectMap::ObjectMapIterator iter =
75 os->get_omap_iterator(ch, hoid);
76 if (!iter) {
77 ceph_abort();
78 return -EINVAL;
79 }
80 iter->upper_bound(key);
81 if (iter->valid()) {
82 if (next)
83 *next = make_pair(iter->key(), iter->value());
84 return 0;
85 } else {
86 return -ENOENT;
87 }
88 }
89
90 string SnapMapper::get_prefix(int64_t pool, snapid_t snap)
91 {
92 char buf[100];
93 int len = snprintf(
94 buf, sizeof(buf),
95 "%lld_%.*X_",
96 (long long)pool,
97 (int)(sizeof(snap)*2), static_cast<unsigned>(snap));
98 return MAPPING_PREFIX + string(buf, len);
99 }
100
101 string SnapMapper::to_raw_key(
102 const pair<snapid_t, hobject_t> &in)
103 {
104 return get_prefix(in.second.pool, in.first) + shard_prefix + in.second.to_str();
105 }
106
107 pair<string, bufferlist> SnapMapper::to_raw(
108 const pair<snapid_t, hobject_t> &in)
109 {
110 bufferlist bl;
111 encode(Mapping(in), bl);
112 return make_pair(
113 to_raw_key(in),
114 bl);
115 }
116
117 pair<snapid_t, hobject_t> SnapMapper::from_raw(
118 const pair<std::string, bufferlist> &image)
119 {
120 using ceph::decode;
121 Mapping map;
122 bufferlist bl(image.second);
123 auto bp = bl.cbegin();
124 decode(map, bp);
125 return make_pair(map.snap, map.hoid);
126 }
127
128 bool SnapMapper::is_mapping(const string &to_test)
129 {
130 return to_test.substr(0, MAPPING_PREFIX.size()) == MAPPING_PREFIX;
131 }
132
133 string SnapMapper::to_object_key(const hobject_t &hoid)
134 {
135 return OBJECT_PREFIX + shard_prefix + hoid.to_str();
136 }
137
138 void SnapMapper::object_snaps::encode(bufferlist &bl) const
139 {
140 ENCODE_START(1, 1, bl);
141 encode(oid, bl);
142 encode(snaps, bl);
143 ENCODE_FINISH(bl);
144 }
145
146 void SnapMapper::object_snaps::decode(bufferlist::const_iterator &bl)
147 {
148 DECODE_START(1, bl);
149 decode(oid, bl);
150 decode(snaps, bl);
151 DECODE_FINISH(bl);
152 }
153
154 bool SnapMapper::check(const hobject_t &hoid) const
155 {
156 if (hoid.match(mask_bits, match)) {
157 return true;
158 }
159 derr << __func__ << " " << hoid << " mask_bits " << mask_bits
160 << " match 0x" << std::hex << match << std::dec << " is false"
161 << dendl;
162 return false;
163 }
164
165 int SnapMapper::get_snaps(
166 const hobject_t &oid,
167 object_snaps *out)
168 {
169 ceph_assert(check(oid));
170 set<string> keys;
171 map<string, bufferlist> got;
172 keys.insert(to_object_key(oid));
173 int r = backend.get_keys(keys, &got);
174 if (r < 0) {
175 dout(20) << __func__ << " " << oid << " got err " << r << dendl;
176 return r;
177 }
178 if (got.empty()) {
179 dout(20) << __func__ << " " << oid << " got.empty()" << dendl;
180 return -ENOENT;
181 }
182 if (out) {
183 auto bp = got.begin()->second.cbegin();
184 decode(*out, bp);
185 dout(20) << __func__ << " " << oid << " " << out->snaps << dendl;
186 if (out->snaps.empty()) {
187 dout(1) << __func__ << " " << oid << " empty snapset" << dendl;
188 ceph_assert(!cct->_conf->osd_debug_verify_snaps);
189 }
190 } else {
191 dout(20) << __func__ << " " << oid << " (out == NULL)" << dendl;
192 }
193 return 0;
194 }
195
196 void SnapMapper::clear_snaps(
197 const hobject_t &oid,
198 MapCacher::Transaction<std::string, bufferlist> *t)
199 {
200 dout(20) << __func__ << " " << oid << dendl;
201 ceph_assert(check(oid));
202 set<string> to_remove;
203 to_remove.insert(to_object_key(oid));
204 if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
205 for (auto& i : to_remove) {
206 dout(20) << __func__ << " rm " << i << dendl;
207 }
208 }
209 backend.remove_keys(to_remove, t);
210 }
211
212 void SnapMapper::set_snaps(
213 const hobject_t &oid,
214 const object_snaps &in,
215 MapCacher::Transaction<std::string, bufferlist> *t)
216 {
217 ceph_assert(check(oid));
218 map<string, bufferlist> to_set;
219 bufferlist bl;
220 encode(in, bl);
221 to_set[to_object_key(oid)] = bl;
222 dout(20) << __func__ << " " << oid << " " << in.snaps << dendl;
223 if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
224 for (auto& i : to_set) {
225 dout(20) << __func__ << " set " << i.first << dendl;
226 }
227 }
228 backend.set_keys(to_set, t);
229 }
230
231 int SnapMapper::update_snaps(
232 const hobject_t &oid,
233 const set<snapid_t> &new_snaps,
234 const set<snapid_t> *old_snaps_check,
235 MapCacher::Transaction<std::string, bufferlist> *t)
236 {
237 dout(20) << __func__ << " " << oid << " " << new_snaps
238 << " was " << (old_snaps_check ? *old_snaps_check : set<snapid_t>())
239 << dendl;
240 ceph_assert(check(oid));
241 if (new_snaps.empty())
242 return remove_oid(oid, t);
243
244 object_snaps out;
245 int r = get_snaps(oid, &out);
246 // Tolerate missing keys but not disk errors
247 if (r < 0 && r != -ENOENT)
248 return r;
249 if (old_snaps_check)
250 ceph_assert(out.snaps == *old_snaps_check);
251
252 object_snaps in(oid, new_snaps);
253 set_snaps(oid, in, t);
254
255 set<string> to_remove;
256 for (set<snapid_t>::iterator i = out.snaps.begin();
257 i != out.snaps.end();
258 ++i) {
259 if (!new_snaps.count(*i)) {
260 to_remove.insert(to_raw_key(make_pair(*i, oid)));
261 }
262 }
263 if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
264 for (auto& i : to_remove) {
265 dout(20) << __func__ << " rm " << i << dendl;
266 }
267 }
268 backend.remove_keys(to_remove, t);
269 return 0;
270 }
271
272 void SnapMapper::add_oid(
273 const hobject_t &oid,
274 const set<snapid_t>& snaps,
275 MapCacher::Transaction<std::string, bufferlist> *t)
276 {
277 dout(20) << __func__ << " " << oid << " " << snaps << dendl;
278 ceph_assert(!snaps.empty());
279 ceph_assert(check(oid));
280 {
281 object_snaps out;
282 int r = get_snaps(oid, &out);
283 if (r != -ENOENT) {
284 derr << __func__ << " found existing snaps mapped on " << oid
285 << ", removing" << dendl;
286 ceph_assert(!cct->_conf->osd_debug_verify_snaps);
287 remove_oid(oid, t);
288 }
289 }
290
291 object_snaps _snaps(oid, snaps);
292 set_snaps(oid, _snaps, t);
293
294 map<string, bufferlist> to_add;
295 for (set<snapid_t>::iterator i = snaps.begin();
296 i != snaps.end();
297 ++i) {
298 to_add.insert(to_raw(make_pair(*i, oid)));
299 }
300 if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
301 for (auto& i : to_add) {
302 dout(20) << __func__ << " set " << i.first << dendl;
303 }
304 }
305 backend.set_keys(to_add, t);
306 }
307
308 int SnapMapper::get_next_objects_to_trim(
309 snapid_t snap,
310 unsigned max,
311 vector<hobject_t> *out)
312 {
313 ceph_assert(out);
314 ceph_assert(out->empty());
315 int r = 0;
316 for (set<string>::iterator i = prefixes.begin();
317 i != prefixes.end() && out->size() < max && r == 0;
318 ++i) {
319 string prefix(get_prefix(pool, snap) + *i);
320 string pos = prefix;
321 while (out->size() < max) {
322 pair<string, bufferlist> next;
323 r = backend.get_next(pos, &next);
324 dout(20) << __func__ << " get_next(" << pos << ") returns " << r
325 << " " << next << dendl;
326 if (r != 0) {
327 break; // Done
328 }
329
330 if (next.first.substr(0, prefix.size()) !=
331 prefix) {
332 break; // Done with this prefix
333 }
334
335 ceph_assert(is_mapping(next.first));
336
337 dout(20) << __func__ << " " << next.first << dendl;
338 pair<snapid_t, hobject_t> next_decoded(from_raw(next));
339 ceph_assert(next_decoded.first == snap);
340 ceph_assert(check(next_decoded.second));
341
342 out->push_back(next_decoded.second);
343 pos = next.first;
344 }
345 }
346 if (out->size() == 0) {
347 return -ENOENT;
348 } else {
349 return 0;
350 }
351 }
352
353
354 int SnapMapper::remove_oid(
355 const hobject_t &oid,
356 MapCacher::Transaction<std::string, bufferlist> *t)
357 {
358 dout(20) << __func__ << " " << oid << dendl;
359 ceph_assert(check(oid));
360 return _remove_oid(oid, t);
361 }
362
363 int SnapMapper::_remove_oid(
364 const hobject_t &oid,
365 MapCacher::Transaction<std::string, bufferlist> *t)
366 {
367 dout(20) << __func__ << " " << oid << dendl;
368 object_snaps out;
369 int r = get_snaps(oid, &out);
370 if (r < 0)
371 return r;
372
373 clear_snaps(oid, t);
374
375 set<string> to_remove;
376 for (set<snapid_t>::iterator i = out.snaps.begin();
377 i != out.snaps.end();
378 ++i) {
379 to_remove.insert(to_raw_key(make_pair(*i, oid)));
380 }
381 if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
382 for (auto& i : to_remove) {
383 dout(20) << __func__ << " rm " << i << dendl;
384 }
385 }
386 backend.remove_keys(to_remove, t);
387 return 0;
388 }
389
390 int SnapMapper::get_snaps(
391 const hobject_t &oid,
392 std::set<snapid_t> *snaps)
393 {
394 ceph_assert(check(oid));
395 object_snaps out;
396 int r = get_snaps(oid, &out);
397 if (r < 0)
398 return r;
399 if (snaps)
400 snaps->swap(out.snaps);
401 return 0;
402 }
403
404
405 // -- purged snaps --
406
407 string SnapMapper::make_purged_snap_key(int64_t pool, snapid_t last)
408 {
409 char k[80];
410 snprintf(k, sizeof(k), "%s_%llu_%016llx", PURGED_SNAP_PREFIX,
411 (unsigned long long)pool, (unsigned long long)last);
412 return k;
413 }
414
415 void SnapMapper::make_purged_snap_key_value(
416 int64_t pool, snapid_t begin, snapid_t end, map<string,bufferlist> *m)
417 {
418 string k = make_purged_snap_key(pool, end - 1);
419 auto& v = (*m)[k];
420 ceph::encode(pool, v);
421 ceph::encode(begin, v);
422 ceph::encode(end, v);
423 }
424
425 int SnapMapper::_lookup_purged_snap(
426 CephContext *cct,
427 ObjectStore *store,
428 ObjectStore::CollectionHandle& ch,
429 const ghobject_t& hoid,
430 int64_t pool, snapid_t snap,
431 snapid_t *begin, snapid_t *end)
432 {
433 string k = make_purged_snap_key(pool, snap);
434 auto it = store->get_omap_iterator(ch, hoid);
435 it->lower_bound(k);
436 if (!it->valid()) {
437 dout(20) << __func__ << " pool " << pool << " snap " << snap
438 << " key '" << k << "' lower_bound not found" << dendl;
439 return -ENOENT;
440 }
441 if (it->key().find(PURGED_SNAP_PREFIX) != 0) {
442 dout(20) << __func__ << " pool " << pool << " snap " << snap
443 << " key '" << k << "' lower_bound got mismatched prefix '"
444 << it->key() << "'" << dendl;
445 return -ENOENT;
446 }
447 bufferlist v = it->value();
448 auto p = v.cbegin();
449 int64_t gotpool;
450 decode(gotpool, p);
451 decode(*begin, p);
452 decode(*end, p);
453 if (snap < *begin || snap >= *end) {
454 dout(20) << __func__ << " pool " << pool << " snap " << snap
455 << " found [" << *begin << "," << *end << "), no overlap" << dendl;
456 return -ENOENT;
457 }
458 return 0;
459 }
460
461 void SnapMapper::record_purged_snaps(
462 CephContext *cct,
463 ObjectStore *store,
464 ObjectStore::CollectionHandle& ch,
465 ghobject_t hoid,
466 ObjectStore::Transaction *t,
467 map<epoch_t,mempool::osdmap::map<int64_t,snap_interval_set_t>> purged_snaps)
468 {
469 dout(10) << __func__ << " purged_snaps " << purged_snaps << dendl;
470 map<string,bufferlist> m;
471 set<string> rm;
472 for (auto& [epoch, bypool] : purged_snaps) {
473 // index by (pool, snap)
474 for (auto& [pool, snaps] : bypool) {
475 for (auto i = snaps.begin();
476 i != snaps.end();
477 ++i) {
478 snapid_t begin = i.get_start();
479 snapid_t end = i.get_end();
480 snapid_t before_begin, before_end;
481 snapid_t after_begin, after_end;
482 int b = _lookup_purged_snap(cct, store, ch, hoid,
483 pool, begin - 1, &before_begin, &before_end);
484 int a = _lookup_purged_snap(cct, store, ch, hoid,
485 pool, end, &after_begin, &after_end);
486 if (!b && !a) {
487 dout(10) << __func__
488 << " [" << begin << "," << end << ") - joins ["
489 << before_begin << "," << before_end << ") and ["
490 << after_begin << "," << after_end << ")" << dendl;
491 // erase only the begin record; we'll overwrite the end one
492 rm.insert(make_purged_snap_key(pool, before_end - 1));
493 make_purged_snap_key_value(pool, before_begin, after_end, &m);
494 } else if (!b) {
495 dout(10) << __func__
496 << " [" << begin << "," << end << ") - join with earlier ["
497 << before_begin << "," << before_end << ")" << dendl;
498 rm.insert(make_purged_snap_key(pool, before_end - 1));
499 make_purged_snap_key_value(pool, before_begin, end, &m);
500 } else if (!a) {
501 dout(10) << __func__
502 << " [" << begin << "," << end << ") - join with later ["
503 << after_begin << "," << after_end << ")" << dendl;
504 // overwrite after record
505 make_purged_snap_key_value(pool, begin, after_end, &m);
506 } else {
507 make_purged_snap_key_value(pool, begin, end, &m);
508 }
509 }
510 }
511 }
512 t->omap_rmkeys(ch->cid, hoid, rm);
513 t->omap_setkeys(ch->cid, hoid, m);
514 dout(10) << __func__ << " rm " << rm.size() << " keys, set " << m.size()
515 << " keys" << dendl;
516 }
517
518
519 bool SnapMapper::Scrubber::_parse_p()
520 {
521 if (!psit->valid()) {
522 pool = -1;
523 return false;
524 }
525 if (psit->key().find(PURGED_SNAP_PREFIX) != 0) {
526 pool = -1;
527 return false;
528 }
529 bufferlist v = psit->value();
530 auto p = v.cbegin();
531 ceph::decode(pool, p);
532 ceph::decode(begin, p);
533 ceph::decode(end, p);
534 dout(20) << __func__ << " purged_snaps pool " << pool
535 << " [" << begin << "," << end << ")" << dendl;
536 psit->next();
537 return true;
538 }
539
540 bool SnapMapper::Scrubber::_parse_m()
541 {
542 if (!mapit->valid()) {
543 return false;
544 }
545 if (mapit->key().find(MAPPING_PREFIX) != 0) {
546 return false;
547 }
548 auto v = mapit->value();
549 auto p = v.cbegin();
550 mapping.decode(p);
551
552 {
553 unsigned long long p, s;
554 long sh;
555 string k = mapit->key();
556 int r = sscanf(k.c_str(), "SNA_%lld_%llx.%lx", &p, &s, &sh);
557 if (r != 1) {
558 shard = shard_id_t::NO_SHARD;
559 } else {
560 shard = shard_id_t(sh);
561 }
562 }
563 dout(20) << __func__ << " mapping pool " << mapping.hoid.pool
564 << " snap " << mapping.snap
565 << " shard " << shard
566 << " " << mapping.hoid << dendl;
567 mapit->next();
568 return true;
569 }
570
571 void SnapMapper::Scrubber::run()
572 {
573 dout(10) << __func__ << dendl;
574
575 psit = store->get_omap_iterator(ch, purged_snaps_hoid);
576 psit->upper_bound(PURGED_SNAP_PREFIX);
577 _parse_p();
578
579 mapit = store->get_omap_iterator(ch, mapping_hoid);
580 mapit->upper_bound(MAPPING_PREFIX);
581
582 while (_parse_m()) {
583 // advance to next purged_snaps range?
584 while (pool >= 0 &&
585 (mapping.hoid.pool > pool ||
586 (mapping.hoid.pool == pool && mapping.snap >= end))) {
587 _parse_p();
588 }
589 if (pool < 0) {
590 dout(10) << __func__ << " passed final purged_snaps interval, rest ok"
591 << dendl;
592 break;
593 }
594 if (mapping.hoid.pool < pool ||
595 mapping.snap < begin) {
596 // ok
597 dout(20) << __func__ << " ok " << mapping.hoid
598 << " snap " << mapping.snap
599 << " precedes pool " << pool
600 << " purged_snaps [" << begin << "," << end << ")" << dendl;
601 } else {
602 assert(mapping.snap >= begin &&
603 mapping.snap < end &&
604 mapping.hoid.pool == pool);
605 // invalid
606 dout(10) << __func__ << " stray " << mapping.hoid
607 << " snap " << mapping.snap
608 << " in pool " << pool
609 << " shard " << shard
610 << " purged_snaps [" << begin << "," << end << ")" << dendl;
611 stray.emplace_back(std::tuple<int64_t,snapid_t,uint32_t,shard_id_t>(
612 pool, mapping.snap, mapping.hoid.get_hash(),
613 shard
614 ));
615 }
616 }
617
618 dout(10) << __func__ << " end, found " << stray.size() << " stray" << dendl;
619 psit = ObjectMap::ObjectMapIterator();
620 mapit = ObjectMap::ObjectMapIterator();
621 }
622
623
624 // -------------------------------------
625 // legacy conversion/support
626
627 string SnapMapper::get_legacy_prefix(snapid_t snap)
628 {
629 char buf[100];
630 int len = snprintf(
631 buf, sizeof(buf),
632 "%.*X_",
633 (int)(sizeof(snap)*2), static_cast<unsigned>(snap));
634 return LEGACY_MAPPING_PREFIX + string(buf, len);
635 }
636
637 string SnapMapper::to_legacy_raw_key(
638 const pair<snapid_t, hobject_t> &in)
639 {
640 return get_legacy_prefix(in.first) + shard_prefix + in.second.to_str();
641 }
642
643 bool SnapMapper::is_legacy_mapping(const string &to_test)
644 {
645 return to_test.substr(0, LEGACY_MAPPING_PREFIX.size()) ==
646 LEGACY_MAPPING_PREFIX;
647 }
648
649 int SnapMapper::convert_legacy(
650 CephContext *cct,
651 ObjectStore *store,
652 ObjectStore::CollectionHandle& ch,
653 ghobject_t hoid,
654 unsigned max)
655 {
656 uint64_t n = 0;
657
658 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, hoid);
659 if (!iter) {
660 return -EIO;
661 }
662
663 auto start = ceph::mono_clock::now();
664
665 iter->upper_bound(SnapMapper::LEGACY_MAPPING_PREFIX);
666 map<string,bufferlist> to_set;
667 while (iter->valid()) {
668 bool valid = SnapMapper::is_legacy_mapping(iter->key());
669 if (valid) {
670 SnapMapper::Mapping m;
671 bufferlist bl(iter->value());
672 auto bp = bl.cbegin();
673 decode(m, bp);
674 to_set.emplace(
675 SnapMapper::get_prefix(m.hoid.pool, m.snap),
676 bl);
677 ++n;
678 iter->next();
679 }
680 if (!valid || !iter->valid() || to_set.size() >= max) {
681 ObjectStore::Transaction t;
682 t.omap_setkeys(ch->cid, hoid, to_set);
683 int r = store->queue_transaction(ch, std::move(t));
684 ceph_assert(r == 0);
685 to_set.clear();
686 if (!valid) {
687 break;
688 }
689 dout(10) << __func__ << " converted " << n << " keys" << dendl;
690 }
691 }
692
693 auto end = ceph::mono_clock::now();
694
695 dout(1) << __func__ << " converted " << n << " keys in "
696 << timespan_str(end - start) << dendl;
697
698 // remove the old keys
699 {
700 ObjectStore::Transaction t;
701 string end = SnapMapper::LEGACY_MAPPING_PREFIX;
702 ++end[end.size()-1]; // turn _ to whatever comes after _
703 t.omap_rmkeyrange(ch->cid, hoid,
704 SnapMapper::LEGACY_MAPPING_PREFIX,
705 end);
706 int r = store->queue_transaction(ch, std::move(t));
707 ceph_assert(r == 0);
708 }
709 return 0;
710 }