]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/osd_types.cc
update sources to v12.1.2
[ceph.git] / ceph / src / osd / osd_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18#include <boost/assign/list_of.hpp>
19
20#include "osd_types.h"
21#include "include/ceph_features.h"
22extern "C" {
23#include "crush/hash.h"
24}
25#include "PG.h"
26#include "OSDMap.h"
27#include "PGBackend.h"
28
29const char *ceph_osd_flag_name(unsigned flag)
30{
31 switch (flag) {
32 case CEPH_OSD_FLAG_ACK: return "ack";
33 case CEPH_OSD_FLAG_ONNVRAM: return "onnvram";
34 case CEPH_OSD_FLAG_ONDISK: return "ondisk";
35 case CEPH_OSD_FLAG_RETRY: return "retry";
36 case CEPH_OSD_FLAG_READ: return "read";
37 case CEPH_OSD_FLAG_WRITE: return "write";
38 case CEPH_OSD_FLAG_ORDERSNAP: return "ordersnap";
39 case CEPH_OSD_FLAG_PEERSTAT_OLD: return "peerstat_old";
40 case CEPH_OSD_FLAG_BALANCE_READS: return "balance_reads";
41 case CEPH_OSD_FLAG_PARALLELEXEC: return "parallelexec";
42 case CEPH_OSD_FLAG_PGOP: return "pgop";
43 case CEPH_OSD_FLAG_EXEC: return "exec";
44 case CEPH_OSD_FLAG_EXEC_PUBLIC: return "exec_public";
45 case CEPH_OSD_FLAG_LOCALIZE_READS: return "localize_reads";
46 case CEPH_OSD_FLAG_RWORDERED: return "rwordered";
47 case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache";
48 case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks";
49 case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay";
50 case CEPH_OSD_FLAG_FLUSH: return "flush";
51 case CEPH_OSD_FLAG_MAP_SNAP_CLONE: return "map_snap_clone";
52 case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
53 case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
54 case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
55 case CEPH_OSD_FLAG_FULL_TRY: return "full_try";
56 case CEPH_OSD_FLAG_FULL_FORCE: return "full_force";
224ce89b 57 case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect";
7c673cae
FG
58 default: return "???";
59 }
60}
61
62string ceph_osd_flag_string(unsigned flags)
63{
64 string s;
65 for (unsigned i=0; i<32; ++i) {
66 if (flags & (1u<<i)) {
67 if (s.length())
68 s += "+";
69 s += ceph_osd_flag_name(1u << i);
70 }
71 }
72 if (s.length())
73 return s;
74 return string("-");
75}
76
77const char * ceph_osd_op_flag_name(unsigned flag)
78{
79 const char *name;
80
81 switch(flag) {
82 case CEPH_OSD_OP_FLAG_EXCL:
83 name = "excl";
84 break;
85 case CEPH_OSD_OP_FLAG_FAILOK:
86 name = "failok";
87 break;
88 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM:
89 name = "fadvise_random";
90 break;
91 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL:
92 name = "fadvise_sequential";
93 break;
94 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED:
95 name = "favise_willneed";
96 break;
97 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED:
98 name = "fadvise_dontneed";
99 break;
100 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
101 name = "fadvise_nocache";
102 break;
103 default:
104 name = "???";
105 };
106
107 return name;
108}
109
110string ceph_osd_op_flag_string(unsigned flags)
111{
112 string s;
113 for (unsigned i=0; i<32; ++i) {
114 if (flags & (1u<<i)) {
115 if (s.length())
116 s += "+";
117 s += ceph_osd_op_flag_name(1u << i);
118 }
119 }
120 if (s.length())
121 return s;
122 return string("-");
123}
124
125string ceph_osd_alloc_hint_flag_string(unsigned flags)
126{
127 string s;
128 for (unsigned i=0; i<32; ++i) {
129 if (flags & (1u<<i)) {
130 if (s.length())
131 s += "+";
132 s += ceph_osd_alloc_hint_flag_name(1u << i);
133 }
134 }
135 if (s.length())
136 return s;
137 return string("-");
138}
139
140void pg_shard_t::encode(bufferlist &bl) const
141{
142 ENCODE_START(1, 1, bl);
143 ::encode(osd, bl);
144 ::encode(shard, bl);
145 ENCODE_FINISH(bl);
146}
147void pg_shard_t::decode(bufferlist::iterator &bl)
148{
149 DECODE_START(1, bl);
150 ::decode(osd, bl);
151 ::decode(shard, bl);
152 DECODE_FINISH(bl);
153}
154
155ostream &operator<<(ostream &lhs, const pg_shard_t &rhs)
156{
157 if (rhs.is_undefined())
158 return lhs << "?";
159 if (rhs.shard == shard_id_t::NO_SHARD)
160 return lhs << rhs.osd;
161 return lhs << rhs.osd << '(' << (unsigned)(rhs.shard) << ')';
162}
163
164// -- osd_reqid_t --
165void osd_reqid_t::dump(Formatter *f) const
166{
167 f->dump_stream("name") << name;
168 f->dump_int("inc", inc);
169 f->dump_unsigned("tid", tid);
170}
171
172void osd_reqid_t::generate_test_instances(list<osd_reqid_t*>& o)
173{
174 o.push_back(new osd_reqid_t);
175 o.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
176}
177
178// -- object_locator_t --
179
180void object_locator_t::encode(bufferlist& bl) const
181{
182 // verify that nobody's corrupted the locator
183 assert(hash == -1 || key.empty());
184 __u8 encode_compat = 3;
185 ENCODE_START(6, encode_compat, bl);
186 ::encode(pool, bl);
187 int32_t preferred = -1; // tell old code there is no preferred osd (-1).
188 ::encode(preferred, bl);
189 ::encode(key, bl);
190 ::encode(nspace, bl);
191 ::encode(hash, bl);
192 if (hash != -1)
193 encode_compat = MAX(encode_compat, 6); // need to interpret the hash
194 ENCODE_FINISH_NEW_COMPAT(bl, encode_compat);
195}
196
197void object_locator_t::decode(bufferlist::iterator& p)
198{
199 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p);
200 if (struct_v < 2) {
201 int32_t op;
202 ::decode(op, p);
203 pool = op;
204 int16_t pref;
205 ::decode(pref, p);
206 } else {
207 ::decode(pool, p);
208 int32_t preferred;
209 ::decode(preferred, p);
210 }
211 ::decode(key, p);
212 if (struct_v >= 5)
213 ::decode(nspace, p);
214 if (struct_v >= 6)
215 ::decode(hash, p);
216 else
217 hash = -1;
218 DECODE_FINISH(p);
219 // verify that nobody's corrupted the locator
220 assert(hash == -1 || key.empty());
221}
222
223void object_locator_t::dump(Formatter *f) const
224{
225 f->dump_int("pool", pool);
226 f->dump_string("key", key);
227 f->dump_string("namespace", nspace);
228 f->dump_int("hash", hash);
229}
230
231void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
232{
233 o.push_back(new object_locator_t);
234 o.push_back(new object_locator_t(123));
235 o.push_back(new object_locator_t(123, 876));
236 o.push_back(new object_locator_t(1, "n2"));
237 o.push_back(new object_locator_t(1234, "", "key"));
238 o.push_back(new object_locator_t(12, "n1", "key2"));
239}
240
241// -- request_redirect_t --
242void request_redirect_t::encode(bufferlist& bl) const
243{
244 ENCODE_START(1, 1, bl);
245 ::encode(redirect_locator, bl);
246 ::encode(redirect_object, bl);
247 ::encode(osd_instructions, bl);
248 ENCODE_FINISH(bl);
249}
250
251void request_redirect_t::decode(bufferlist::iterator& bl)
252{
253 DECODE_START(1, bl);
254 ::decode(redirect_locator, bl);
255 ::decode(redirect_object, bl);
256 ::decode(osd_instructions, bl);
257 DECODE_FINISH(bl);
258}
259
260void request_redirect_t::dump(Formatter *f) const
261{
262 f->dump_string("object", redirect_object);
263 f->open_object_section("locator");
264 redirect_locator.dump(f);
265 f->close_section(); // locator
266}
267
268void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
269{
270 object_locator_t loc(1, "redir_obj");
271 o.push_back(new request_redirect_t());
272 o.push_back(new request_redirect_t(loc, 0));
273 o.push_back(new request_redirect_t(loc, "redir_obj"));
274 o.push_back(new request_redirect_t(loc));
275}
276
277void objectstore_perf_stat_t::dump(Formatter *f) const
278{
279 f->dump_unsigned("commit_latency_ms", os_commit_latency);
280 f->dump_unsigned("apply_latency_ms", os_apply_latency);
281}
282
283void objectstore_perf_stat_t::encode(bufferlist &bl) const
284{
285 ENCODE_START(1, 1, bl);
286 ::encode(os_commit_latency, bl);
287 ::encode(os_apply_latency, bl);
288 ENCODE_FINISH(bl);
289}
290
291void objectstore_perf_stat_t::decode(bufferlist::iterator &bl)
292{
293 DECODE_START(1, bl);
294 ::decode(os_commit_latency, bl);
295 ::decode(os_apply_latency, bl);
296 DECODE_FINISH(bl);
297}
298
299void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf_stat_t*>& o)
300{
301 o.push_back(new objectstore_perf_stat_t());
302 o.push_back(new objectstore_perf_stat_t());
303 o.back()->os_commit_latency = 20;
304 o.back()->os_apply_latency = 30;
305}
306
307// -- osd_stat_t --
308void osd_stat_t::dump(Formatter *f) const
309{
31f18b77
FG
310 f->dump_unsigned("up_from", up_from);
311 f->dump_unsigned("seq", seq);
7c673cae
FG
312 f->dump_unsigned("kb", kb);
313 f->dump_unsigned("kb_used", kb_used);
314 f->dump_unsigned("kb_avail", kb_avail);
315 f->open_array_section("hb_peers");
316 for (auto p : hb_peers)
317 f->dump_int("osd", p);
318 f->close_section();
319 f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
320 f->dump_int("num_snap_trimming", num_snap_trimming);
321 f->open_object_section("op_queue_age_hist");
322 op_queue_age_hist.dump(f);
323 f->close_section();
324 f->open_object_section("perf_stat");
325 os_perf_stat.dump(f);
326 f->close_section();
327}
328
329void osd_stat_t::encode(bufferlist &bl) const
330{
31f18b77 331 ENCODE_START(6, 2, bl);
7c673cae
FG
332 ::encode(kb, bl);
333 ::encode(kb_used, bl);
334 ::encode(kb_avail, bl);
335 ::encode(snap_trim_queue_len, bl);
336 ::encode(num_snap_trimming, bl);
337 ::encode(hb_peers, bl);
338 ::encode((uint32_t)0, bl);
339 ::encode(op_queue_age_hist, bl);
340 ::encode(os_perf_stat, bl);
31f18b77
FG
341 ::encode(up_from, bl);
342 ::encode(seq, bl);
7c673cae
FG
343 ENCODE_FINISH(bl);
344}
345
346void osd_stat_t::decode(bufferlist::iterator &bl)
347{
31f18b77 348 DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
7c673cae
FG
349 ::decode(kb, bl);
350 ::decode(kb_used, bl);
351 ::decode(kb_avail, bl);
352 ::decode(snap_trim_queue_len, bl);
353 ::decode(num_snap_trimming, bl);
354 ::decode(hb_peers, bl);
355 vector<int> num_hb_out;
356 ::decode(num_hb_out, bl);
357 if (struct_v >= 3)
358 ::decode(op_queue_age_hist, bl);
359 if (struct_v >= 4)
360 ::decode(os_perf_stat, bl);
31f18b77
FG
361 if (struct_v >= 6) {
362 ::decode(up_from, bl);
363 ::decode(seq, bl);
364 }
7c673cae
FG
365 DECODE_FINISH(bl);
366}
367
368void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
369{
370 o.push_back(new osd_stat_t);
371
372 o.push_back(new osd_stat_t);
373 o.back()->kb = 1;
374 o.back()->kb_used = 2;
375 o.back()->kb_avail = 3;
376 o.back()->hb_peers.push_back(7);
377 o.back()->snap_trim_queue_len = 8;
378 o.back()->num_snap_trimming = 99;
379}
380
381// -- pg_t --
382
383int pg_t::print(char *o, int maxlen) const
384{
385 if (preferred() >= 0)
386 return snprintf(o, maxlen, "%llu.%xp%d", (unsigned long long)pool(), ps(), preferred());
387 else
388 return snprintf(o, maxlen, "%llu.%x", (unsigned long long)pool(), ps());
389}
390
391bool pg_t::parse(const char *s)
392{
393 uint64_t ppool;
394 uint32_t pseed;
395 int32_t pref;
396 int r = sscanf(s, "%llu.%xp%d", (long long unsigned *)&ppool, &pseed, &pref);
397 if (r < 2)
398 return false;
399 m_pool = ppool;
400 m_seed = pseed;
401 if (r == 3)
402 m_preferred = pref;
403 else
404 m_preferred = -1;
405 return true;
406}
407
408bool spg_t::parse(const char *s)
409{
410 pgid.set_preferred(-1);
411 shard = shard_id_t::NO_SHARD;
412 uint64_t ppool;
413 uint32_t pseed;
414 int32_t pref;
415 uint32_t pshard;
416 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
417 if (r < 2)
418 return false;
419 pgid.set_pool(ppool);
420 pgid.set_ps(pseed);
421
422 const char *p = strchr(s, 'p');
423 if (p) {
424 r = sscanf(p, "p%d", &pref);
425 if (r == 1) {
426 pgid.set_preferred(pref);
427 } else {
428 return false;
429 }
430 }
431
432 p = strchr(s, 's');
433 if (p) {
434 r = sscanf(p, "s%d", &pshard);
435 if (r == 1) {
436 shard = shard_id_t(pshard);
437 } else {
438 return false;
439 }
440 }
441 return true;
442}
443
444char *spg_t::calc_name(char *buf, const char *suffix_backwords) const
445{
446 while (*suffix_backwords)
447 *--buf = *suffix_backwords++;
448
449 if (!is_no_shard()) {
450 buf = ritoa<uint8_t, 10>((uint8_t)shard.id, buf);
451 *--buf = 's';
452 }
453
454 return pgid.calc_name(buf, "");
455}
456
457ostream& operator<<(ostream& out, const spg_t &pg)
458{
459 char buf[spg_t::calc_name_buf_size];
460 buf[spg_t::calc_name_buf_size - 1] = '\0';
461 out << pg.calc_name(buf + spg_t::calc_name_buf_size - 1, "");
462 return out;
463}
464
465pg_t pg_t::get_ancestor(unsigned old_pg_num) const
466{
467 int old_bits = cbits(old_pg_num);
468 int old_mask = (1 << old_bits) - 1;
469 pg_t ret = *this;
470 ret.m_seed = ceph_stable_mod(m_seed, old_pg_num, old_mask);
471 return ret;
472}
473
474bool pg_t::is_split(unsigned old_pg_num, unsigned new_pg_num, set<pg_t> *children) const
475{
476 assert(m_seed < old_pg_num);
477 if (new_pg_num <= old_pg_num)
478 return false;
479
480 bool split = false;
481 if (true) {
482 unsigned old_bits = cbits(old_pg_num);
483 unsigned old_mask = (1 << old_bits) - 1;
484 for (unsigned n = 1; ; n++) {
485 unsigned next_bit = (n << (old_bits-1));
486 unsigned s = next_bit | m_seed;
487
488 if (s < old_pg_num || s == m_seed)
489 continue;
490 if (s >= new_pg_num)
491 break;
492 if ((unsigned)ceph_stable_mod(s, old_pg_num, old_mask) == m_seed) {
493 split = true;
494 if (children)
495 children->insert(pg_t(s, m_pool, m_preferred));
496 }
497 }
498 }
499 if (false) {
500 // brute force
501 int old_bits = cbits(old_pg_num);
502 int old_mask = (1 << old_bits) - 1;
503 for (unsigned x = old_pg_num; x < new_pg_num; ++x) {
504 unsigned o = ceph_stable_mod(x, old_pg_num, old_mask);
505 if (o == m_seed) {
506 split = true;
507 children->insert(pg_t(x, m_pool, m_preferred));
508 }
509 }
510 }
511 return split;
512}
513
514unsigned pg_t::get_split_bits(unsigned pg_num) const {
515 if (pg_num == 1)
516 return 0;
517 assert(pg_num > 1);
518
519 // Find unique p such that pg_num \in [2^(p-1), 2^p)
520 unsigned p = cbits(pg_num);
521 assert(p); // silence coverity #751330
522
523 if ((m_seed % (1<<(p-1))) < (pg_num % (1<<(p-1))))
524 return p;
525 else
526 return p - 1;
527}
528
529pg_t pg_t::get_parent() const
530{
531 unsigned bits = cbits(m_seed);
532 assert(bits);
533 pg_t retval = *this;
534 retval.m_seed &= ~((~0)<<(bits - 1));
535 return retval;
536}
537
538hobject_t pg_t::get_hobj_start() const
539{
540 return hobject_t(object_t(), string(), CEPH_NOSNAP, m_seed, m_pool,
541 string());
542}
543
544hobject_t pg_t::get_hobj_end(unsigned pg_num) const
545{
546 // note: this assumes a bitwise sort; with the legacy nibblewise
547 // sort a PG did not always cover a single contiguous range of the
548 // (bit-reversed) hash range.
549 unsigned bits = get_split_bits(pg_num);
550 uint64_t rev_start = hobject_t::_reverse_bits(m_seed);
551 uint64_t rev_end = (rev_start | (0xffffffff >> bits)) + 1;
552 if (rev_end >= 0x100000000) {
553 assert(rev_end == 0x100000000);
554 return hobject_t::get_max();
555 } else {
556 return hobject_t(object_t(), string(), CEPH_NOSNAP,
557 hobject_t::_reverse_bits(rev_end), m_pool,
558 string());
559 }
560}
561
562void pg_t::dump(Formatter *f) const
563{
564 f->dump_unsigned("pool", m_pool);
565 f->dump_unsigned("seed", m_seed);
566 f->dump_int("preferred_osd", m_preferred);
567}
568
569void pg_t::generate_test_instances(list<pg_t*>& o)
570{
571 o.push_back(new pg_t);
572 o.push_back(new pg_t(1, 2, -1));
573 o.push_back(new pg_t(13123, 3, -1));
574 o.push_back(new pg_t(131223, 4, 23));
575}
576
577char *pg_t::calc_name(char *buf, const char *suffix_backwords) const
578{
579 while (*suffix_backwords)
580 *--buf = *suffix_backwords++;
581
582 if (m_preferred >= 0)
583 *--buf ='p';
584
585 buf = ritoa<uint32_t, 16>(m_seed, buf);
586
587 *--buf = '.';
588
589 return ritoa<uint64_t, 10>(m_pool, buf);
590}
591
592ostream& operator<<(ostream& out, const pg_t &pg)
593{
594 char buf[pg_t::calc_name_buf_size];
595 buf[pg_t::calc_name_buf_size - 1] = '\0';
596 out << pg.calc_name(buf + pg_t::calc_name_buf_size - 1, "");
597 return out;
598}
599
600
601// -- coll_t --
602
603void coll_t::calc_str()
604{
605 switch (type) {
606 case TYPE_META:
607 strcpy(_str_buff, "meta");
608 _str = _str_buff;
609 break;
610 case TYPE_PG:
611 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
612 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "daeh_");
613 break;
614 case TYPE_PG_TEMP:
615 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
616 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "PMET_");
617 break;
618 default:
619 assert(0 == "unknown collection type");
620 }
621}
622
623bool coll_t::parse(const std::string& s)
624{
625 if (s == "meta") {
626 type = TYPE_META;
627 pgid = spg_t();
628 removal_seq = 0;
629 calc_str();
630 assert(s == _str);
631 return true;
632 }
633 if (s.find("_head") == s.length() - 5 &&
634 pgid.parse(s.substr(0, s.length() - 5))) {
635 type = TYPE_PG;
636 removal_seq = 0;
637 calc_str();
638 assert(s == _str);
639 return true;
640 }
641 if (s.find("_TEMP") == s.length() - 5 &&
642 pgid.parse(s.substr(0, s.length() - 5))) {
643 type = TYPE_PG_TEMP;
644 removal_seq = 0;
645 calc_str();
646 assert(s == _str);
647 return true;
648 }
649 return false;
650}
651
652void coll_t::encode(bufferlist& bl) const
653{
654 // when changing this, remember to update encoded_size() too.
655 if (is_temp()) {
656 // can't express this as v2...
657 __u8 struct_v = 3;
658 ::encode(struct_v, bl);
659 ::encode(to_str(), bl);
660 } else {
661 __u8 struct_v = 2;
662 ::encode(struct_v, bl);
663 ::encode((__u8)type, bl);
664 ::encode(pgid, bl);
665 snapid_t snap = CEPH_NOSNAP;
666 ::encode(snap, bl);
667 }
668}
669
670size_t coll_t::encoded_size() const
671{
672 size_t r = sizeof(__u8);
673 if (is_temp()) {
674 // v3
675 r += sizeof(__u32);
676 if (_str) {
677 r += strlen(_str);
678 }
679 } else {
680 // v2
681 // 1. type
682 r += sizeof(__u8);
683 // 2. pgid
684 // - encoding header
685 r += sizeof(ceph_le32) + 2 * sizeof(__u8);
686 // - pg_t
687 r += sizeof(__u8) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
688 // - shard_id_t
689 r += sizeof(int8_t);
690 // 3. snapid_t
691 r += sizeof(uint64_t);
692 }
693
694 return r;
695}
696
697void coll_t::decode(bufferlist::iterator& bl)
698{
699 __u8 struct_v;
700 ::decode(struct_v, bl);
701 switch (struct_v) {
702 case 1:
703 {
704 snapid_t snap;
705 ::decode(pgid, bl);
706 ::decode(snap, bl);
707
708 // infer the type
709 if (pgid == spg_t() && snap == 0) {
710 type = TYPE_META;
711 } else {
712 type = TYPE_PG;
713 }
714 removal_seq = 0;
715 }
716 break;
717
718 case 2:
719 {
720 __u8 _type;
721 snapid_t snap;
722 ::decode(_type, bl);
723 ::decode(pgid, bl);
724 ::decode(snap, bl);
725 type = (type_t)_type;
726 removal_seq = 0;
727 }
728 break;
729
730 case 3:
731 {
732 string str;
733 ::decode(str, bl);
734 bool ok = parse(str);
735 if (!ok)
736 throw std::domain_error(std::string("unable to parse pg ") + str);
737 }
738 break;
739
740 default:
741 {
742 ostringstream oss;
743 oss << "coll_t::decode(): don't know how to decode version "
744 << struct_v;
745 throw std::domain_error(oss.str());
746 }
747 }
748}
749
750void coll_t::dump(Formatter *f) const
751{
752 f->dump_unsigned("type_id", (unsigned)type);
753 if (type != TYPE_META)
754 f->dump_stream("pgid") << pgid;
755 f->dump_string("name", to_str());
756}
757
758void coll_t::generate_test_instances(list<coll_t*>& o)
759{
760 o.push_back(new coll_t());
761 o.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD)));
762 o.push_back(new coll_t(o.back()->get_temp()));
763 o.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
764 o.push_back(new coll_t(o.back()->get_temp()));
765 o.push_back(new coll_t());
766}
767
768// ---
769
770std::string pg_vector_string(const vector<int32_t> &a)
771{
772 ostringstream oss;
773 oss << "[";
774 for (vector<int32_t>::const_iterator i = a.begin(); i != a.end(); ++i) {
775 if (i != a.begin())
776 oss << ",";
777 if (*i != CRUSH_ITEM_NONE)
778 oss << *i;
779 else
780 oss << "NONE";
781 }
782 oss << "]";
783 return oss.str();
784}
785
786std::string pg_state_string(int state)
787{
788 ostringstream oss;
789 if (state & PG_STATE_STALE)
790 oss << "stale+";
791 if (state & PG_STATE_CREATING)
792 oss << "creating+";
793 if (state & PG_STATE_ACTIVE)
794 oss << "active+";
795 if (state & PG_STATE_ACTIVATING)
796 oss << "activating+";
797 if (state & PG_STATE_CLEAN)
798 oss << "clean+";
799 if (state & PG_STATE_RECOVERY_WAIT)
800 oss << "recovery_wait+";
801 if (state & PG_STATE_RECOVERY_TOOFULL)
802 oss << "recovery_toofull+";
803 if (state & PG_STATE_RECOVERING)
804 oss << "recovering+";
c07f9fc5
FG
805 if (state & PG_STATE_FORCED_RECOVERY)
806 oss << "forced_recovery+";
7c673cae
FG
807 if (state & PG_STATE_DOWN)
808 oss << "down+";
809 if (state & PG_STATE_UNDERSIZED)
810 oss << "undersized+";
811 if (state & PG_STATE_DEGRADED)
812 oss << "degraded+";
813 if (state & PG_STATE_REMAPPED)
814 oss << "remapped+";
815 if (state & PG_STATE_SCRUBBING)
816 oss << "scrubbing+";
817 if (state & PG_STATE_DEEP_SCRUB)
818 oss << "deep+";
819 if (state & PG_STATE_INCONSISTENT)
820 oss << "inconsistent+";
821 if (state & PG_STATE_PEERING)
822 oss << "peering+";
823 if (state & PG_STATE_REPAIR)
824 oss << "repair+";
825 if ((state & PG_STATE_BACKFILL_WAIT) &&
826 !(state &PG_STATE_BACKFILL))
827 oss << "backfill_wait+";
828 if (state & PG_STATE_BACKFILL)
829 oss << "backfilling+";
c07f9fc5
FG
830 if (state & PG_STATE_FORCED_BACKFILL)
831 oss << "forced_backfill+";
7c673cae
FG
832 if (state & PG_STATE_BACKFILL_TOOFULL)
833 oss << "backfill_toofull+";
834 if (state & PG_STATE_INCOMPLETE)
835 oss << "incomplete+";
836 if (state & PG_STATE_PEERED)
837 oss << "peered+";
838 if (state & PG_STATE_SNAPTRIM)
839 oss << "snaptrim+";
840 if (state & PG_STATE_SNAPTRIM_WAIT)
841 oss << "snaptrim_wait+";
224ce89b
WB
842 if (state & PG_STATE_SNAPTRIM_ERROR)
843 oss << "snaptrim_error+";
7c673cae
FG
844 string ret(oss.str());
845 if (ret.length() > 0)
846 ret.resize(ret.length() - 1);
847 else
31f18b77 848 ret = "unknown";
7c673cae
FG
849 return ret;
850}
851
852int pg_string_state(const std::string& state)
853{
854 int type;
855 if (state == "active")
856 type = PG_STATE_ACTIVE;
857 else if (state == "clean")
858 type = PG_STATE_CLEAN;
859 else if (state == "down")
860 type = PG_STATE_DOWN;
861 else if (state == "scrubbing")
862 type = PG_STATE_SCRUBBING;
863 else if (state == "degraded")
864 type = PG_STATE_DEGRADED;
865 else if (state == "inconsistent")
866 type = PG_STATE_INCONSISTENT;
867 else if (state == "peering")
868 type = PG_STATE_PEERING;
869 else if (state == "repair")
870 type = PG_STATE_REPAIR;
871 else if (state == "recovering")
872 type = PG_STATE_RECOVERING;
c07f9fc5
FG
873 else if (state == "forced_recovery")
874 type = PG_STATE_FORCED_RECOVERY;
7c673cae
FG
875 else if (state == "backfill_wait")
876 type = PG_STATE_BACKFILL_WAIT;
877 else if (state == "incomplete")
878 type = PG_STATE_INCOMPLETE;
879 else if (state == "stale")
880 type = PG_STATE_STALE;
881 else if (state == "remapped")
882 type = PG_STATE_REMAPPED;
883 else if (state == "deep_scrub")
884 type = PG_STATE_DEEP_SCRUB;
885 else if (state == "backfill")
886 type = PG_STATE_BACKFILL;
c07f9fc5
FG
887 else if (state == "forced_backfill")
888 type = PG_STATE_FORCED_BACKFILL;
7c673cae
FG
889 else if (state == "backfill_toofull")
890 type = PG_STATE_BACKFILL_TOOFULL;
891 else if (state == "recovery_wait")
892 type = PG_STATE_RECOVERY_WAIT;
893 else if (state == "recovery_toofull")
894 type = PG_STATE_RECOVERY_TOOFULL;
895 else if (state == "undersized")
896 type = PG_STATE_UNDERSIZED;
897 else if (state == "activating")
898 type = PG_STATE_ACTIVATING;
899 else if (state == "peered")
900 type = PG_STATE_PEERED;
901 else if (state == "snaptrim")
902 type = PG_STATE_SNAPTRIM;
903 else if (state == "snaptrim_wait")
904 type = PG_STATE_SNAPTRIM_WAIT;
224ce89b
WB
905 else if (state == "snaptrim_error")
906 type = PG_STATE_SNAPTRIM_ERROR;
7c673cae
FG
907 else
908 type = -1;
909 return type;
910}
911
912// -- eversion_t --
913string eversion_t::get_key_name() const
914{
915 char key[32];
916 // Below is equivalent of sprintf("%010u.%020llu");
917 key[31] = 0;
918 ritoa<uint64_t, 10, 20>(version, key + 31);
919 key[10] = '.';
920 ritoa<uint32_t, 10, 10>(epoch, key + 10);
921 return string(key);
922}
923
924
925// -- pool_snap_info_t --
926void pool_snap_info_t::dump(Formatter *f) const
927{
928 f->dump_unsigned("snapid", snapid);
929 f->dump_stream("stamp") << stamp;
930 f->dump_string("name", name);
931}
932
933void pool_snap_info_t::encode(bufferlist& bl, uint64_t features) const
934{
935 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
936 __u8 struct_v = 1;
937 ::encode(struct_v, bl);
938 ::encode(snapid, bl);
939 ::encode(stamp, bl);
940 ::encode(name, bl);
941 return;
942 }
943 ENCODE_START(2, 2, bl);
944 ::encode(snapid, bl);
945 ::encode(stamp, bl);
946 ::encode(name, bl);
947 ENCODE_FINISH(bl);
948}
949
950void pool_snap_info_t::decode(bufferlist::iterator& bl)
951{
952 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
953 ::decode(snapid, bl);
954 ::decode(stamp, bl);
955 ::decode(name, bl);
956 DECODE_FINISH(bl);
957}
958
959void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
960{
961 o.push_back(new pool_snap_info_t);
962 o.push_back(new pool_snap_info_t);
963 o.back()->snapid = 1;
964 o.back()->stamp = utime_t(1, 2);
965 o.back()->name = "foo";
966}
967
968// -- pool_opts_t --
969
970typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
971static opt_mapping_t opt_mapping = boost::assign::map_list_of
972 ("scrub_min_interval", pool_opts_t::opt_desc_t(
973 pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
974 ("scrub_max_interval", pool_opts_t::opt_desc_t(
975 pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
976 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
977 pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
978 ("recovery_priority", pool_opts_t::opt_desc_t(
979 pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
980 ("recovery_op_priority", pool_opts_t::opt_desc_t(
981 pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
982 ("scrub_priority", pool_opts_t::opt_desc_t(
983 pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
984 ("compression_mode", pool_opts_t::opt_desc_t(
985 pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
986 ("compression_algorithm", pool_opts_t::opt_desc_t(
987 pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
988 ("compression_required_ratio", pool_opts_t::opt_desc_t(
989 pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
990 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
991 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
992 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
993 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
994 ("csum_type", pool_opts_t::opt_desc_t(
995 pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
996 ("csum_max_block", pool_opts_t::opt_desc_t(
997 pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
998 ("csum_min_block", pool_opts_t::opt_desc_t(
999 pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT));
1000
1001bool pool_opts_t::is_opt_name(const std::string& name) {
224ce89b 1002 return opt_mapping.count(name);
7c673cae
FG
1003}
1004
1005pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name) {
1006 opt_mapping_t::iterator i = opt_mapping.find(name);
1007 assert(i != opt_mapping.end());
1008 return i->second;
1009}
1010
1011bool pool_opts_t::is_set(pool_opts_t::key_t key) const {
224ce89b 1012 return opts.count(key);
7c673cae
FG
1013}
1014
1015const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const {
1016 opts_t::const_iterator i = opts.find(key);
1017 assert(i != opts.end());
1018 return i->second;
1019}
1020
1021bool pool_opts_t::unset(pool_opts_t::key_t key) {
1022 return opts.erase(key) > 0;
1023}
1024
1025class pool_opts_dumper_t : public boost::static_visitor<>
1026{
1027public:
1028 pool_opts_dumper_t(const std::string& name_, Formatter* f_) :
1029 name(name_.c_str()), f(f_) {}
1030
1031 void operator()(std::string s) const {
1032 f->dump_string(name, s);
1033 }
1034 void operator()(int i) const {
1035 f->dump_int(name, i);
1036 }
1037 void operator()(double d) const {
1038 f->dump_float(name, d);
1039 }
1040
1041private:
1042 const char* name;
1043 Formatter* f;
1044};
1045
1046void pool_opts_t::dump(const std::string& name, Formatter* f) const
1047{
1048 const opt_desc_t& desc = get_opt_desc(name);
1049 opts_t::const_iterator i = opts.find(desc.key);
1050 if (i == opts.end()) {
1051 return;
1052 }
1053 boost::apply_visitor(pool_opts_dumper_t(name, f), i->second);
1054}
1055
1056void pool_opts_t::dump(Formatter* f) const
1057{
1058 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1059 ++i) {
1060 const std::string& name = i->first;
1061 const opt_desc_t& desc = i->second;
1062 opts_t::const_iterator j = opts.find(desc.key);
1063 if (j == opts.end()) {
1064 continue;
1065 }
1066 boost::apply_visitor(pool_opts_dumper_t(name, f), j->second);
1067 }
1068}
1069
1070class pool_opts_encoder_t : public boost::static_visitor<>
1071{
1072public:
1073 explicit pool_opts_encoder_t(bufferlist& bl_) : bl(bl_) {}
1074
1075 void operator()(std::string s) const {
1076 ::encode(static_cast<int32_t>(pool_opts_t::STR), bl);
1077 ::encode(s, bl);
1078 }
1079 void operator()(int i) const {
1080 ::encode(static_cast<int32_t>(pool_opts_t::INT), bl);
1081 ::encode(i, bl);
1082 }
1083 void operator()(double d) const {
1084 ::encode(static_cast<int32_t>(pool_opts_t::DOUBLE), bl);
1085 ::encode(d, bl);
1086 }
1087
1088private:
1089 bufferlist& bl;
1090};
1091
1092void pool_opts_t::encode(bufferlist& bl) const {
1093 ENCODE_START(1, 1, bl);
1094 uint32_t n = static_cast<uint32_t>(opts.size());
1095 ::encode(n, bl);
1096 for (opts_t::const_iterator i = opts.begin(); i != opts.end(); ++i) {
1097 ::encode(static_cast<int32_t>(i->first), bl);
1098 boost::apply_visitor(pool_opts_encoder_t(bl), i->second);
1099 }
1100 ENCODE_FINISH(bl);
1101}
1102
1103void pool_opts_t::decode(bufferlist::iterator& bl) {
1104 DECODE_START(1, bl);
1105 __u32 n;
1106 ::decode(n, bl);
1107 opts.clear();
1108 while (n--) {
1109 int32_t k, t;
1110 ::decode(k, bl);
1111 ::decode(t, bl);
1112 if (t == STR) {
1113 std::string s;
1114 ::decode(s, bl);
1115 opts[static_cast<key_t>(k)] = s;
1116 } else if (t == INT) {
1117 int i;
1118 ::decode(i, bl);
1119 opts[static_cast<key_t>(k)] = i;
1120 } else if (t == DOUBLE) {
1121 double d;
1122 ::decode(d, bl);
1123 opts[static_cast<key_t>(k)] = d;
1124 } else {
1125 assert(!"invalid type");
1126 }
1127 }
1128 DECODE_FINISH(bl);
1129}
1130
1131ostream& operator<<(ostream& out, const pool_opts_t& opts)
1132{
1133 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1134 ++i) {
1135 const std::string& name = i->first;
1136 const pool_opts_t::opt_desc_t& desc = i->second;
1137 pool_opts_t::opts_t::const_iterator j = opts.opts.find(desc.key);
1138 if (j == opts.opts.end()) {
1139 continue;
1140 }
1141 out << " " << name << " " << j->second;
1142 }
1143 return out;
1144}
1145
1146// -- pg_pool_t --
1147
c07f9fc5
FG
1148const char *pg_pool_t::APPLICATION_NAME_CEPHFS("cephfs");
1149const char *pg_pool_t::APPLICATION_NAME_RBD("rbd");
1150const char *pg_pool_t::APPLICATION_NAME_RGW("rgw");
1151
7c673cae
FG
1152void pg_pool_t::dump(Formatter *f) const
1153{
1154 f->dump_unsigned("flags", get_flags());
1155 f->dump_string("flags_names", get_flags_string());
1156 f->dump_int("type", get_type());
1157 f->dump_int("size", get_size());
1158 f->dump_int("min_size", get_min_size());
31f18b77 1159 f->dump_int("crush_rule", get_crush_rule());
7c673cae
FG
1160 f->dump_int("object_hash", get_object_hash());
1161 f->dump_unsigned("pg_num", get_pg_num());
1162 f->dump_unsigned("pg_placement_num", get_pgp_num());
1163 f->dump_unsigned("crash_replay_interval", get_crash_replay_interval());
1164 f->dump_stream("last_change") << get_last_change();
1165 f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
1166 f->dump_stream("last_force_op_resend_preluminous")
1167 << get_last_force_op_resend_preluminous();
1168 f->dump_unsigned("auid", get_auid());
1169 f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1170 f->dump_unsigned("snap_seq", get_snap_seq());
1171 f->dump_unsigned("snap_epoch", get_snap_epoch());
1172 f->open_array_section("pool_snaps");
1173 for (map<snapid_t, pool_snap_info_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
1174 f->open_object_section("pool_snap_info");
1175 p->second.dump(f);
1176 f->close_section();
1177 }
1178 f->close_section();
1179 f->dump_stream("removed_snaps") << removed_snaps;
1180 f->dump_unsigned("quota_max_bytes", quota_max_bytes);
1181 f->dump_unsigned("quota_max_objects", quota_max_objects);
1182 f->open_array_section("tiers");
1183 for (set<uint64_t>::const_iterator p = tiers.begin(); p != tiers.end(); ++p)
1184 f->dump_unsigned("pool_id", *p);
1185 f->close_section();
1186 f->dump_int("tier_of", tier_of);
1187 f->dump_int("read_tier", read_tier);
1188 f->dump_int("write_tier", write_tier);
1189 f->dump_string("cache_mode", get_cache_mode_name());
1190 f->dump_unsigned("target_max_bytes", target_max_bytes);
1191 f->dump_unsigned("target_max_objects", target_max_objects);
1192 f->dump_unsigned("cache_target_dirty_ratio_micro",
1193 cache_target_dirty_ratio_micro);
1194 f->dump_unsigned("cache_target_dirty_high_ratio_micro",
1195 cache_target_dirty_high_ratio_micro);
1196 f->dump_unsigned("cache_target_full_ratio_micro",
1197 cache_target_full_ratio_micro);
1198 f->dump_unsigned("cache_min_flush_age", cache_min_flush_age);
1199 f->dump_unsigned("cache_min_evict_age", cache_min_evict_age);
1200 f->dump_string("erasure_code_profile", erasure_code_profile);
1201 f->open_object_section("hit_set_params");
1202 hit_set_params.dump(f);
1203 f->close_section(); // hit_set_params
1204 f->dump_unsigned("hit_set_period", hit_set_period);
1205 f->dump_unsigned("hit_set_count", hit_set_count);
1206 f->dump_bool("use_gmt_hitset", use_gmt_hitset);
1207 f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
1208 f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote);
1209 f->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate);
1210 f->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n);
1211 f->open_array_section("grade_table");
1212 for (unsigned i = 0; i < hit_set_count; ++i)
1213 f->dump_unsigned("value", get_grade(i));
1214 f->close_section();
1215 f->dump_unsigned("stripe_width", get_stripe_width());
1216 f->dump_unsigned("expected_num_objects", expected_num_objects);
1217 f->dump_bool("fast_read", fast_read);
1218 f->open_object_section("options");
1219 opts.dump(f);
1220 f->close_section(); // options
c07f9fc5
FG
1221 f->open_object_section("application_metadata");
1222 for (auto &app_pair : application_metadata) {
1223 f->open_object_section(app_pair.first.c_str());
1224 for (auto &kv_pair : app_pair.second) {
1225 f->dump_string(kv_pair.first.c_str(), kv_pair.second);
1226 }
1227 f->close_section(); // application
1228 }
1229 f->close_section(); // application_metadata
7c673cae
FG
1230}
1231
1232void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
1233 for (size_t i = 0; i < from.size(); ++i) {
1234 if (from[i] != CRUSH_ITEM_NONE) {
1235 to->insert(
1236 pg_shard_t(
1237 from[i],
1238 ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
1239 }
1240 }
1241}
1242
1243void pg_pool_t::calc_pg_masks()
1244{
1245 pg_num_mask = (1 << cbits(pg_num-1)) - 1;
1246 pgp_num_mask = (1 << cbits(pgp_num-1)) - 1;
1247}
1248
1249unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid) const
1250{
1251 if (pg_num == pg_num_mask + 1)
1252 return pg_num; // power-of-2 split
1253 unsigned mask = pg_num_mask >> 1;
1254 if ((pgid.ps() & mask) < (pg_num & mask))
1255 return pg_num_mask + 1; // smaller bin size (already split)
1256 else
1257 return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
1258}
1259
1260/*
1261 * we have two snap modes:
1262 * - pool global snaps
1263 * - snap existence/non-existence defined by snaps[] and snap_seq
1264 * - user managed snaps
1265 * - removal governed by removed_snaps
1266 *
1267 * we know which mode we're using based on whether removed_snaps is empty.
1268 */
1269bool pg_pool_t::is_pool_snaps_mode() const
1270{
1271 return removed_snaps.empty() && get_snap_seq() > 0;
1272}
1273
1274bool pg_pool_t::is_unmanaged_snaps_mode() const
1275{
1276 return removed_snaps.size() && get_snap_seq() > 0;
1277}
1278
1279bool pg_pool_t::is_removed_snap(snapid_t s) const
1280{
1281 if (is_pool_snaps_mode())
1282 return s <= get_snap_seq() && snaps.count(s) == 0;
1283 else
1284 return removed_snaps.contains(s);
1285}
1286
1287/*
1288 * build set of known-removed sets from either pool snaps or
1289 * explicit removed_snaps set.
1290 */
1291void pg_pool_t::build_removed_snaps(interval_set<snapid_t>& rs) const
1292{
1293 if (is_pool_snaps_mode()) {
1294 rs.clear();
1295 for (snapid_t s = 1; s <= get_snap_seq(); s = s + 1)
1296 if (snaps.count(s) == 0)
1297 rs.insert(s);
1298 } else {
1299 rs = removed_snaps;
1300 }
1301}
1302
1303snapid_t pg_pool_t::snap_exists(const char *s) const
1304{
1305 for (map<snapid_t,pool_snap_info_t>::const_iterator p = snaps.begin();
1306 p != snaps.end();
1307 ++p)
1308 if (p->second.name == s)
1309 return p->second.snapid;
1310 return 0;
1311}
1312
1313void pg_pool_t::add_snap(const char *n, utime_t stamp)
1314{
1315 assert(!is_unmanaged_snaps_mode());
1316 snapid_t s = get_snap_seq() + 1;
1317 snap_seq = s;
1318 snaps[s].snapid = s;
1319 snaps[s].name = n;
1320 snaps[s].stamp = stamp;
1321}
1322
1323void pg_pool_t::add_unmanaged_snap(uint64_t& snapid)
1324{
1325 if (removed_snaps.empty()) {
1326 assert(!is_pool_snaps_mode());
1327 removed_snaps.insert(snapid_t(1));
1328 snap_seq = 1;
1329 }
1330 snapid = snap_seq = snap_seq + 1;
1331}
1332
1333void pg_pool_t::remove_snap(snapid_t s)
1334{
1335 assert(snaps.count(s));
1336 snaps.erase(s);
1337 snap_seq = snap_seq + 1;
1338}
1339
1340void pg_pool_t::remove_unmanaged_snap(snapid_t s)
1341{
1342 assert(is_unmanaged_snaps_mode());
1343 removed_snaps.insert(s);
1344 snap_seq = snap_seq + 1;
1345 removed_snaps.insert(get_snap_seq());
1346}
1347
1348SnapContext pg_pool_t::get_snap_context() const
1349{
1350 vector<snapid_t> s(snaps.size());
1351 unsigned i = 0;
1352 for (map<snapid_t, pool_snap_info_t>::const_reverse_iterator p = snaps.rbegin();
1353 p != snaps.rend();
1354 ++p)
1355 s[i++] = p->first;
1356 return SnapContext(get_snap_seq(), s);
1357}
1358
1359uint32_t pg_pool_t::hash_key(const string& key, const string& ns) const
1360{
1361 if (ns.empty())
1362 return ceph_str_hash(object_hash, key.data(), key.length());
1363 int nsl = ns.length();
1364 int len = key.length() + nsl + 1;
1365 char buf[len];
1366 memcpy(&buf[0], ns.data(), nsl);
1367 buf[nsl] = '\037';
1368 memcpy(&buf[nsl+1], key.data(), key.length());
1369 return ceph_str_hash(object_hash, &buf[0], len);
1370}
1371
1372uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v) const
1373{
1374 return ceph_stable_mod(v, pg_num, pg_num_mask);
1375}
1376
1377/*
1378 * map a raw pg (with full precision ps) into an actual pg, for storage
1379 */
1380pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
1381{
1382 pg.set_ps(ceph_stable_mod(pg.ps(), pg_num, pg_num_mask));
1383 return pg;
1384}
1385
1386/*
1387 * map raw pg (full precision ps) into a placement seed. include
1388 * pool id in that value so that different pools don't use the same
1389 * seeds.
1390 */
1391ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
1392{
1393 if (flags & FLAG_HASHPSPOOL) {
1394 // Hash the pool id so that pool PGs do not overlap.
1395 return
1396 crush_hash32_2(CRUSH_HASH_RJENKINS1,
1397 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
1398 pg.pool());
1399 } else {
1400 // Legacy behavior; add ps and pool together. This is not a great
1401 // idea because the PGs from each pool will essentially overlap on
1402 // top of each other: 0.5 == 1.4 == 2.3 == ...
1403 return
1404 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
1405 pg.pool();
1406 }
1407}
1408
1409uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
1410{
1411 uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
1412 if (pg_num == pg_num_mask + 1) {
1413 r &= ~pg_num_mask;
1414 } else {
1415 unsigned smaller_mask = pg_num_mask >> 1;
1416 if ((pg.ps() & smaller_mask) < (pg_num & smaller_mask)) {
1417 r &= ~pg_num_mask;
1418 } else {
1419 r &= ~smaller_mask;
1420 }
1421 }
1422 r |= pg.ps();
1423 return r;
1424}
1425
1426void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
1427{
1428 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1429 // this encoding matches the old struct ceph_pg_pool
1430 __u8 struct_v = 2;
1431 ::encode(struct_v, bl);
1432 ::encode(type, bl);
1433 ::encode(size, bl);
31f18b77 1434 ::encode(crush_rule, bl);
7c673cae
FG
1435 ::encode(object_hash, bl);
1436 ::encode(pg_num, bl);
1437 ::encode(pgp_num, bl);
1438 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1439 ::encode(lpg_num, bl);
1440 ::encode(lpgp_num, bl);
1441 ::encode(last_change, bl);
1442 ::encode(snap_seq, bl);
1443 ::encode(snap_epoch, bl);
1444
1445 __u32 n = snaps.size();
1446 ::encode(n, bl);
1447 n = removed_snaps.num_intervals();
1448 ::encode(n, bl);
1449
1450 ::encode(auid, bl);
1451
1452 ::encode_nohead(snaps, bl, features);
1453 ::encode_nohead(removed_snaps, bl);
1454 return;
1455 }
1456
1457 if ((features & CEPH_FEATURE_OSDENC) == 0) {
1458 __u8 struct_v = 4;
1459 ::encode(struct_v, bl);
1460 ::encode(type, bl);
1461 ::encode(size, bl);
31f18b77 1462 ::encode(crush_rule, bl);
7c673cae
FG
1463 ::encode(object_hash, bl);
1464 ::encode(pg_num, bl);
1465 ::encode(pgp_num, bl);
1466 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1467 ::encode(lpg_num, bl);
1468 ::encode(lpgp_num, bl);
1469 ::encode(last_change, bl);
1470 ::encode(snap_seq, bl);
1471 ::encode(snap_epoch, bl);
1472 ::encode(snaps, bl, features);
1473 ::encode(removed_snaps, bl);
1474 ::encode(auid, bl);
1475 ::encode(flags, bl);
1476 ::encode(crash_replay_interval, bl);
1477 return;
1478 }
1479
1480 if ((features & CEPH_FEATURE_OSD_POOLRESEND) == 0) {
1481 // we simply added last_force_op_resend here, which is a fully
1482 // backward compatible change. however, encoding the same map
1483 // differently between monitors triggers scrub noise (even though
1484 // they are decodable without the feature), so let's be pendantic
1485 // about it.
1486 ENCODE_START(14, 5, bl);
1487 ::encode(type, bl);
1488 ::encode(size, bl);
31f18b77 1489 ::encode(crush_rule, bl);
7c673cae
FG
1490 ::encode(object_hash, bl);
1491 ::encode(pg_num, bl);
1492 ::encode(pgp_num, bl);
1493 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1494 ::encode(lpg_num, bl);
1495 ::encode(lpgp_num, bl);
1496 ::encode(last_change, bl);
1497 ::encode(snap_seq, bl);
1498 ::encode(snap_epoch, bl);
1499 ::encode(snaps, bl, features);
1500 ::encode(removed_snaps, bl);
1501 ::encode(auid, bl);
1502 ::encode(flags, bl);
1503 ::encode(crash_replay_interval, bl);
1504 ::encode(min_size, bl);
1505 ::encode(quota_max_bytes, bl);
1506 ::encode(quota_max_objects, bl);
1507 ::encode(tiers, bl);
1508 ::encode(tier_of, bl);
1509 __u8 c = cache_mode;
1510 ::encode(c, bl);
1511 ::encode(read_tier, bl);
1512 ::encode(write_tier, bl);
1513 ::encode(properties, bl);
1514 ::encode(hit_set_params, bl);
1515 ::encode(hit_set_period, bl);
1516 ::encode(hit_set_count, bl);
1517 ::encode(stripe_width, bl);
1518 ::encode(target_max_bytes, bl);
1519 ::encode(target_max_objects, bl);
1520 ::encode(cache_target_dirty_ratio_micro, bl);
1521 ::encode(cache_target_full_ratio_micro, bl);
1522 ::encode(cache_min_flush_age, bl);
1523 ::encode(cache_min_evict_age, bl);
1524 ::encode(erasure_code_profile, bl);
1525 ENCODE_FINISH(bl);
1526 return;
1527 }
1528
c07f9fc5 1529 uint8_t v = 26;
7c673cae
FG
1530 if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
1531 // this was the first post-hammer thing we added; if it's missing, encode
1532 // like hammer.
1533 v = 21;
1534 }
c07f9fc5 1535 if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
7c673cae
FG
1536 v = 24;
1537 }
1538
1539 ENCODE_START(v, 5, bl);
1540 ::encode(type, bl);
1541 ::encode(size, bl);
31f18b77 1542 ::encode(crush_rule, bl);
7c673cae
FG
1543 ::encode(object_hash, bl);
1544 ::encode(pg_num, bl);
1545 ::encode(pgp_num, bl);
1546 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1547 ::encode(lpg_num, bl);
1548 ::encode(lpgp_num, bl);
1549 ::encode(last_change, bl);
1550 ::encode(snap_seq, bl);
1551 ::encode(snap_epoch, bl);
1552 ::encode(snaps, bl, features);
1553 ::encode(removed_snaps, bl);
1554 ::encode(auid, bl);
1555 ::encode(flags, bl);
1556 ::encode(crash_replay_interval, bl);
1557 ::encode(min_size, bl);
1558 ::encode(quota_max_bytes, bl);
1559 ::encode(quota_max_objects, bl);
1560 ::encode(tiers, bl);
1561 ::encode(tier_of, bl);
1562 __u8 c = cache_mode;
1563 ::encode(c, bl);
1564 ::encode(read_tier, bl);
1565 ::encode(write_tier, bl);
1566 ::encode(properties, bl);
1567 ::encode(hit_set_params, bl);
1568 ::encode(hit_set_period, bl);
1569 ::encode(hit_set_count, bl);
1570 ::encode(stripe_width, bl);
1571 ::encode(target_max_bytes, bl);
1572 ::encode(target_max_objects, bl);
1573 ::encode(cache_target_dirty_ratio_micro, bl);
1574 ::encode(cache_target_full_ratio_micro, bl);
1575 ::encode(cache_min_flush_age, bl);
1576 ::encode(cache_min_evict_age, bl);
1577 ::encode(erasure_code_profile, bl);
1578 ::encode(last_force_op_resend_preluminous, bl);
1579 ::encode(min_read_recency_for_promote, bl);
1580 ::encode(expected_num_objects, bl);
1581 if (v >= 19) {
1582 ::encode(cache_target_dirty_high_ratio_micro, bl);
1583 }
1584 if (v >= 20) {
1585 ::encode(min_write_recency_for_promote, bl);
1586 }
1587 if (v >= 21) {
1588 ::encode(use_gmt_hitset, bl);
1589 }
1590 if (v >= 22) {
1591 ::encode(fast_read, bl);
1592 }
1593 if (v >= 23) {
1594 ::encode(hit_set_grade_decay_rate, bl);
1595 ::encode(hit_set_search_last_n, bl);
1596 }
1597 if (v >= 24) {
1598 ::encode(opts, bl);
1599 }
1600 if (v >= 25) {
1601 ::encode(last_force_op_resend, bl);
1602 }
c07f9fc5
FG
1603 if (v >= 26) {
1604 ::encode(application_metadata, bl);
1605 }
7c673cae
FG
1606 ENCODE_FINISH(bl);
1607}
1608
1609void pg_pool_t::decode(bufferlist::iterator& bl)
1610{
c07f9fc5 1611 DECODE_START_LEGACY_COMPAT_LEN(26, 5, 5, bl);
7c673cae
FG
1612 ::decode(type, bl);
1613 ::decode(size, bl);
31f18b77 1614 ::decode(crush_rule, bl);
7c673cae
FG
1615 ::decode(object_hash, bl);
1616 ::decode(pg_num, bl);
1617 ::decode(pgp_num, bl);
1618 {
1619 __u32 lpg_num, lpgp_num;
1620 ::decode(lpg_num, bl);
1621 ::decode(lpgp_num, bl);
1622 }
1623 ::decode(last_change, bl);
1624 ::decode(snap_seq, bl);
1625 ::decode(snap_epoch, bl);
1626
1627 if (struct_v >= 3) {
1628 ::decode(snaps, bl);
1629 ::decode(removed_snaps, bl);
1630 ::decode(auid, bl);
1631 } else {
1632 __u32 n, m;
1633 ::decode(n, bl);
1634 ::decode(m, bl);
1635 ::decode(auid, bl);
1636 ::decode_nohead(n, snaps, bl);
1637 ::decode_nohead(m, removed_snaps, bl);
1638 }
1639
1640 if (struct_v >= 4) {
1641 ::decode(flags, bl);
1642 ::decode(crash_replay_interval, bl);
1643 } else {
1644 flags = 0;
1645
1646 // if this looks like the 'data' pool, set the
1647 // crash_replay_interval appropriately. unfortunately, we can't
1648 // be precise here. this should be good enough to preserve replay
1649 // on the data pool for the majority of cluster upgrades, though.
31f18b77 1650 if (crush_rule == 0 && auid == 0)
7c673cae
FG
1651 crash_replay_interval = 60;
1652 else
1653 crash_replay_interval = 0;
1654 }
1655 if (struct_v >= 7) {
1656 ::decode(min_size, bl);
1657 } else {
1658 min_size = size - size/2;
1659 }
1660 if (struct_v >= 8) {
1661 ::decode(quota_max_bytes, bl);
1662 ::decode(quota_max_objects, bl);
1663 }
1664 if (struct_v >= 9) {
1665 ::decode(tiers, bl);
1666 ::decode(tier_of, bl);
1667 __u8 v;
1668 ::decode(v, bl);
1669 cache_mode = (cache_mode_t)v;
1670 ::decode(read_tier, bl);
1671 ::decode(write_tier, bl);
1672 }
1673 if (struct_v >= 10) {
1674 ::decode(properties, bl);
1675 }
1676 if (struct_v >= 11) {
1677 ::decode(hit_set_params, bl);
1678 ::decode(hit_set_period, bl);
1679 ::decode(hit_set_count, bl);
1680 } else {
1681 pg_pool_t def;
1682 hit_set_period = def.hit_set_period;
1683 hit_set_count = def.hit_set_count;
1684 }
1685 if (struct_v >= 12) {
1686 ::decode(stripe_width, bl);
1687 } else {
1688 set_stripe_width(0);
1689 }
1690 if (struct_v >= 13) {
1691 ::decode(target_max_bytes, bl);
1692 ::decode(target_max_objects, bl);
1693 ::decode(cache_target_dirty_ratio_micro, bl);
1694 ::decode(cache_target_full_ratio_micro, bl);
1695 ::decode(cache_min_flush_age, bl);
1696 ::decode(cache_min_evict_age, bl);
1697 } else {
1698 target_max_bytes = 0;
1699 target_max_objects = 0;
1700 cache_target_dirty_ratio_micro = 0;
1701 cache_target_full_ratio_micro = 0;
1702 cache_min_flush_age = 0;
1703 cache_min_evict_age = 0;
1704 }
1705 if (struct_v >= 14) {
1706 ::decode(erasure_code_profile, bl);
1707 }
1708 if (struct_v >= 15) {
1709 ::decode(last_force_op_resend_preluminous, bl);
1710 } else {
1711 last_force_op_resend_preluminous = 0;
1712 }
1713 if (struct_v >= 16) {
1714 ::decode(min_read_recency_for_promote, bl);
1715 } else {
1716 min_read_recency_for_promote = 1;
1717 }
1718 if (struct_v >= 17) {
1719 ::decode(expected_num_objects, bl);
1720 } else {
1721 expected_num_objects = 0;
1722 }
1723 if (struct_v >= 19) {
1724 ::decode(cache_target_dirty_high_ratio_micro, bl);
1725 } else {
1726 cache_target_dirty_high_ratio_micro = cache_target_dirty_ratio_micro;
1727 }
1728 if (struct_v >= 20) {
1729 ::decode(min_write_recency_for_promote, bl);
1730 } else {
1731 min_write_recency_for_promote = 1;
1732 }
1733 if (struct_v >= 21) {
1734 ::decode(use_gmt_hitset, bl);
1735 } else {
1736 use_gmt_hitset = false;
1737 }
1738 if (struct_v >= 22) {
1739 ::decode(fast_read, bl);
1740 } else {
1741 fast_read = false;
1742 }
1743 if (struct_v >= 23) {
1744 ::decode(hit_set_grade_decay_rate, bl);
1745 ::decode(hit_set_search_last_n, bl);
1746 } else {
1747 hit_set_grade_decay_rate = 0;
1748 hit_set_search_last_n = 1;
1749 }
1750 if (struct_v >= 24) {
1751 ::decode(opts, bl);
1752 }
1753 if (struct_v >= 25) {
1754 ::decode(last_force_op_resend, bl);
1755 } else {
1756 last_force_op_resend = last_force_op_resend_preluminous;
1757 }
c07f9fc5
FG
1758 if (struct_v >= 26) {
1759 ::decode(application_metadata, bl);
1760 }
7c673cae
FG
1761 DECODE_FINISH(bl);
1762 calc_pg_masks();
1763 calc_grade_table();
1764}
1765
1766void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
1767{
1768 pg_pool_t a;
1769 o.push_back(new pg_pool_t(a));
1770
1771 a.type = TYPE_REPLICATED;
1772 a.size = 2;
31f18b77 1773 a.crush_rule = 3;
7c673cae
FG
1774 a.object_hash = 4;
1775 a.pg_num = 6;
1776 a.pgp_num = 5;
1777 a.last_change = 9;
1778 a.last_force_op_resend = 123823;
1779 a.last_force_op_resend_preluminous = 123824;
1780 a.snap_seq = 10;
1781 a.snap_epoch = 11;
1782 a.auid = 12;
1783 a.crash_replay_interval = 13;
1784 a.quota_max_bytes = 473;
1785 a.quota_max_objects = 474;
1786 o.push_back(new pg_pool_t(a));
1787
1788 a.snaps[3].name = "asdf";
1789 a.snaps[3].snapid = 3;
1790 a.snaps[3].stamp = utime_t(123, 4);
1791 a.snaps[6].name = "qwer";
1792 a.snaps[6].snapid = 6;
1793 a.snaps[6].stamp = utime_t(23423, 4);
1794 o.push_back(new pg_pool_t(a));
1795
1796 a.removed_snaps.insert(2); // not quite valid to combine with snaps!
1797 a.quota_max_bytes = 2473;
1798 a.quota_max_objects = 4374;
1799 a.tiers.insert(0);
1800 a.tiers.insert(1);
1801 a.tier_of = 2;
1802 a.cache_mode = CACHEMODE_WRITEBACK;
1803 a.read_tier = 1;
1804 a.write_tier = 1;
1805 a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
1806 a.hit_set_period = 3600;
1807 a.hit_set_count = 8;
1808 a.min_read_recency_for_promote = 1;
1809 a.min_write_recency_for_promote = 1;
1810 a.hit_set_grade_decay_rate = 50;
1811 a.hit_set_search_last_n = 1;
1812 a.calc_grade_table();
1813 a.set_stripe_width(12345);
1814 a.target_max_bytes = 1238132132;
1815 a.target_max_objects = 1232132;
1816 a.cache_target_dirty_ratio_micro = 187232;
1817 a.cache_target_dirty_high_ratio_micro = 309856;
1818 a.cache_target_full_ratio_micro = 987222;
1819 a.cache_min_flush_age = 231;
1820 a.cache_min_evict_age = 2321;
1821 a.erasure_code_profile = "profile in osdmap";
1822 a.expected_num_objects = 123456;
1823 a.fast_read = false;
c07f9fc5 1824 a.application_metadata = {{"rbd", {{"key", "value"}}}};
7c673cae
FG
1825 o.push_back(new pg_pool_t(a));
1826}
1827
1828ostream& operator<<(ostream& out, const pg_pool_t& p)
1829{
1830 out << p.get_type_name()
1831 << " size " << p.get_size()
1832 << " min_size " << p.get_min_size()
31f18b77 1833 << " crush_rule " << p.get_crush_rule()
7c673cae
FG
1834 << " object_hash " << p.get_object_hash_name()
1835 << " pg_num " << p.get_pg_num()
1836 << " pgp_num " << p.get_pgp_num()
1837 << " last_change " << p.get_last_change();
1838 if (p.get_last_force_op_resend() ||
1839 p.get_last_force_op_resend_preluminous())
1840 out << " lfor " << p.get_last_force_op_resend() << "/"
1841 << p.get_last_force_op_resend_preluminous();
1842 if (p.get_auid())
1843 out << " owner " << p.get_auid();
1844 if (p.flags)
1845 out << " flags " << p.get_flags_string();
1846 if (p.crash_replay_interval)
1847 out << " crash_replay_interval " << p.crash_replay_interval;
1848 if (p.quota_max_bytes)
1849 out << " max_bytes " << p.quota_max_bytes;
1850 if (p.quota_max_objects)
1851 out << " max_objects " << p.quota_max_objects;
1852 if (!p.tiers.empty())
1853 out << " tiers " << p.tiers;
1854 if (p.is_tier())
1855 out << " tier_of " << p.tier_of;
1856 if (p.has_read_tier())
1857 out << " read_tier " << p.read_tier;
1858 if (p.has_write_tier())
1859 out << " write_tier " << p.write_tier;
1860 if (p.cache_mode)
1861 out << " cache_mode " << p.get_cache_mode_name();
1862 if (p.target_max_bytes)
1863 out << " target_bytes " << p.target_max_bytes;
1864 if (p.target_max_objects)
1865 out << " target_objects " << p.target_max_objects;
1866 if (p.hit_set_params.get_type() != HitSet::TYPE_NONE) {
1867 out << " hit_set " << p.hit_set_params
1868 << " " << p.hit_set_period << "s"
1869 << " x" << p.hit_set_count << " decay_rate "
1870 << p.hit_set_grade_decay_rate
1871 << " search_last_n " << p.hit_set_search_last_n;
1872 }
1873 if (p.min_read_recency_for_promote)
1874 out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
1875 if (p.min_write_recency_for_promote)
1876 out << " min_write_recency_for_promote " << p.min_write_recency_for_promote;
1877 out << " stripe_width " << p.get_stripe_width();
1878 if (p.expected_num_objects)
1879 out << " expected_num_objects " << p.expected_num_objects;
1880 if (p.fast_read)
1881 out << " fast_read " << p.fast_read;
1882 out << p.opts;
c07f9fc5
FG
1883 if (!p.application_metadata.empty()) {
1884 out << " application ";
1885 for (auto it = p.application_metadata.begin();
1886 it != p.application_metadata.end(); ++it) {
1887 if (it != p.application_metadata.begin())
1888 out << ",";
1889 out << it->first;
1890 }
1891 }
7c673cae
FG
1892 return out;
1893}
1894
1895
1896// -- object_stat_sum_t --
1897
1898void object_stat_sum_t::dump(Formatter *f) const
1899{
1900 f->dump_int("num_bytes", num_bytes);
1901 f->dump_int("num_objects", num_objects);
1902 f->dump_int("num_object_clones", num_object_clones);
1903 f->dump_int("num_object_copies", num_object_copies);
1904 f->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary);
1905 f->dump_int("num_objects_missing", num_objects_missing);
1906 f->dump_int("num_objects_degraded", num_objects_degraded);
1907 f->dump_int("num_objects_misplaced", num_objects_misplaced);
1908 f->dump_int("num_objects_unfound", num_objects_unfound);
1909 f->dump_int("num_objects_dirty", num_objects_dirty);
1910 f->dump_int("num_whiteouts", num_whiteouts);
1911 f->dump_int("num_read", num_rd);
1912 f->dump_int("num_read_kb", num_rd_kb);
1913 f->dump_int("num_write", num_wr);
1914 f->dump_int("num_write_kb", num_wr_kb);
1915 f->dump_int("num_scrub_errors", num_scrub_errors);
1916 f->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors);
1917 f->dump_int("num_deep_scrub_errors", num_deep_scrub_errors);
1918 f->dump_int("num_objects_recovered", num_objects_recovered);
1919 f->dump_int("num_bytes_recovered", num_bytes_recovered);
1920 f->dump_int("num_keys_recovered", num_keys_recovered);
1921 f->dump_int("num_objects_omap", num_objects_omap);
1922 f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
1923 f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
1924 f->dump_int("num_flush", num_flush);
1925 f->dump_int("num_flush_kb", num_flush_kb);
1926 f->dump_int("num_evict", num_evict);
1927 f->dump_int("num_evict_kb", num_evict_kb);
1928 f->dump_int("num_promote", num_promote);
1929 f->dump_int("num_flush_mode_high", num_flush_mode_high);
1930 f->dump_int("num_flush_mode_low", num_flush_mode_low);
1931 f->dump_int("num_evict_mode_some", num_evict_mode_some);
1932 f->dump_int("num_evict_mode_full", num_evict_mode_full);
1933 f->dump_int("num_objects_pinned", num_objects_pinned);
1934 f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
1935}
1936
1937void object_stat_sum_t::encode(bufferlist& bl) const
1938{
1939 ENCODE_START(16, 14, bl);
1940#if defined(CEPH_LITTLE_ENDIAN)
1941 bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
1942#else
1943 ::encode(num_bytes, bl);
1944 ::encode(num_objects, bl);
1945 ::encode(num_object_clones, bl);
1946 ::encode(num_object_copies, bl);
1947 ::encode(num_objects_missing_on_primary, bl);
1948 ::encode(num_objects_degraded, bl);
1949 ::encode(num_objects_unfound, bl);
1950 ::encode(num_rd, bl);
1951 ::encode(num_rd_kb, bl);
1952 ::encode(num_wr, bl);
1953 ::encode(num_wr_kb, bl);
1954 ::encode(num_scrub_errors, bl);
1955 ::encode(num_objects_recovered, bl);
1956 ::encode(num_bytes_recovered, bl);
1957 ::encode(num_keys_recovered, bl);
1958 ::encode(num_shallow_scrub_errors, bl);
1959 ::encode(num_deep_scrub_errors, bl);
1960 ::encode(num_objects_dirty, bl);
1961 ::encode(num_whiteouts, bl);
1962 ::encode(num_objects_omap, bl);
1963 ::encode(num_objects_hit_set_archive, bl);
1964 ::encode(num_objects_misplaced, bl);
1965 ::encode(num_bytes_hit_set_archive, bl);
1966 ::encode(num_flush, bl);
1967 ::encode(num_flush_kb, bl);
1968 ::encode(num_evict, bl);
1969 ::encode(num_evict_kb, bl);
1970 ::encode(num_promote, bl);
1971 ::encode(num_flush_mode_high, bl);
1972 ::encode(num_flush_mode_low, bl);
1973 ::encode(num_evict_mode_some, bl);
1974 ::encode(num_evict_mode_full, bl);
1975 ::encode(num_objects_pinned, bl);
1976 ::encode(num_objects_missing, bl);
1977 ::encode(num_legacy_snapsets, bl);
1978#endif
1979 ENCODE_FINISH(bl);
1980}
1981
1982void object_stat_sum_t::decode(bufferlist::iterator& bl)
1983{
1984 bool decode_finish = false;
1985 DECODE_START(16, bl);
1986#if defined(CEPH_LITTLE_ENDIAN)
1987 if (struct_v >= 16) {
1988 bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
1989 decode_finish = true;
1990 }
1991#endif
1992 if (!decode_finish) {
1993 ::decode(num_bytes, bl);
1994 ::decode(num_objects, bl);
1995 ::decode(num_object_clones, bl);
1996 ::decode(num_object_copies, bl);
1997 ::decode(num_objects_missing_on_primary, bl);
1998 ::decode(num_objects_degraded, bl);
1999 ::decode(num_objects_unfound, bl);
2000 ::decode(num_rd, bl);
2001 ::decode(num_rd_kb, bl);
2002 ::decode(num_wr, bl);
2003 ::decode(num_wr_kb, bl);
2004 ::decode(num_scrub_errors, bl);
2005 ::decode(num_objects_recovered, bl);
2006 ::decode(num_bytes_recovered, bl);
2007 ::decode(num_keys_recovered, bl);
2008 ::decode(num_shallow_scrub_errors, bl);
2009 ::decode(num_deep_scrub_errors, bl);
2010 ::decode(num_objects_dirty, bl);
2011 ::decode(num_whiteouts, bl);
2012 ::decode(num_objects_omap, bl);
2013 ::decode(num_objects_hit_set_archive, bl);
2014 ::decode(num_objects_misplaced, bl);
2015 ::decode(num_bytes_hit_set_archive, bl);
2016 ::decode(num_flush, bl);
2017 ::decode(num_flush_kb, bl);
2018 ::decode(num_evict, bl);
2019 ::decode(num_evict_kb, bl);
2020 ::decode(num_promote, bl);
2021 ::decode(num_flush_mode_high, bl);
2022 ::decode(num_flush_mode_low, bl);
2023 ::decode(num_evict_mode_some, bl);
2024 ::decode(num_evict_mode_full, bl);
2025 ::decode(num_objects_pinned, bl);
2026 ::decode(num_objects_missing, bl);
2027 if (struct_v >= 16) {
2028 ::decode(num_legacy_snapsets, bl);
2029 } else {
2030 num_legacy_snapsets = num_object_clones; // upper bound
2031 }
2032 }
2033 DECODE_FINISH(bl);
2034}
2035
2036void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
2037{
2038 object_stat_sum_t a;
2039
2040 a.num_bytes = 1;
2041 a.num_objects = 3;
2042 a.num_object_clones = 4;
2043 a.num_object_copies = 5;
2044 a.num_objects_missing_on_primary = 6;
2045 a.num_objects_missing = 123;
2046 a.num_objects_degraded = 7;
2047 a.num_objects_unfound = 8;
2048 a.num_rd = 9; a.num_rd_kb = 10;
2049 a.num_wr = 11; a.num_wr_kb = 12;
2050 a.num_objects_recovered = 14;
2051 a.num_bytes_recovered = 15;
2052 a.num_keys_recovered = 16;
2053 a.num_deep_scrub_errors = 17;
2054 a.num_shallow_scrub_errors = 18;
2055 a.num_scrub_errors = a.num_deep_scrub_errors + a.num_shallow_scrub_errors;
2056 a.num_objects_dirty = 21;
2057 a.num_whiteouts = 22;
2058 a.num_objects_misplaced = 1232;
2059 a.num_objects_hit_set_archive = 2;
2060 a.num_bytes_hit_set_archive = 27;
2061 a.num_flush = 5;
2062 a.num_flush_kb = 6;
2063 a.num_evict = 7;
2064 a.num_evict_kb = 8;
2065 a.num_promote = 9;
2066 a.num_flush_mode_high = 0;
2067 a.num_flush_mode_low = 1;
2068 a.num_evict_mode_some = 1;
2069 a.num_evict_mode_full = 0;
2070 a.num_objects_pinned = 20;
2071 o.push_back(new object_stat_sum_t(a));
2072}
2073
2074void object_stat_sum_t::add(const object_stat_sum_t& o)
2075{
2076 num_bytes += o.num_bytes;
2077 num_objects += o.num_objects;
2078 num_object_clones += o.num_object_clones;
2079 num_object_copies += o.num_object_copies;
2080 num_objects_missing_on_primary += o.num_objects_missing_on_primary;
2081 num_objects_missing += o.num_objects_missing;
2082 num_objects_degraded += o.num_objects_degraded;
2083 num_objects_misplaced += o.num_objects_misplaced;
2084 num_rd += o.num_rd;
2085 num_rd_kb += o.num_rd_kb;
2086 num_wr += o.num_wr;
2087 num_wr_kb += o.num_wr_kb;
2088 num_objects_unfound += o.num_objects_unfound;
2089 num_scrub_errors += o.num_scrub_errors;
2090 num_shallow_scrub_errors += o.num_shallow_scrub_errors;
2091 num_deep_scrub_errors += o.num_deep_scrub_errors;
2092 num_objects_recovered += o.num_objects_recovered;
2093 num_bytes_recovered += o.num_bytes_recovered;
2094 num_keys_recovered += o.num_keys_recovered;
2095 num_objects_dirty += o.num_objects_dirty;
2096 num_whiteouts += o.num_whiteouts;
2097 num_objects_omap += o.num_objects_omap;
2098 num_objects_hit_set_archive += o.num_objects_hit_set_archive;
2099 num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
2100 num_flush += o.num_flush;
2101 num_flush_kb += o.num_flush_kb;
2102 num_evict += o.num_evict;
2103 num_evict_kb += o.num_evict_kb;
2104 num_promote += o.num_promote;
2105 num_flush_mode_high += o.num_flush_mode_high;
2106 num_flush_mode_low += o.num_flush_mode_low;
2107 num_evict_mode_some += o.num_evict_mode_some;
2108 num_evict_mode_full += o.num_evict_mode_full;
2109 num_objects_pinned += o.num_objects_pinned;
2110 num_legacy_snapsets += o.num_legacy_snapsets;
2111}
2112
2113void object_stat_sum_t::sub(const object_stat_sum_t& o)
2114{
2115 num_bytes -= o.num_bytes;
2116 num_objects -= o.num_objects;
2117 num_object_clones -= o.num_object_clones;
2118 num_object_copies -= o.num_object_copies;
2119 num_objects_missing_on_primary -= o.num_objects_missing_on_primary;
2120 num_objects_missing -= o.num_objects_missing;
2121 num_objects_degraded -= o.num_objects_degraded;
2122 num_objects_misplaced -= o.num_objects_misplaced;
2123 num_rd -= o.num_rd;
2124 num_rd_kb -= o.num_rd_kb;
2125 num_wr -= o.num_wr;
2126 num_wr_kb -= o.num_wr_kb;
2127 num_objects_unfound -= o.num_objects_unfound;
2128 num_scrub_errors -= o.num_scrub_errors;
2129 num_shallow_scrub_errors -= o.num_shallow_scrub_errors;
2130 num_deep_scrub_errors -= o.num_deep_scrub_errors;
2131 num_objects_recovered -= o.num_objects_recovered;
2132 num_bytes_recovered -= o.num_bytes_recovered;
2133 num_keys_recovered -= o.num_keys_recovered;
2134 num_objects_dirty -= o.num_objects_dirty;
2135 num_whiteouts -= o.num_whiteouts;
2136 num_objects_omap -= o.num_objects_omap;
2137 num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
2138 num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
2139 num_flush -= o.num_flush;
2140 num_flush_kb -= o.num_flush_kb;
2141 num_evict -= o.num_evict;
2142 num_evict_kb -= o.num_evict_kb;
2143 num_promote -= o.num_promote;
2144 num_flush_mode_high -= o.num_flush_mode_high;
2145 num_flush_mode_low -= o.num_flush_mode_low;
2146 num_evict_mode_some -= o.num_evict_mode_some;
2147 num_evict_mode_full -= o.num_evict_mode_full;
2148 num_objects_pinned -= o.num_objects_pinned;
2149 num_legacy_snapsets -= o.num_legacy_snapsets;
2150}
2151
2152bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
2153{
2154 return
2155 l.num_bytes == r.num_bytes &&
2156 l.num_objects == r.num_objects &&
2157 l.num_object_clones == r.num_object_clones &&
2158 l.num_object_copies == r.num_object_copies &&
2159 l.num_objects_missing_on_primary == r.num_objects_missing_on_primary &&
2160 l.num_objects_missing == r.num_objects_missing &&
2161 l.num_objects_degraded == r.num_objects_degraded &&
2162 l.num_objects_misplaced == r.num_objects_misplaced &&
2163 l.num_objects_unfound == r.num_objects_unfound &&
2164 l.num_rd == r.num_rd &&
2165 l.num_rd_kb == r.num_rd_kb &&
2166 l.num_wr == r.num_wr &&
2167 l.num_wr_kb == r.num_wr_kb &&
2168 l.num_scrub_errors == r.num_scrub_errors &&
2169 l.num_shallow_scrub_errors == r.num_shallow_scrub_errors &&
2170 l.num_deep_scrub_errors == r.num_deep_scrub_errors &&
2171 l.num_objects_recovered == r.num_objects_recovered &&
2172 l.num_bytes_recovered == r.num_bytes_recovered &&
2173 l.num_keys_recovered == r.num_keys_recovered &&
2174 l.num_objects_dirty == r.num_objects_dirty &&
2175 l.num_whiteouts == r.num_whiteouts &&
2176 l.num_objects_omap == r.num_objects_omap &&
2177 l.num_objects_hit_set_archive == r.num_objects_hit_set_archive &&
2178 l.num_bytes_hit_set_archive == r.num_bytes_hit_set_archive &&
2179 l.num_flush == r.num_flush &&
2180 l.num_flush_kb == r.num_flush_kb &&
2181 l.num_evict == r.num_evict &&
2182 l.num_evict_kb == r.num_evict_kb &&
2183 l.num_promote == r.num_promote &&
2184 l.num_flush_mode_high == r.num_flush_mode_high &&
2185 l.num_flush_mode_low == r.num_flush_mode_low &&
2186 l.num_evict_mode_some == r.num_evict_mode_some &&
2187 l.num_evict_mode_full == r.num_evict_mode_full &&
2188 l.num_objects_pinned == r.num_objects_pinned &&
2189 l.num_legacy_snapsets == r.num_legacy_snapsets;
2190}
2191
2192// -- object_stat_collection_t --
2193
2194void object_stat_collection_t::dump(Formatter *f) const
2195{
2196 f->open_object_section("stat_sum");
2197 sum.dump(f);
2198 f->close_section();
2199}
2200
2201void object_stat_collection_t::encode(bufferlist& bl) const
2202{
2203 ENCODE_START(2, 2, bl);
2204 ::encode(sum, bl);
2205 ::encode((__u32)0, bl);
2206 ENCODE_FINISH(bl);
2207}
2208
2209void object_stat_collection_t::decode(bufferlist::iterator& bl)
2210{
2211 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
2212 ::decode(sum, bl);
2213 {
2214 map<string,object_stat_sum_t> cat_sum;
2215 ::decode(cat_sum, bl);
2216 }
2217 DECODE_FINISH(bl);
2218}
2219
2220void object_stat_collection_t::generate_test_instances(list<object_stat_collection_t*>& o)
2221{
2222 object_stat_collection_t a;
2223 o.push_back(new object_stat_collection_t(a));
2224 list<object_stat_sum_t*> l;
2225 object_stat_sum_t::generate_test_instances(l);
2226 for (list<object_stat_sum_t*>::iterator p = l.begin(); p != l.end(); ++p) {
2227 a.add(**p);
2228 o.push_back(new object_stat_collection_t(a));
2229 }
2230}
2231
2232
2233// -- pg_stat_t --
2234
2235bool pg_stat_t::is_acting_osd(int32_t osd, bool primary) const
2236{
2237 if (primary && osd == acting_primary) {
2238 return true;
2239 } else if (!primary) {
2240 for(vector<int32_t>::const_iterator it = acting.begin();
2241 it != acting.end(); ++it)
2242 {
2243 if (*it == osd)
2244 return true;
2245 }
2246 }
2247 return false;
2248}
2249
2250void pg_stat_t::dump(Formatter *f) const
2251{
2252 f->dump_stream("version") << version;
2253 f->dump_stream("reported_seq") << reported_seq;
2254 f->dump_stream("reported_epoch") << reported_epoch;
2255 f->dump_string("state", pg_state_string(state));
2256 f->dump_stream("last_fresh") << last_fresh;
2257 f->dump_stream("last_change") << last_change;
2258 f->dump_stream("last_active") << last_active;
2259 f->dump_stream("last_peered") << last_peered;
2260 f->dump_stream("last_clean") << last_clean;
2261 f->dump_stream("last_became_active") << last_became_active;
2262 f->dump_stream("last_became_peered") << last_became_peered;
2263 f->dump_stream("last_unstale") << last_unstale;
2264 f->dump_stream("last_undegraded") << last_undegraded;
2265 f->dump_stream("last_fullsized") << last_fullsized;
2266 f->dump_unsigned("mapping_epoch", mapping_epoch);
2267 f->dump_stream("log_start") << log_start;
2268 f->dump_stream("ondisk_log_start") << ondisk_log_start;
2269 f->dump_unsigned("created", created);
2270 f->dump_unsigned("last_epoch_clean", last_epoch_clean);
2271 f->dump_stream("parent") << parent;
2272 f->dump_unsigned("parent_split_bits", parent_split_bits);
2273 f->dump_stream("last_scrub") << last_scrub;
2274 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2275 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2276 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2277 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2278 f->dump_int("log_size", log_size);
2279 f->dump_int("ondisk_log_size", ondisk_log_size);
2280 f->dump_bool("stats_invalid", stats_invalid);
2281 f->dump_bool("dirty_stats_invalid", dirty_stats_invalid);
2282 f->dump_bool("omap_stats_invalid", omap_stats_invalid);
2283 f->dump_bool("hitset_stats_invalid", hitset_stats_invalid);
2284 f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid);
2285 f->dump_bool("pin_stats_invalid", pin_stats_invalid);
2286 stats.dump(f);
2287 f->open_array_section("up");
2288 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2289 f->dump_int("osd", *p);
2290 f->close_section();
2291 f->open_array_section("acting");
2292 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2293 f->dump_int("osd", *p);
2294 f->close_section();
2295 f->open_array_section("blocked_by");
2296 for (vector<int32_t>::const_iterator p = blocked_by.begin();
2297 p != blocked_by.end(); ++p)
2298 f->dump_int("osd", *p);
2299 f->close_section();
2300 f->dump_int("up_primary", up_primary);
2301 f->dump_int("acting_primary", acting_primary);
2302}
2303
2304void pg_stat_t::dump_brief(Formatter *f) const
2305{
2306 f->dump_string("state", pg_state_string(state));
2307 f->open_array_section("up");
2308 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2309 f->dump_int("osd", *p);
2310 f->close_section();
2311 f->open_array_section("acting");
2312 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2313 f->dump_int("osd", *p);
2314 f->close_section();
2315 f->dump_int("up_primary", up_primary);
2316 f->dump_int("acting_primary", acting_primary);
2317}
2318
2319void pg_stat_t::encode(bufferlist &bl) const
2320{
2321 ENCODE_START(22, 22, bl);
2322 ::encode(version, bl);
2323 ::encode(reported_seq, bl);
2324 ::encode(reported_epoch, bl);
2325 ::encode(state, bl);
2326 ::encode(log_start, bl);
2327 ::encode(ondisk_log_start, bl);
2328 ::encode(created, bl);
2329 ::encode(last_epoch_clean, bl);
2330 ::encode(parent, bl);
2331 ::encode(parent_split_bits, bl);
2332 ::encode(last_scrub, bl);
2333 ::encode(last_scrub_stamp, bl);
2334 ::encode(stats, bl);
2335 ::encode(log_size, bl);
2336 ::encode(ondisk_log_size, bl);
2337 ::encode(up, bl);
2338 ::encode(acting, bl);
2339 ::encode(last_fresh, bl);
2340 ::encode(last_change, bl);
2341 ::encode(last_active, bl);
2342 ::encode(last_clean, bl);
2343 ::encode(last_unstale, bl);
2344 ::encode(mapping_epoch, bl);
2345 ::encode(last_deep_scrub, bl);
2346 ::encode(last_deep_scrub_stamp, bl);
2347 ::encode(stats_invalid, bl);
2348 ::encode(last_clean_scrub_stamp, bl);
2349 ::encode(last_became_active, bl);
2350 ::encode(dirty_stats_invalid, bl);
2351 ::encode(up_primary, bl);
2352 ::encode(acting_primary, bl);
2353 ::encode(omap_stats_invalid, bl);
2354 ::encode(hitset_stats_invalid, bl);
2355 ::encode(blocked_by, bl);
2356 ::encode(last_undegraded, bl);
2357 ::encode(last_fullsized, bl);
2358 ::encode(hitset_bytes_stats_invalid, bl);
2359 ::encode(last_peered, bl);
2360 ::encode(last_became_peered, bl);
2361 ::encode(pin_stats_invalid, bl);
2362 ENCODE_FINISH(bl);
2363}
2364
2365void pg_stat_t::decode(bufferlist::iterator &bl)
2366{
2367 bool tmp;
2368 DECODE_START(22, bl);
2369 ::decode(version, bl);
2370 ::decode(reported_seq, bl);
2371 ::decode(reported_epoch, bl);
2372 ::decode(state, bl);
2373 ::decode(log_start, bl);
2374 ::decode(ondisk_log_start, bl);
2375 ::decode(created, bl);
2376 ::decode(last_epoch_clean, bl);
2377 ::decode(parent, bl);
2378 ::decode(parent_split_bits, bl);
2379 ::decode(last_scrub, bl);
2380 ::decode(last_scrub_stamp, bl);
2381 ::decode(stats, bl);
2382 ::decode(log_size, bl);
2383 ::decode(ondisk_log_size, bl);
2384 ::decode(up, bl);
2385 ::decode(acting, bl);
2386 ::decode(last_fresh, bl);
2387 ::decode(last_change, bl);
2388 ::decode(last_active, bl);
2389 ::decode(last_clean, bl);
2390 ::decode(last_unstale, bl);
2391 ::decode(mapping_epoch, bl);
2392 ::decode(last_deep_scrub, bl);
2393 ::decode(last_deep_scrub_stamp, bl);
2394 ::decode(tmp, bl);
2395 stats_invalid = tmp;
2396 ::decode(last_clean_scrub_stamp, bl);
2397 ::decode(last_became_active, bl);
2398 ::decode(tmp, bl);
2399 dirty_stats_invalid = tmp;
2400 ::decode(up_primary, bl);
2401 ::decode(acting_primary, bl);
2402 ::decode(tmp, bl);
2403 omap_stats_invalid = tmp;
2404 ::decode(tmp, bl);
2405 hitset_stats_invalid = tmp;
2406 ::decode(blocked_by, bl);
2407 ::decode(last_undegraded, bl);
2408 ::decode(last_fullsized, bl);
2409 ::decode(tmp, bl);
2410 hitset_bytes_stats_invalid = tmp;
2411 ::decode(last_peered, bl);
2412 ::decode(last_became_peered, bl);
2413 ::decode(tmp, bl);
2414 pin_stats_invalid = tmp;
2415 DECODE_FINISH(bl);
2416}
2417
2418void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
2419{
2420 pg_stat_t a;
2421 o.push_back(new pg_stat_t(a));
2422
2423 a.version = eversion_t(1, 3);
2424 a.reported_epoch = 1;
2425 a.reported_seq = 2;
2426 a.state = 123;
2427 a.mapping_epoch = 998;
2428 a.last_fresh = utime_t(1002, 1);
2429 a.last_change = utime_t(1002, 2);
2430 a.last_active = utime_t(1002, 3);
2431 a.last_clean = utime_t(1002, 4);
2432 a.last_unstale = utime_t(1002, 5);
2433 a.last_undegraded = utime_t(1002, 7);
2434 a.last_fullsized = utime_t(1002, 8);
2435 a.log_start = eversion_t(1, 4);
2436 a.ondisk_log_start = eversion_t(1, 5);
2437 a.created = 6;
2438 a.last_epoch_clean = 7;
2439 a.parent = pg_t(1, 2, 3);
2440 a.parent_split_bits = 12;
2441 a.last_scrub = eversion_t(9, 10);
2442 a.last_scrub_stamp = utime_t(11, 12);
2443 a.last_deep_scrub = eversion_t(13, 14);
2444 a.last_deep_scrub_stamp = utime_t(15, 16);
2445 a.last_clean_scrub_stamp = utime_t(17, 18);
2446 list<object_stat_collection_t*> l;
2447 object_stat_collection_t::generate_test_instances(l);
2448 a.stats = *l.back();
2449 a.log_size = 99;
2450 a.ondisk_log_size = 88;
2451 a.up.push_back(123);
2452 a.up_primary = 123;
2453 a.acting.push_back(456);
2454 a.acting_primary = 456;
2455 o.push_back(new pg_stat_t(a));
2456
2457 a.up.push_back(124);
2458 a.up_primary = 124;
2459 a.acting.push_back(124);
2460 a.acting_primary = 124;
2461 a.blocked_by.push_back(155);
2462 a.blocked_by.push_back(156);
2463 o.push_back(new pg_stat_t(a));
2464}
2465
2466bool operator==(const pg_stat_t& l, const pg_stat_t& r)
2467{
2468 return
2469 l.version == r.version &&
2470 l.reported_seq == r.reported_seq &&
2471 l.reported_epoch == r.reported_epoch &&
2472 l.state == r.state &&
2473 l.last_fresh == r.last_fresh &&
2474 l.last_change == r.last_change &&
2475 l.last_active == r.last_active &&
2476 l.last_peered == r.last_peered &&
2477 l.last_clean == r.last_clean &&
2478 l.last_unstale == r.last_unstale &&
2479 l.last_undegraded == r.last_undegraded &&
2480 l.last_fullsized == r.last_fullsized &&
2481 l.log_start == r.log_start &&
2482 l.ondisk_log_start == r.ondisk_log_start &&
2483 l.created == r.created &&
2484 l.last_epoch_clean == r.last_epoch_clean &&
2485 l.parent == r.parent &&
2486 l.parent_split_bits == r.parent_split_bits &&
2487 l.last_scrub == r.last_scrub &&
2488 l.last_deep_scrub == r.last_deep_scrub &&
2489 l.last_scrub_stamp == r.last_scrub_stamp &&
2490 l.last_deep_scrub_stamp == r.last_deep_scrub_stamp &&
2491 l.last_clean_scrub_stamp == r.last_clean_scrub_stamp &&
2492 l.stats == r.stats &&
2493 l.stats_invalid == r.stats_invalid &&
2494 l.log_size == r.log_size &&
2495 l.ondisk_log_size == r.ondisk_log_size &&
2496 l.up == r.up &&
2497 l.acting == r.acting &&
2498 l.mapping_epoch == r.mapping_epoch &&
2499 l.blocked_by == r.blocked_by &&
2500 l.last_became_active == r.last_became_active &&
2501 l.last_became_peered == r.last_became_peered &&
2502 l.dirty_stats_invalid == r.dirty_stats_invalid &&
2503 l.omap_stats_invalid == r.omap_stats_invalid &&
2504 l.hitset_stats_invalid == r.hitset_stats_invalid &&
2505 l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid &&
2506 l.up_primary == r.up_primary &&
2507 l.acting_primary == r.acting_primary &&
2508 l.pin_stats_invalid == r.pin_stats_invalid;
2509}
2510
2511// -- pool_stat_t --
2512
2513void pool_stat_t::dump(Formatter *f) const
2514{
2515 stats.dump(f);
2516 f->dump_int("log_size", log_size);
2517 f->dump_int("ondisk_log_size", ondisk_log_size);
2518 f->dump_int("up", up);
2519 f->dump_int("acting", acting);
2520}
2521
2522void pool_stat_t::encode(bufferlist &bl, uint64_t features) const
2523{
2524 if ((features & CEPH_FEATURE_OSDENC) == 0) {
2525 __u8 v = 4;
2526 ::encode(v, bl);
2527 ::encode(stats, bl);
2528 ::encode(log_size, bl);
2529 ::encode(ondisk_log_size, bl);
2530 return;
2531 }
2532
2533 ENCODE_START(6, 5, bl);
2534 ::encode(stats, bl);
2535 ::encode(log_size, bl);
2536 ::encode(ondisk_log_size, bl);
2537 ::encode(up, bl);
2538 ::encode(acting, bl);
2539 ENCODE_FINISH(bl);
2540}
2541
2542void pool_stat_t::decode(bufferlist::iterator &bl)
2543{
2544 DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
2545 if (struct_v >= 4) {
2546 ::decode(stats, bl);
2547 ::decode(log_size, bl);
2548 ::decode(ondisk_log_size, bl);
2549 if (struct_v >= 6) {
2550 ::decode(up, bl);
2551 ::decode(acting, bl);
2552 } else {
2553 up = 0;
2554 acting = 0;
2555 }
2556 } else {
2557 ::decode(stats.sum.num_bytes, bl);
2558 uint64_t num_kb;
2559 ::decode(num_kb, bl);
2560 ::decode(stats.sum.num_objects, bl);
2561 ::decode(stats.sum.num_object_clones, bl);
2562 ::decode(stats.sum.num_object_copies, bl);
2563 ::decode(stats.sum.num_objects_missing_on_primary, bl);
2564 ::decode(stats.sum.num_objects_degraded, bl);
2565 ::decode(log_size, bl);
2566 ::decode(ondisk_log_size, bl);
2567 if (struct_v >= 2) {
2568 ::decode(stats.sum.num_rd, bl);
2569 ::decode(stats.sum.num_rd_kb, bl);
2570 ::decode(stats.sum.num_wr, bl);
2571 ::decode(stats.sum.num_wr_kb, bl);
2572 }
2573 if (struct_v >= 3) {
2574 ::decode(stats.sum.num_objects_unfound, bl);
2575 }
2576 }
2577 DECODE_FINISH(bl);
2578}
2579
2580void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
2581{
2582 pool_stat_t a;
2583 o.push_back(new pool_stat_t(a));
2584
2585 list<object_stat_collection_t*> l;
2586 object_stat_collection_t::generate_test_instances(l);
2587 a.stats = *l.back();
2588 a.log_size = 123;
2589 a.ondisk_log_size = 456;
2590 a.acting = 3;
2591 a.up = 4;
2592 o.push_back(new pool_stat_t(a));
2593}
2594
2595
2596// -- pg_history_t --
2597
2598void pg_history_t::encode(bufferlist &bl) const
2599{
31f18b77 2600 ENCODE_START(9, 4, bl);
7c673cae
FG
2601 ::encode(epoch_created, bl);
2602 ::encode(last_epoch_started, bl);
2603 ::encode(last_epoch_clean, bl);
2604 ::encode(last_epoch_split, bl);
2605 ::encode(same_interval_since, bl);
2606 ::encode(same_up_since, bl);
2607 ::encode(same_primary_since, bl);
2608 ::encode(last_scrub, bl);
2609 ::encode(last_scrub_stamp, bl);
2610 ::encode(last_deep_scrub, bl);
2611 ::encode(last_deep_scrub_stamp, bl);
2612 ::encode(last_clean_scrub_stamp, bl);
2613 ::encode(last_epoch_marked_full, bl);
2614 ::encode(last_interval_started, bl);
2615 ::encode(last_interval_clean, bl);
31f18b77 2616 ::encode(epoch_pool_created, bl);
7c673cae
FG
2617 ENCODE_FINISH(bl);
2618}
2619
2620void pg_history_t::decode(bufferlist::iterator &bl)
2621{
31f18b77 2622 DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl);
7c673cae
FG
2623 ::decode(epoch_created, bl);
2624 ::decode(last_epoch_started, bl);
2625 if (struct_v >= 3)
2626 ::decode(last_epoch_clean, bl);
2627 else
2628 last_epoch_clean = last_epoch_started; // careful, it's a lie!
2629 ::decode(last_epoch_split, bl);
2630 ::decode(same_interval_since, bl);
2631 ::decode(same_up_since, bl);
2632 ::decode(same_primary_since, bl);
2633 if (struct_v >= 2) {
2634 ::decode(last_scrub, bl);
2635 ::decode(last_scrub_stamp, bl);
2636 }
2637 if (struct_v >= 5) {
2638 ::decode(last_deep_scrub, bl);
2639 ::decode(last_deep_scrub_stamp, bl);
2640 }
2641 if (struct_v >= 6) {
2642 ::decode(last_clean_scrub_stamp, bl);
2643 }
2644 if (struct_v >= 7) {
2645 ::decode(last_epoch_marked_full, bl);
2646 }
2647 if (struct_v >= 8) {
2648 ::decode(last_interval_started, bl);
2649 ::decode(last_interval_clean, bl);
2650 } else {
2651 if (last_epoch_started >= same_interval_since) {
2652 last_interval_started = same_interval_since;
2653 } else {
2654 last_interval_started = last_epoch_started; // best guess
2655 }
2656 if (last_epoch_clean >= same_interval_since) {
2657 last_interval_clean = same_interval_since;
2658 } else {
2659 last_interval_clean = last_epoch_clean; // best guess
2660 }
2661 }
31f18b77
FG
2662 if (struct_v >= 9) {
2663 ::decode(epoch_pool_created, bl);
2664 } else {
2665 epoch_pool_created = epoch_created;
2666 }
7c673cae
FG
2667 DECODE_FINISH(bl);
2668}
2669
2670void pg_history_t::dump(Formatter *f) const
2671{
2672 f->dump_int("epoch_created", epoch_created);
31f18b77 2673 f->dump_int("epoch_pool_created", epoch_pool_created);
7c673cae
FG
2674 f->dump_int("last_epoch_started", last_epoch_started);
2675 f->dump_int("last_interval_started", last_interval_started);
2676 f->dump_int("last_epoch_clean", last_epoch_clean);
2677 f->dump_int("last_interval_clean", last_interval_clean);
2678 f->dump_int("last_epoch_split", last_epoch_split);
2679 f->dump_int("last_epoch_marked_full", last_epoch_marked_full);
2680 f->dump_int("same_up_since", same_up_since);
2681 f->dump_int("same_interval_since", same_interval_since);
2682 f->dump_int("same_primary_since", same_primary_since);
2683 f->dump_stream("last_scrub") << last_scrub;
2684 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2685 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2686 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2687 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2688}
2689
2690void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
2691{
2692 o.push_back(new pg_history_t);
2693 o.push_back(new pg_history_t);
2694 o.back()->epoch_created = 1;
31f18b77 2695 o.back()->epoch_pool_created = 1;
7c673cae
FG
2696 o.back()->last_epoch_started = 2;
2697 o.back()->last_interval_started = 2;
2698 o.back()->last_epoch_clean = 3;
2699 o.back()->last_interval_clean = 2;
2700 o.back()->last_epoch_split = 4;
2701 o.back()->same_up_since = 5;
2702 o.back()->same_interval_since = 6;
2703 o.back()->same_primary_since = 7;
2704 o.back()->last_scrub = eversion_t(8, 9);
2705 o.back()->last_scrub_stamp = utime_t(10, 11);
2706 o.back()->last_deep_scrub = eversion_t(12, 13);
2707 o.back()->last_deep_scrub_stamp = utime_t(14, 15);
2708 o.back()->last_clean_scrub_stamp = utime_t(16, 17);
2709 o.back()->last_epoch_marked_full = 18;
2710}
2711
2712
2713// -- pg_info_t --
2714
2715void pg_info_t::encode(bufferlist &bl) const
2716{
2717 ENCODE_START(32, 26, bl);
2718 ::encode(pgid.pgid, bl);
2719 ::encode(last_update, bl);
2720 ::encode(last_complete, bl);
2721 ::encode(log_tail, bl);
2722 if (last_backfill_bitwise && !last_backfill.is_max()) {
2723 ::encode(hobject_t(), bl);
2724 } else {
2725 ::encode(last_backfill, bl);
2726 }
2727 ::encode(stats, bl);
2728 history.encode(bl);
2729 ::encode(purged_snaps, bl);
2730 ::encode(last_epoch_started, bl);
2731 ::encode(last_user_version, bl);
2732 ::encode(hit_set, bl);
2733 ::encode(pgid.shard, bl);
2734 ::encode(last_backfill, bl);
2735 ::encode(last_backfill_bitwise, bl);
2736 ::encode(last_interval_started, bl);
2737 ENCODE_FINISH(bl);
2738}
2739
2740void pg_info_t::decode(bufferlist::iterator &bl)
2741{
2742 DECODE_START(32, bl);
2743 ::decode(pgid.pgid, bl);
2744 ::decode(last_update, bl);
2745 ::decode(last_complete, bl);
2746 ::decode(log_tail, bl);
2747 {
2748 hobject_t old_last_backfill;
2749 ::decode(old_last_backfill, bl);
2750 }
2751 ::decode(stats, bl);
2752 history.decode(bl);
2753 ::decode(purged_snaps, bl);
2754 ::decode(last_epoch_started, bl);
2755 ::decode(last_user_version, bl);
2756 ::decode(hit_set, bl);
2757 ::decode(pgid.shard, bl);
2758 ::decode(last_backfill, bl);
2759 ::decode(last_backfill_bitwise, bl);
2760 if (struct_v >= 32) {
2761 ::decode(last_interval_started, bl);
2762 } else {
2763 last_interval_started = last_epoch_started;
2764 }
2765 DECODE_FINISH(bl);
2766}
2767
2768// -- pg_info_t --
2769
2770void pg_info_t::dump(Formatter *f) const
2771{
2772 f->dump_stream("pgid") << pgid;
2773 f->dump_stream("last_update") << last_update;
2774 f->dump_stream("last_complete") << last_complete;
2775 f->dump_stream("log_tail") << log_tail;
2776 f->dump_int("last_user_version", last_user_version);
2777 f->dump_stream("last_backfill") << last_backfill;
2778 f->dump_int("last_backfill_bitwise", (int)last_backfill_bitwise);
2779 f->open_array_section("purged_snaps");
2780 for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
2781 i != purged_snaps.end();
2782 ++i) {
2783 f->open_object_section("purged_snap_interval");
2784 f->dump_stream("start") << i.get_start();
2785 f->dump_stream("length") << i.get_len();
2786 f->close_section();
2787 }
2788 f->close_section();
2789 f->open_object_section("history");
2790 history.dump(f);
2791 f->close_section();
2792 f->open_object_section("stats");
2793 stats.dump(f);
2794 f->close_section();
2795
2796 f->dump_int("empty", is_empty());
2797 f->dump_int("dne", dne());
2798 f->dump_int("incomplete", is_incomplete());
2799 f->dump_int("last_epoch_started", last_epoch_started);
2800
2801 f->open_object_section("hit_set_history");
2802 hit_set.dump(f);
2803 f->close_section();
2804}
2805
2806void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
2807{
2808 o.push_back(new pg_info_t);
2809 o.push_back(new pg_info_t);
2810 list<pg_history_t*> h;
2811 pg_history_t::generate_test_instances(h);
2812 o.back()->history = *h.back();
2813 o.back()->pgid = spg_t(pg_t(1, 2, -1), shard_id_t::NO_SHARD);
2814 o.back()->last_update = eversion_t(3, 4);
2815 o.back()->last_complete = eversion_t(5, 6);
2816 o.back()->last_user_version = 2;
2817 o.back()->log_tail = eversion_t(7, 8);
2818 o.back()->last_backfill = hobject_t(object_t("objname"), "key", 123, 456, -1, "");
2819 o.back()->last_backfill_bitwise = true;
2820 {
2821 list<pg_stat_t*> s;
2822 pg_stat_t::generate_test_instances(s);
2823 o.back()->stats = *s.back();
2824 }
2825 {
2826 list<pg_hit_set_history_t*> s;
2827 pg_hit_set_history_t::generate_test_instances(s);
2828 o.back()->hit_set = *s.back();
2829 }
2830}
2831
2832// -- pg_notify_t --
2833void pg_notify_t::encode(bufferlist &bl) const
2834{
2835 ENCODE_START(2, 2, bl);
2836 ::encode(query_epoch, bl);
2837 ::encode(epoch_sent, bl);
2838 ::encode(info, bl);
2839 ::encode(to, bl);
2840 ::encode(from, bl);
2841 ENCODE_FINISH(bl);
2842}
2843
2844void pg_notify_t::decode(bufferlist::iterator &bl)
2845{
2846 DECODE_START(2, bl);
2847 ::decode(query_epoch, bl);
2848 ::decode(epoch_sent, bl);
2849 ::decode(info, bl);
2850 ::decode(to, bl);
2851 ::decode(from, bl);
2852 DECODE_FINISH(bl);
2853}
2854
2855void pg_notify_t::dump(Formatter *f) const
2856{
2857 f->dump_int("from", from);
2858 f->dump_int("to", to);
2859 f->dump_unsigned("query_epoch", query_epoch);
2860 f->dump_unsigned("epoch_sent", epoch_sent);
2861 {
2862 f->open_object_section("info");
2863 info.dump(f);
2864 f->close_section();
2865 }
2866}
2867
2868void pg_notify_t::generate_test_instances(list<pg_notify_t*>& o)
2869{
2870 o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1, pg_info_t()));
2871 o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t()));
2872}
2873
2874ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
2875{
2876 lhs << "(query:" << notify.query_epoch
2877 << " sent:" << notify.epoch_sent
2878 << " " << notify.info;
2879 if (notify.from != shard_id_t::NO_SHARD ||
2880 notify.to != shard_id_t::NO_SHARD)
2881 lhs << " " << (unsigned)notify.from
2882 << "->" << (unsigned)notify.to;
2883 return lhs << ")";
2884}
2885
2886// -- pg_interval_t --
2887
2888void PastIntervals::pg_interval_t::encode(bufferlist& bl) const
2889{
2890 ENCODE_START(4, 2, bl);
2891 ::encode(first, bl);
2892 ::encode(last, bl);
2893 ::encode(up, bl);
2894 ::encode(acting, bl);
2895 ::encode(maybe_went_rw, bl);
2896 ::encode(primary, bl);
2897 ::encode(up_primary, bl);
2898 ENCODE_FINISH(bl);
2899}
2900
2901void PastIntervals::pg_interval_t::decode(bufferlist::iterator& bl)
2902{
2903 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
2904 ::decode(first, bl);
2905 ::decode(last, bl);
2906 ::decode(up, bl);
2907 ::decode(acting, bl);
2908 ::decode(maybe_went_rw, bl);
2909 if (struct_v >= 3) {
2910 ::decode(primary, bl);
2911 } else {
2912 if (acting.size())
2913 primary = acting[0];
2914 }
2915 if (struct_v >= 4) {
2916 ::decode(up_primary, bl);
2917 } else {
2918 if (up.size())
2919 up_primary = up[0];
2920 }
2921 DECODE_FINISH(bl);
2922}
2923
2924void PastIntervals::pg_interval_t::dump(Formatter *f) const
2925{
2926 f->dump_unsigned("first", first);
2927 f->dump_unsigned("last", last);
2928 f->dump_int("maybe_went_rw", maybe_went_rw ? 1 : 0);
2929 f->open_array_section("up");
2930 for (vector<int>::const_iterator p = up.begin(); p != up.end(); ++p)
2931 f->dump_int("osd", *p);
2932 f->close_section();
2933 f->open_array_section("acting");
2934 for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2935 f->dump_int("osd", *p);
2936 f->close_section();
2937 f->dump_int("primary", primary);
2938 f->dump_int("up_primary", up_primary);
2939}
2940
2941void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
2942{
2943 o.push_back(new pg_interval_t);
2944 o.push_back(new pg_interval_t);
2945 o.back()->up.push_back(1);
2946 o.back()->acting.push_back(2);
2947 o.back()->acting.push_back(3);
2948 o.back()->first = 4;
2949 o.back()->last = 5;
2950 o.back()->maybe_went_rw = true;
2951}
2952
2953WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
2954
2955class pi_simple_rep : public PastIntervals::interval_rep {
2956 map<epoch_t, PastIntervals::pg_interval_t> interval_map;
2957
2958 pi_simple_rep(
2959 bool ec_pool,
2960 std::list<PastIntervals::pg_interval_t> &&intervals) {
2961 for (auto &&i: intervals)
2962 add_interval(ec_pool, i);
2963 }
2964
2965public:
2966 pi_simple_rep() = default;
2967 pi_simple_rep(const pi_simple_rep &) = default;
2968 pi_simple_rep(pi_simple_rep &&) = default;
2969 pi_simple_rep &operator=(pi_simple_rep &&) = default;
2970 pi_simple_rep &operator=(const pi_simple_rep &) = default;
2971
2972 size_t size() const override { return interval_map.size(); }
2973 bool empty() const override { return interval_map.empty(); }
2974 void clear() override { interval_map.clear(); }
2975 pair<epoch_t, epoch_t> get_bounds() const override {
2976 auto iter = interval_map.begin();
2977 if (iter != interval_map.end()) {
2978 auto riter = interval_map.rbegin();
2979 return make_pair(
2980 iter->second.first,
2981 riter->second.last + 1);
2982 } else {
2983 return make_pair(0, 0);
2984 }
2985 }
2986 set<pg_shard_t> get_all_participants(
2987 bool ec_pool) const override {
2988 set<pg_shard_t> all_participants;
2989
2990 // We need to decide who might have unfound objects that we need
2991 auto p = interval_map.rbegin();
2992 auto end = interval_map.rend();
2993 for (; p != end; ++p) {
2994 const PastIntervals::pg_interval_t &interval(p->second);
2995 // If nothing changed, we don't care about this interval.
2996 if (!interval.maybe_went_rw)
2997 continue;
2998
2999 int i = 0;
3000 std::vector<int>::const_iterator a = interval.acting.begin();
3001 std::vector<int>::const_iterator a_end = interval.acting.end();
3002 for (; a != a_end; ++a, ++i) {
3003 pg_shard_t shard(*a, ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD);
3004 if (*a != CRUSH_ITEM_NONE)
3005 all_participants.insert(shard);
3006 }
3007 }
3008 return all_participants;
3009 }
3010 void add_interval(
3011 bool ec_pool,
3012 const PastIntervals::pg_interval_t &interval) override {
3013 interval_map[interval.first] = interval;
3014 }
3015 unique_ptr<PastIntervals::interval_rep> clone() const override {
3016 return unique_ptr<PastIntervals::interval_rep>(new pi_simple_rep(*this));
3017 }
3018 ostream &print(ostream &out) const override {
3019 return out << interval_map;
3020 }
3021 void encode(bufferlist &bl) const override {
3022 ::encode(interval_map, bl);
3023 }
3024 void decode(bufferlist::iterator &bl) override {
3025 ::decode(interval_map, bl);
3026 }
3027 void dump(Formatter *f) const override {
3028 f->open_array_section("PastIntervals::compat_rep");
3029 for (auto &&i: interval_map) {
3030 f->open_object_section("pg_interval_t");
3031 f->dump_int("epoch", i.first);
3032 f->open_object_section("interval");
3033 i.second.dump(f);
3034 f->close_section();
3035 f->close_section();
3036 }
3037 f->close_section();
3038 }
3039 bool is_classic() const override {
3040 return true;
3041 }
3042 static void generate_test_instances(list<pi_simple_rep*> &o) {
3043 using ival = PastIntervals::pg_interval_t;
3044 using ivallst = std::list<ival>;
3045 o.push_back(
3046 new pi_simple_rep(
3047 true, ivallst
3048 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3049 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3050 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3051 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3052 }));
3053 o.push_back(
3054 new pi_simple_rep(
3055 false, ivallst
3056 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3057 , ival{{ 1, 2}, { 1, 2}, 20, 30, true, 1, 1}
3058 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3059 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3060 }));
3061 o.push_back(
3062 new pi_simple_rep(
3063 true, ivallst
3064 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3065 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3066 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3067 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3068 }));
3069 return;
3070 }
3071 void iterate_mayberw_back_to(
3072 bool ec_pool,
3073 epoch_t les,
3074 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3075 for (auto i = interval_map.rbegin(); i != interval_map.rend(); ++i) {
3076 if (!i->second.maybe_went_rw)
3077 continue;
3078 if (i->second.last < les)
3079 break;
3080 set<pg_shard_t> actingset;
3081 for (unsigned j = 0; j < i->second.acting.size(); ++j) {
3082 if (i->second.acting[j] == CRUSH_ITEM_NONE)
3083 continue;
3084 actingset.insert(
3085 pg_shard_t(
3086 i->second.acting[j],
3087 ec_pool ? shard_id_t(j) : shard_id_t::NO_SHARD));
3088 }
3089 f(i->second.first, actingset);
3090 }
3091 }
3092
3093 bool has_full_intervals() const override { return true; }
3094 void iterate_all_intervals(
3095 std::function<void(const PastIntervals::pg_interval_t &)> &&f
3096 ) const override {
3097 for (auto &&i: interval_map) {
3098 f(i.second);
3099 }
3100 }
3101 virtual ~pi_simple_rep() override {}
3102};
3103
3104/**
3105 * pi_compact_rep
3106 *
3107 * PastIntervals only needs to be able to answer two questions:
3108 * 1) Where should the primary look for unfound objects?
3109 * 2) List a set of subsets of the OSDs such that contacting at least
3110 * one from each subset guarrantees we speak to at least one witness
3111 * of any completed write.
3112 *
3113 * Crucially, 2) does not require keeping *all* past intervals. Certainly,
3114 * we don't need to keep any where maybe_went_rw would be false. We also
3115 * needn't keep two intervals where the actingset in one is a subset
3116 * of the other (only need to keep the smaller of the two sets). In order
3117 * to accurately trim the set of intervals as last_epoch_started changes
3118 * without rebuilding the set from scratch, we'll retain the larger set
3119 * if it in an older interval.
3120 */
3121struct compact_interval_t {
3122 epoch_t first;
3123 epoch_t last;
3124 set<pg_shard_t> acting;
3125 bool supersedes(const compact_interval_t &other) {
3126 for (auto &&i: acting) {
3127 if (!other.acting.count(i))
3128 return false;
3129 }
3130 return true;
3131 }
3132 void dump(Formatter *f) const {
3133 f->open_object_section("compact_interval_t");
3134 f->dump_stream("first") << first;
3135 f->dump_stream("last") << last;
3136 f->dump_stream("acting") << acting;
3137 f->close_section();
3138 }
3139 void encode(bufferlist &bl) const {
3140 ENCODE_START(1, 1, bl);
3141 ::encode(first, bl);
3142 ::encode(last, bl);
3143 ::encode(acting, bl);
3144 ENCODE_FINISH(bl);
3145 }
3146 void decode(bufferlist::iterator &bl) {
3147 DECODE_START(1, bl);
3148 ::decode(first, bl);
3149 ::decode(last, bl);
3150 ::decode(acting, bl);
3151 DECODE_FINISH(bl);
3152 }
3153 static void generate_test_instances(list<compact_interval_t*> & o) {
3154 /* Not going to be used, we'll generate pi_compact_rep directly */
3155 }
3156};
3157ostream &operator<<(ostream &o, const compact_interval_t &rhs)
3158{
3159 return o << "([" << rhs.first << "," << rhs.last
3160 << "] acting " << rhs.acting << ")";
3161}
3162WRITE_CLASS_ENCODER(compact_interval_t)
3163
3164class pi_compact_rep : public PastIntervals::interval_rep {
3165 epoch_t first = 0;
3166 epoch_t last = 0; // inclusive
3167 set<pg_shard_t> all_participants;
3168 list<compact_interval_t> intervals;
3169 pi_compact_rep(
3170 bool ec_pool,
3171 std::list<PastIntervals::pg_interval_t> &&intervals) {
3172 for (auto &&i: intervals)
3173 add_interval(ec_pool, i);
3174 }
3175public:
3176 pi_compact_rep() = default;
3177 pi_compact_rep(const pi_compact_rep &) = default;
3178 pi_compact_rep(pi_compact_rep &&) = default;
3179 pi_compact_rep &operator=(const pi_compact_rep &) = default;
3180 pi_compact_rep &operator=(pi_compact_rep &&) = default;
3181
3182 size_t size() const override { return intervals.size(); }
3183 bool empty() const override {
3184 return first > last || (first == 0 && last == 0);
3185 }
3186 void clear() override {
3187 *this = pi_compact_rep();
3188 }
3189 pair<epoch_t, epoch_t> get_bounds() const override {
3190 return make_pair(first, last + 1);
3191 }
3192 set<pg_shard_t> get_all_participants(
3193 bool ec_pool) const override {
3194 return all_participants;
3195 }
3196 void add_interval(
3197 bool ec_pool, const PastIntervals::pg_interval_t &interval) override {
3198 if (first == 0)
3199 first = interval.first;
3200 assert(interval.last > last);
3201 last = interval.last;
3202 set<pg_shard_t> acting;
3203 for (unsigned i = 0; i < interval.acting.size(); ++i) {
3204 if (interval.acting[i] == CRUSH_ITEM_NONE)
3205 continue;
3206 acting.insert(
3207 pg_shard_t(
3208 interval.acting[i],
3209 ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
3210 }
3211 all_participants.insert(acting.begin(), acting.end());
3212 if (!interval.maybe_went_rw)
3213 return;
3214 intervals.push_back(
3215 compact_interval_t{interval.first, interval.last, acting});
3216 auto plast = intervals.end();
3217 --plast;
3218 for (auto cur = intervals.begin(); cur != plast; ) {
3219 if (plast->supersedes(*cur)) {
3220 intervals.erase(cur++);
3221 } else {
3222 ++cur;
3223 }
3224 }
3225 }
3226 unique_ptr<PastIntervals::interval_rep> clone() const override {
3227 return unique_ptr<PastIntervals::interval_rep>(new pi_compact_rep(*this));
3228 }
3229 ostream &print(ostream &out) const override {
3230 return out << "([" << first << "," << last
3231 << "] intervals=" << intervals << ")";
3232 }
3233 void encode(bufferlist &bl) const override {
3234 ENCODE_START(1, 1, bl);
3235 ::encode(first, bl);
3236 ::encode(last, bl);
3237 ::encode(all_participants, bl);
3238 ::encode(intervals, bl);
3239 ENCODE_FINISH(bl);
3240 }
3241 void decode(bufferlist::iterator &bl) override {
3242 DECODE_START(1, bl);
3243 ::decode(first, bl);
3244 ::decode(last, bl);
3245 ::decode(all_participants, bl);
3246 ::decode(intervals, bl);
3247 DECODE_FINISH(bl);
3248 }
3249 void dump(Formatter *f) const override {
3250 f->open_object_section("PastIntervals::compact_rep");
3251 f->dump_stream("first") << first;
3252 f->dump_stream("last") << last;
3253 f->open_array_section("all_participants");
3254 for (auto& i : all_participants) {
3255 f->dump_object("pg_shard", i);
3256 }
3257 f->close_section();
3258 f->open_array_section("intervals");
3259 for (auto &&i: intervals) {
3260 i.dump(f);
3261 }
3262 f->close_section();
3263 f->close_section();
3264 }
3265 bool is_classic() const override {
3266 return false;
3267 }
3268 static void generate_test_instances(list<pi_compact_rep*> &o) {
3269 using ival = PastIntervals::pg_interval_t;
3270 using ivallst = std::list<ival>;
3271 o.push_back(
3272 new pi_compact_rep(
3273 true, ivallst
3274 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3275 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3276 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3277 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3278 }));
3279 o.push_back(
3280 new pi_compact_rep(
3281 false, ivallst
3282 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3283 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3284 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3285 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3286 }));
3287 o.push_back(
3288 new pi_compact_rep(
3289 true, ivallst
3290 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3291 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3292 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3293 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3294 }));
3295 }
3296 void iterate_mayberw_back_to(
3297 bool ec_pool,
3298 epoch_t les,
3299 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3300 for (auto i = intervals.rbegin(); i != intervals.rend(); ++i) {
3301 if (i->last < les)
3302 break;
3303 f(i->first, i->acting);
3304 }
3305 }
3306 virtual ~pi_compact_rep() override {}
3307};
3308WRITE_CLASS_ENCODER(pi_compact_rep)
3309
3310PastIntervals::PastIntervals(const PastIntervals &rhs)
3311 : past_intervals(rhs.past_intervals ?
3312 rhs.past_intervals->clone() :
3313 nullptr) {}
3314
3315PastIntervals &PastIntervals::operator=(const PastIntervals &rhs)
3316{
3317 PastIntervals other(rhs);
31f18b77 3318 swap(other);
7c673cae
FG
3319 return *this;
3320}
3321
3322ostream& operator<<(ostream& out, const PastIntervals &i)
3323{
3324 if (i.past_intervals) {
3325 return i.past_intervals->print(out);
3326 } else {
3327 return out << "(empty)";
3328 }
3329}
3330
3331ostream& operator<<(ostream& out, const PastIntervals::PriorSet &i)
3332{
3333 return out << "PriorSet("
3334 << "ec_pool: " << i.ec_pool
3335 << ", probe: " << i.probe
3336 << ", down: " << i.down
3337 << ", blocked_by: " << i.blocked_by
3338 << ", pg_down: " << i.pg_down
3339 << ")";
3340}
3341
3342void PastIntervals::decode(bufferlist::iterator &bl)
3343{
3344 DECODE_START(1, bl);
3345 __u8 type = 0;
3346 ::decode(type, bl);
3347 switch (type) {
3348 case 0:
3349 break;
3350 case 1:
3351 past_intervals.reset(new pi_simple_rep);
3352 past_intervals->decode(bl);
3353 break;
3354 case 2:
3355 past_intervals.reset(new pi_compact_rep);
3356 past_intervals->decode(bl);
3357 break;
3358 }
3359 DECODE_FINISH(bl);
3360}
3361
3362void PastIntervals::decode_classic(bufferlist::iterator &bl)
3363{
3364 past_intervals.reset(new pi_simple_rep);
3365 past_intervals->decode(bl);
3366}
3367
3368void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
3369{
3370 {
3371 list<pi_simple_rep *> simple;
3372 pi_simple_rep::generate_test_instances(simple);
3373 for (auto &&i: simple) {
3374 // takes ownership of contents
3375 o.push_back(new PastIntervals(i));
3376 }
3377 }
3378 {
3379 list<pi_compact_rep *> compact;
3380 pi_compact_rep::generate_test_instances(compact);
3381 for (auto &&i: compact) {
3382 // takes ownership of contents
3383 o.push_back(new PastIntervals(i));
3384 }
3385 }
3386 return;
3387}
3388
3389void PastIntervals::update_type(bool ec_pool, bool compact)
3390{
3391 if (!compact) {
3392 if (!past_intervals) {
3393 past_intervals.reset(new pi_simple_rep);
3394 } else {
3395 // we never convert from compact back to classic
3396 assert(is_classic());
3397 }
3398 } else {
3399 if (!past_intervals) {
3400 past_intervals.reset(new pi_compact_rep);
3401 } else if (is_classic()) {
3402 auto old = std::move(past_intervals);
3403 past_intervals.reset(new pi_compact_rep);
3404 assert(old->has_full_intervals());
3405 old->iterate_all_intervals([&](const pg_interval_t &i) {
3406 past_intervals->add_interval(ec_pool, i);
3407 });
3408 }
3409 }
3410}
3411
3412void PastIntervals::update_type_from_map(bool ec_pool, const OSDMap &osdmap)
3413{
31f18b77 3414 update_type(ec_pool, osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS);
7c673cae
FG
3415}
3416
3417bool PastIntervals::is_new_interval(
3418 int old_acting_primary,
3419 int new_acting_primary,
3420 const vector<int> &old_acting,
3421 const vector<int> &new_acting,
3422 int old_up_primary,
3423 int new_up_primary,
3424 const vector<int> &old_up,
3425 const vector<int> &new_up,
3426 int old_size,
3427 int new_size,
3428 int old_min_size,
3429 int new_min_size,
3430 unsigned old_pg_num,
3431 unsigned new_pg_num,
3432 bool old_sort_bitwise,
3433 bool new_sort_bitwise,
c07f9fc5
FG
3434 bool old_recovery_deletes,
3435 bool new_recovery_deletes,
7c673cae
FG
3436 pg_t pgid) {
3437 return old_acting_primary != new_acting_primary ||
3438 new_acting != old_acting ||
3439 old_up_primary != new_up_primary ||
3440 new_up != old_up ||
3441 old_min_size != new_min_size ||
3442 old_size != new_size ||
3443 pgid.is_split(old_pg_num, new_pg_num, 0) ||
c07f9fc5
FG
3444 old_sort_bitwise != new_sort_bitwise ||
3445 old_recovery_deletes != new_recovery_deletes;
7c673cae
FG
3446}
3447
3448bool PastIntervals::is_new_interval(
3449 int old_acting_primary,
3450 int new_acting_primary,
3451 const vector<int> &old_acting,
3452 const vector<int> &new_acting,
3453 int old_up_primary,
3454 int new_up_primary,
3455 const vector<int> &old_up,
3456 const vector<int> &new_up,
3457 OSDMapRef osdmap,
3458 OSDMapRef lastmap,
3459 pg_t pgid) {
3460 return !(lastmap->get_pools().count(pgid.pool())) ||
3461 is_new_interval(old_acting_primary,
3462 new_acting_primary,
3463 old_acting,
3464 new_acting,
3465 old_up_primary,
3466 new_up_primary,
3467 old_up,
3468 new_up,
3469 lastmap->get_pools().find(pgid.pool())->second.size,
3470 osdmap->get_pools().find(pgid.pool())->second.size,
3471 lastmap->get_pools().find(pgid.pool())->second.min_size,
3472 osdmap->get_pools().find(pgid.pool())->second.min_size,
3473 lastmap->get_pg_num(pgid.pool()),
3474 osdmap->get_pg_num(pgid.pool()),
3475 lastmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3476 osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
c07f9fc5
FG
3477 lastmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
3478 osdmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
7c673cae
FG
3479 pgid);
3480}
3481
3482bool PastIntervals::check_new_interval(
3483 int old_acting_primary,
3484 int new_acting_primary,
3485 const vector<int> &old_acting,
3486 const vector<int> &new_acting,
3487 int old_up_primary,
3488 int new_up_primary,
3489 const vector<int> &old_up,
3490 const vector<int> &new_up,
3491 epoch_t same_interval_since,
3492 epoch_t last_epoch_clean,
3493 OSDMapRef osdmap,
3494 OSDMapRef lastmap,
3495 pg_t pgid,
3496 IsPGRecoverablePredicate *could_have_gone_active,
3497 PastIntervals *past_intervals,
3498 std::ostream *out)
3499{
3500 /*
3501 * We have to be careful to gracefully deal with situations like
3502 * so. Say we have a power outage or something that takes out both
3503 * OSDs, but the monitor doesn't mark them down in the same epoch.
3504 * The history may look like
3505 *
3506 * 1: A B
3507 * 2: B
3508 * 3: let's say B dies for good, too (say, from the power spike)
3509 * 4: A
3510 *
3511 * which makes it look like B may have applied updates to the PG
3512 * that we need in order to proceed. This sucks...
3513 *
3514 * To minimize the risk of this happening, we CANNOT go active if
3515 * _any_ OSDs in the prior set are down until we send an MOSDAlive
3516 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
3517 * Then, we have something like
3518 *
3519 * 1: A B
3520 * 2: B up_thru[B]=0
3521 * 3:
3522 * 4: A
3523 *
3524 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
3525 *
3526 * or,
3527 *
3528 * 1: A B
3529 * 2: B up_thru[B]=0
3530 * 3: B up_thru[B]=2
3531 * 4:
3532 * 5: A
3533 *
3534 * -> we must wait for B, bc it was alive through 2, and could have
3535 * written to the pg.
3536 *
3537 * If B is really dead, then an administrator will need to manually
3538 * intervene by marking the OSD as "lost."
3539 */
3540
3541 // remember past interval
3542 // NOTE: a change in the up set primary triggers an interval
3543 // change, even though the interval members in the pg_interval_t
3544 // do not change.
3545 assert(past_intervals);
3546 assert(past_intervals->past_intervals);
3547 if (is_new_interval(
3548 old_acting_primary,
3549 new_acting_primary,
3550 old_acting,
3551 new_acting,
3552 old_up_primary,
3553 new_up_primary,
3554 old_up,
3555 new_up,
3556 osdmap,
3557 lastmap,
3558 pgid)) {
3559 pg_interval_t i;
3560 i.first = same_interval_since;
3561 i.last = osdmap->get_epoch() - 1;
3562 assert(i.first <= i.last);
3563 i.acting = old_acting;
3564 i.up = old_up;
3565 i.primary = old_acting_primary;
3566 i.up_primary = old_up_primary;
3567
3568 unsigned num_acting = 0;
3569 for (vector<int>::const_iterator p = i.acting.begin(); p != i.acting.end();
3570 ++p)
3571 if (*p != CRUSH_ITEM_NONE)
3572 ++num_acting;
3573
3574 assert(lastmap->get_pools().count(pgid.pool()));
3575 const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
3576 set<pg_shard_t> old_acting_shards;
3577 old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
3578
3579 if (num_acting &&
3580 i.primary != -1 &&
3581 num_acting >= old_pg_pool.min_size &&
3582 (*could_have_gone_active)(old_acting_shards)) {
3583 if (out)
3584 *out << __func__ << " " << i
3585 << ": not rw,"
3586 << " up_thru " << lastmap->get_up_thru(i.primary)
3587 << " up_from " << lastmap->get_up_from(i.primary)
3588 << " last_epoch_clean " << last_epoch_clean
3589 << std::endl;
3590 if (lastmap->get_up_thru(i.primary) >= i.first &&
3591 lastmap->get_up_from(i.primary) <= i.first) {
3592 i.maybe_went_rw = true;
3593 if (out)
3594 *out << __func__ << " " << i
3595 << " : primary up " << lastmap->get_up_from(i.primary)
3596 << "-" << lastmap->get_up_thru(i.primary)
3597 << " includes interval"
3598 << std::endl;
3599 } else if (last_epoch_clean >= i.first &&
3600 last_epoch_clean <= i.last) {
3601 // If the last_epoch_clean is included in this interval, then
3602 // the pg must have been rw (for recovery to have completed).
3603 // This is important because we won't know the _real_
3604 // first_epoch because we stop at last_epoch_clean, and we
3605 // don't want the oldest interval to randomly have
3606 // maybe_went_rw false depending on the relative up_thru vs
3607 // last_epoch_clean timing.
3608 i.maybe_went_rw = true;
3609 if (out)
3610 *out << __func__ << " " << i
3611 << " : includes last_epoch_clean " << last_epoch_clean
3612 << " and presumed to have been rw"
3613 << std::endl;
3614 } else {
3615 i.maybe_went_rw = false;
3616 if (out)
3617 *out << __func__ << " " << i
3618 << " : primary up " << lastmap->get_up_from(i.primary)
3619 << "-" << lastmap->get_up_thru(i.primary)
3620 << " does not include interval"
3621 << std::endl;
3622 }
3623 } else {
3624 i.maybe_went_rw = false;
3625 if (out)
3626 *out << __func__ << " " << i << " : acting set is too small" << std::endl;
3627 }
3628 past_intervals->past_intervals->add_interval(old_pg_pool.ec_pool(), i);
3629 return true;
3630 } else {
3631 return false;
3632 }
3633}
3634
3635
3636// true if the given map affects the prior set
3637bool PastIntervals::PriorSet::affected_by_map(
3638 const OSDMap &osdmap,
3639 const DoutPrefixProvider *dpp) const
3640{
3641 for (set<pg_shard_t>::iterator p = probe.begin();
3642 p != probe.end();
3643 ++p) {
3644 int o = p->osd;
3645
3646 // did someone in the prior set go down?
3647 if (osdmap.is_down(o) && down.count(o) == 0) {
3648 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
3649 return true;
3650 }
3651
3652 // did a down osd in cur get (re)marked as lost?
3653 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3654 if (r != blocked_by.end()) {
3655 if (!osdmap.exists(o)) {
3656 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3657 return true;
3658 }
3659 if (osdmap.get_info(o).lost_at != r->second) {
3660 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
3661 return true;
3662 }
3663 }
3664 }
3665
3666 // did someone in the prior down set go up?
3667 for (set<int>::const_iterator p = down.begin();
3668 p != down.end();
3669 ++p) {
3670 int o = *p;
3671
3672 if (osdmap.is_up(o)) {
3673 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
3674 return true;
3675 }
3676
3677 // did someone in the prior set get lost or destroyed?
3678 if (!osdmap.exists(o)) {
3679 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3680 return true;
3681 }
3682 // did a down osd in down get (re)marked as lost?
3683 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3684 if (r != blocked_by.end()) {
3685 if (osdmap.get_info(o).lost_at != r->second) {
3686 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
3687 return true;
3688 }
3689 }
3690 }
3691
3692 return false;
3693}
3694
3695ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
3696{
3697 out << "interval(" << i.first << "-" << i.last
3698 << " up " << i.up << "(" << i.up_primary << ")"
3699 << " acting " << i.acting << "(" << i.primary << ")";
3700 if (i.maybe_went_rw)
3701 out << " maybe_went_rw";
3702 out << ")";
3703 return out;
3704}
3705
3706
3707
3708// -- pg_query_t --
3709
3710void pg_query_t::encode(bufferlist &bl, uint64_t features) const {
3711 ENCODE_START(3, 3, bl);
3712 ::encode(type, bl);
3713 ::encode(since, bl);
3714 history.encode(bl);
3715 ::encode(epoch_sent, bl);
3716 ::encode(to, bl);
3717 ::encode(from, bl);
3718 ENCODE_FINISH(bl);
3719}
3720
3721void pg_query_t::decode(bufferlist::iterator &bl) {
3722 DECODE_START(3, bl);
3723 ::decode(type, bl);
3724 ::decode(since, bl);
3725 history.decode(bl);
3726 ::decode(epoch_sent, bl);
3727 ::decode(to, bl);
3728 ::decode(from, bl);
3729 DECODE_FINISH(bl);
3730}
3731
3732void pg_query_t::dump(Formatter *f) const
3733{
3734 f->dump_int("from", from);
3735 f->dump_int("to", to);
3736 f->dump_string("type", get_type_name());
3737 f->dump_stream("since") << since;
3738 f->dump_stream("epoch_sent") << epoch_sent;
3739 f->open_object_section("history");
3740 history.dump(f);
3741 f->close_section();
3742}
3743void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
3744{
3745 o.push_back(new pg_query_t());
3746 list<pg_history_t*> h;
3747 pg_history_t::generate_test_instances(h);
3748 o.push_back(new pg_query_t(pg_query_t::INFO, shard_id_t(1), shard_id_t(2), *h.back(), 4));
3749 o.push_back(new pg_query_t(pg_query_t::MISSING, shard_id_t(2), shard_id_t(3), *h.back(), 4));
3750 o.push_back(new pg_query_t(pg_query_t::LOG, shard_id_t(0), shard_id_t(0),
3751 eversion_t(4, 5), *h.back(), 4));
3752 o.push_back(new pg_query_t(pg_query_t::FULLLOG,
3753 shard_id_t::NO_SHARD, shard_id_t::NO_SHARD,
3754 *h.back(), 5));
3755}
3756
3757// -- ObjectModDesc --
3758void ObjectModDesc::visit(Visitor *visitor) const
3759{
3760 bufferlist::iterator bp = bl.begin();
3761 try {
3762 while (!bp.end()) {
3763 DECODE_START(max_required_version, bp);
3764 uint8_t code;
3765 ::decode(code, bp);
3766 switch (code) {
3767 case APPEND: {
3768 uint64_t size;
3769 ::decode(size, bp);
3770 visitor->append(size);
3771 break;
3772 }
3773 case SETATTRS: {
3774 map<string, boost::optional<bufferlist> > attrs;
3775 ::decode(attrs, bp);
3776 visitor->setattrs(attrs);
3777 break;
3778 }
3779 case DELETE: {
3780 version_t old_version;
3781 ::decode(old_version, bp);
3782 visitor->rmobject(old_version);
3783 break;
3784 }
3785 case CREATE: {
3786 visitor->create();
3787 break;
3788 }
3789 case UPDATE_SNAPS: {
3790 set<snapid_t> snaps;
3791 ::decode(snaps, bp);
3792 visitor->update_snaps(snaps);
3793 break;
3794 }
3795 case TRY_DELETE: {
3796 version_t old_version;
3797 ::decode(old_version, bp);
3798 visitor->try_rmobject(old_version);
3799 break;
3800 }
3801 case ROLLBACK_EXTENTS: {
3802 vector<pair<uint64_t, uint64_t> > extents;
3803 version_t gen;
3804 ::decode(gen, bp);
3805 ::decode(extents, bp);
3806 visitor->rollback_extents(gen,extents);
3807 break;
3808 }
3809 default:
3810 assert(0 == "Invalid rollback code");
3811 }
3812 DECODE_FINISH(bp);
3813 }
3814 } catch (...) {
3815 assert(0 == "Invalid encoding");
3816 }
3817}
3818
3819struct DumpVisitor : public ObjectModDesc::Visitor {
3820 Formatter *f;
3821 explicit DumpVisitor(Formatter *f) : f(f) {}
3822 void append(uint64_t old_size) override {
3823 f->open_object_section("op");
3824 f->dump_string("code", "APPEND");
3825 f->dump_unsigned("old_size", old_size);
3826 f->close_section();
3827 }
3828 void setattrs(map<string, boost::optional<bufferlist> > &attrs) override {
3829 f->open_object_section("op");
3830 f->dump_string("code", "SETATTRS");
3831 f->open_array_section("attrs");
3832 for (map<string, boost::optional<bufferlist> >::iterator i = attrs.begin();
3833 i != attrs.end();
3834 ++i) {
3835 f->dump_string("attr_name", i->first);
3836 }
3837 f->close_section();
3838 f->close_section();
3839 }
3840 void rmobject(version_t old_version) override {
3841 f->open_object_section("op");
3842 f->dump_string("code", "RMOBJECT");
3843 f->dump_unsigned("old_version", old_version);
3844 f->close_section();
3845 }
3846 void try_rmobject(version_t old_version) override {
3847 f->open_object_section("op");
3848 f->dump_string("code", "TRY_RMOBJECT");
3849 f->dump_unsigned("old_version", old_version);
3850 f->close_section();
3851 }
3852 void create() override {
3853 f->open_object_section("op");
3854 f->dump_string("code", "CREATE");
3855 f->close_section();
3856 }
3857 void update_snaps(const set<snapid_t> &snaps) override {
3858 f->open_object_section("op");
3859 f->dump_string("code", "UPDATE_SNAPS");
3860 f->dump_stream("snaps") << snaps;
3861 f->close_section();
3862 }
3863 void rollback_extents(
3864 version_t gen,
3865 const vector<pair<uint64_t, uint64_t> > &extents) override {
3866 f->open_object_section("op");
3867 f->dump_string("code", "ROLLBACK_EXTENTS");
3868 f->dump_unsigned("gen", gen);
3869 f->dump_stream("snaps") << extents;
3870 f->close_section();
3871 }
3872};
3873
3874void ObjectModDesc::dump(Formatter *f) const
3875{
3876 f->open_object_section("object_mod_desc");
3877 f->dump_bool("can_local_rollback", can_local_rollback);
3878 f->dump_bool("rollback_info_completed", rollback_info_completed);
3879 {
3880 f->open_array_section("ops");
3881 DumpVisitor vis(f);
3882 visit(&vis);
3883 f->close_section();
3884 }
3885 f->close_section();
3886}
3887
3888void ObjectModDesc::generate_test_instances(list<ObjectModDesc*>& o)
3889{
3890 map<string, boost::optional<bufferlist> > attrs;
3891 attrs[OI_ATTR];
3892 attrs[SS_ATTR];
3893 attrs["asdf"];
3894 o.push_back(new ObjectModDesc());
3895 o.back()->append(100);
3896 o.back()->setattrs(attrs);
3897 o.push_back(new ObjectModDesc());
3898 o.back()->rmobject(1001);
3899 o.push_back(new ObjectModDesc());
3900 o.back()->create();
3901 o.back()->setattrs(attrs);
3902 o.push_back(new ObjectModDesc());
3903 o.back()->create();
3904 o.back()->setattrs(attrs);
3905 o.back()->mark_unrollbackable();
3906 o.back()->append(1000);
3907}
3908
3909void ObjectModDesc::encode(bufferlist &_bl) const
3910{
3911 ENCODE_START(max_required_version, max_required_version, _bl);
3912 ::encode(can_local_rollback, _bl);
3913 ::encode(rollback_info_completed, _bl);
3914 ::encode(bl, _bl);
3915 ENCODE_FINISH(_bl);
3916}
3917void ObjectModDesc::decode(bufferlist::iterator &_bl)
3918{
3919 DECODE_START(2, _bl);
3920 max_required_version = struct_v;
3921 ::decode(can_local_rollback, _bl);
3922 ::decode(rollback_info_completed, _bl);
3923 ::decode(bl, _bl);
3924 // ensure bl does not pin a larger buffer in memory
3925 bl.rebuild();
31f18b77 3926 bl.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
3927 DECODE_FINISH(_bl);
3928}
3929
3930// -- pg_log_entry_t --
3931
3932string pg_log_entry_t::get_key_name() const
3933{
3934 return version.get_key_name();
3935}
3936
3937void pg_log_entry_t::encode_with_checksum(bufferlist& bl) const
3938{
3939 bufferlist ebl(sizeof(*this)*2);
3940 encode(ebl);
3941 __u32 crc = ebl.crc32c(0);
3942 ::encode(ebl, bl);
3943 ::encode(crc, bl);
3944}
3945
3946void pg_log_entry_t::decode_with_checksum(bufferlist::iterator& p)
3947{
3948 bufferlist bl;
3949 ::decode(bl, p);
3950 __u32 crc;
3951 ::decode(crc, p);
3952 if (crc != bl.crc32c(0))
3953 throw buffer::malformed_input("bad checksum on pg_log_entry_t");
3954 bufferlist::iterator q = bl.begin();
3955 decode(q);
3956}
3957
3958void pg_log_entry_t::encode(bufferlist &bl) const
3959{
3960 ENCODE_START(11, 4, bl);
3961 ::encode(op, bl);
3962 ::encode(soid, bl);
3963 ::encode(version, bl);
3964
3965 /**
3966 * Added with reverting_to:
3967 * Previous code used prior_version to encode
3968 * what we now call reverting_to. This will
3969 * allow older code to decode reverting_to
3970 * into prior_version as expected.
3971 */
3972 if (op == LOST_REVERT)
3973 ::encode(reverting_to, bl);
3974 else
3975 ::encode(prior_version, bl);
3976
3977 ::encode(reqid, bl);
3978 ::encode(mtime, bl);
3979 if (op == LOST_REVERT)
3980 ::encode(prior_version, bl);
3981 ::encode(snaps, bl);
3982 ::encode(user_version, bl);
3983 ::encode(mod_desc, bl);
3984 ::encode(extra_reqids, bl);
3985 if (op == ERROR)
3986 ::encode(return_code, bl);
3987 ENCODE_FINISH(bl);
3988}
3989
3990void pg_log_entry_t::decode(bufferlist::iterator &bl)
3991{
3992 DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl);
3993 ::decode(op, bl);
3994 if (struct_v < 2) {
3995 sobject_t old_soid;
3996 ::decode(old_soid, bl);
3997 soid.oid = old_soid.oid;
3998 soid.snap = old_soid.snap;
3999 invalid_hash = true;
4000 } else {
4001 ::decode(soid, bl);
4002 }
4003 if (struct_v < 3)
4004 invalid_hash = true;
4005 ::decode(version, bl);
4006
4007 if (struct_v >= 6 && op == LOST_REVERT)
4008 ::decode(reverting_to, bl);
4009 else
4010 ::decode(prior_version, bl);
4011
4012 ::decode(reqid, bl);
4013
4014 ::decode(mtime, bl);
4015 if (struct_v < 5)
4016 invalid_pool = true;
4017
4018 if (op == LOST_REVERT) {
4019 if (struct_v >= 6) {
4020 ::decode(prior_version, bl);
4021 } else {
4022 reverting_to = prior_version;
4023 }
4024 }
4025 if (struct_v >= 7 || // for v >= 7, this is for all ops.
4026 op == CLONE) { // for v < 7, it's only present for CLONE.
4027 ::decode(snaps, bl);
4028 // ensure snaps does not pin a larger buffer in memory
4029 snaps.rebuild();
31f18b77 4030 snaps.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4031 }
4032
4033 if (struct_v >= 8)
4034 ::decode(user_version, bl);
4035 else
4036 user_version = version.version;
4037
4038 if (struct_v >= 9)
4039 ::decode(mod_desc, bl);
4040 else
4041 mod_desc.mark_unrollbackable();
4042 if (struct_v >= 10)
4043 ::decode(extra_reqids, bl);
4044 if (struct_v >= 11 && op == ERROR)
4045 ::decode(return_code, bl);
4046 DECODE_FINISH(bl);
4047}
4048
4049void pg_log_entry_t::dump(Formatter *f) const
4050{
4051 f->dump_string("op", get_op_name());
4052 f->dump_stream("object") << soid;
4053 f->dump_stream("version") << version;
4054 f->dump_stream("prior_version") << prior_version;
4055 f->dump_stream("reqid") << reqid;
4056 f->open_array_section("extra_reqids");
31f18b77 4057 for (auto p = extra_reqids.begin();
7c673cae
FG
4058 p != extra_reqids.end();
4059 ++p) {
4060 f->open_object_section("extra_reqid");
4061 f->dump_stream("reqid") << p->first;
4062 f->dump_stream("user_version") << p->second;
4063 f->close_section();
4064 }
4065 f->close_section();
4066 f->dump_stream("mtime") << mtime;
4067 f->dump_int("return_code", return_code);
4068 if (snaps.length() > 0) {
4069 vector<snapid_t> v;
4070 bufferlist c = snaps;
4071 bufferlist::iterator p = c.begin();
4072 try {
4073 ::decode(v, p);
4074 } catch (...) {
4075 v.clear();
4076 }
4077 f->open_object_section("snaps");
4078 for (vector<snapid_t>::iterator p = v.begin(); p != v.end(); ++p)
4079 f->dump_unsigned("snap", *p);
4080 f->close_section();
4081 }
4082 {
4083 f->open_object_section("mod_desc");
4084 mod_desc.dump(f);
4085 f->close_section();
4086 }
4087}
4088
4089void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
4090{
4091 o.push_back(new pg_log_entry_t());
4092 hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
4093 o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
4094 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4095 utime_t(8,9), 0));
4096 o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
4097 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4098 utime_t(8,9), -ENOENT));
4099}
4100
4101ostream& operator<<(ostream& out, const pg_log_entry_t& e)
4102{
4103 out << e.version << " (" << e.prior_version << ") "
4104 << std::left << std::setw(8) << e.get_op_name() << ' '
4105 << e.soid << " by " << e.reqid << " " << e.mtime
4106 << " " << e.return_code;
4107 if (e.snaps.length()) {
4108 vector<snapid_t> snaps;
4109 bufferlist c = e.snaps;
4110 bufferlist::iterator p = c.begin();
4111 try {
4112 ::decode(snaps, p);
4113 } catch (...) {
4114 snaps.clear();
4115 }
4116 out << " snaps " << snaps;
4117 }
4118 return out;
4119}
4120
c07f9fc5
FG
4121// -- pg_log_dup_t --
4122
4123string pg_log_dup_t::get_key_name() const
4124{
4125 return "dup_" + version.get_key_name();
4126}
4127
4128void pg_log_dup_t::encode(bufferlist &bl) const
4129{
4130 ENCODE_START(1, 1, bl);
4131 ::encode(reqid, bl);
4132 ::encode(version, bl);
4133 ::encode(user_version, bl);
4134 ::encode(return_code, bl);
4135 ENCODE_FINISH(bl);
4136}
4137
4138void pg_log_dup_t::decode(bufferlist::iterator &bl)
4139{
4140 DECODE_START(1, bl);
4141 ::decode(reqid, bl);
4142 ::decode(version, bl);
4143 ::decode(user_version, bl);
4144 ::decode(return_code, bl);
4145 DECODE_FINISH(bl);
4146}
4147
4148void pg_log_dup_t::dump(Formatter *f) const
4149{
4150 f->dump_stream("reqid") << reqid;
4151 f->dump_stream("version") << version;
4152 f->dump_stream("user_version") << user_version;
4153 f->dump_stream("return_code") << return_code;
4154}
4155
4156void pg_log_dup_t::generate_test_instances(list<pg_log_dup_t*>& o)
4157{
4158 o.push_back(new pg_log_dup_t());
4159 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4160 1,
4161 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4162 0));
4163 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4164 2,
4165 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4166 -ENOENT));
4167}
4168
4169
4170std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e) {
4171 return out << "log_dup(reqid=" << e.reqid <<
4172 " v=" << e.version << " uv=" << e.user_version <<
4173 " rc=" << e.return_code << ")";
4174}
4175
7c673cae
FG
4176
4177// -- pg_log_t --
4178
4179// out: pg_log_t that only has entries that apply to import_pgid using curmap
4180// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4181void pg_log_t::filter_log(spg_t import_pgid, const OSDMap &curmap,
4182 const string &hit_set_namespace, const pg_log_t &in,
4183 pg_log_t &out, pg_log_t &reject)
4184{
4185 out = in;
4186 out.log.clear();
4187 reject.log.clear();
4188
4189 for (list<pg_log_entry_t>::const_iterator i = in.log.begin();
4190 i != in.log.end(); ++i) {
4191
4192 // Reject pg log entries for temporary objects
4193 if (i->soid.is_temp()) {
4194 reject.log.push_back(*i);
4195 continue;
4196 }
4197
4198 if (i->soid.nspace != hit_set_namespace) {
4199 object_t oid = i->soid.oid;
4200 object_locator_t loc(i->soid);
4201 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
4202 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
4203
4204 if (import_pgid.pgid == pgid) {
4205 out.log.push_back(*i);
4206 } else {
4207 reject.log.push_back(*i);
4208 }
4209 } else {
4210 out.log.push_back(*i);
4211 }
4212 }
4213}
4214
4215void pg_log_t::encode(bufferlist& bl) const
4216{
c07f9fc5 4217 ENCODE_START(7, 3, bl);
7c673cae
FG
4218 ::encode(head, bl);
4219 ::encode(tail, bl);
4220 ::encode(log, bl);
4221 ::encode(can_rollback_to, bl);
4222 ::encode(rollback_info_trimmed_to, bl);
c07f9fc5 4223 ::encode(dups, bl);
7c673cae
FG
4224 ENCODE_FINISH(bl);
4225}
4226
4227void pg_log_t::decode(bufferlist::iterator &bl, int64_t pool)
4228{
c07f9fc5 4229 DECODE_START_LEGACY_COMPAT_LEN(7, 3, 3, bl);
7c673cae
FG
4230 ::decode(head, bl);
4231 ::decode(tail, bl);
4232 if (struct_v < 2) {
4233 bool backlog;
4234 ::decode(backlog, bl);
4235 }
4236 ::decode(log, bl);
4237 if (struct_v >= 5)
4238 ::decode(can_rollback_to, bl);
4239
4240 if (struct_v >= 6)
4241 ::decode(rollback_info_trimmed_to, bl);
4242 else
4243 rollback_info_trimmed_to = tail;
c07f9fc5
FG
4244
4245 if (struct_v >= 7)
4246 ::decode(dups, bl);
4247
7c673cae
FG
4248 DECODE_FINISH(bl);
4249
4250 // handle hobject_t format change
4251 if (struct_v < 4) {
4252 for (list<pg_log_entry_t>::iterator i = log.begin();
4253 i != log.end();
4254 ++i) {
4255 if (!i->soid.is_max() && i->soid.pool == -1)
4256 i->soid.pool = pool;
4257 }
4258 }
4259}
4260
4261void pg_log_t::dump(Formatter *f) const
4262{
4263 f->dump_stream("head") << head;
4264 f->dump_stream("tail") << tail;
4265 f->open_array_section("log");
4266 for (list<pg_log_entry_t>::const_iterator p = log.begin(); p != log.end(); ++p) {
4267 f->open_object_section("entry");
4268 p->dump(f);
4269 f->close_section();
4270 }
4271 f->close_section();
c07f9fc5
FG
4272 f->open_array_section("dups");
4273 for (const auto& entry : dups) {
4274 f->open_object_section("entry");
4275 entry.dump(f);
4276 f->close_section();
4277 }
4278 f->close_section();
7c673cae
FG
4279}
4280
4281void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
4282{
4283 o.push_back(new pg_log_t);
4284
4285 // this is nonsensical:
4286 o.push_back(new pg_log_t);
4287 o.back()->head = eversion_t(1,2);
4288 o.back()->tail = eversion_t(3,4);
4289 list<pg_log_entry_t*> e;
4290 pg_log_entry_t::generate_test_instances(e);
4291 for (list<pg_log_entry_t*>::iterator p = e.begin(); p != e.end(); ++p)
4292 o.back()->log.push_back(**p);
4293}
4294
4295void pg_log_t::copy_after(const pg_log_t &other, eversion_t v)
4296{
4297 can_rollback_to = other.can_rollback_to;
4298 head = other.head;
4299 tail = other.tail;
4300 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4301 i != other.log.rend();
4302 ++i) {
4303 assert(i->version > other.tail);
4304 if (i->version <= v) {
4305 // make tail accurate.
4306 tail = i->version;
4307 break;
4308 }
4309 log.push_front(*i);
4310 }
4311}
4312
4313void pg_log_t::copy_range(const pg_log_t &other, eversion_t from, eversion_t to)
4314{
4315 can_rollback_to = other.can_rollback_to;
4316 list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4317 assert(i != other.log.rend());
4318 while (i->version > to) {
4319 ++i;
4320 assert(i != other.log.rend());
4321 }
4322 assert(i->version == to);
4323 head = to;
4324 for ( ; i != other.log.rend(); ++i) {
4325 if (i->version <= from) {
4326 tail = i->version;
4327 break;
4328 }
4329 log.push_front(*i);
4330 }
4331}
4332
4333void pg_log_t::copy_up_to(const pg_log_t &other, int max)
4334{
4335 can_rollback_to = other.can_rollback_to;
4336 int n = 0;
4337 head = other.head;
4338 tail = other.tail;
4339 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4340 i != other.log.rend();
4341 ++i) {
4342 if (n++ >= max) {
4343 tail = i->version;
4344 break;
4345 }
4346 log.push_front(*i);
4347 }
4348}
4349
c07f9fc5 4350ostream& pg_log_t::print(ostream& out) const
7c673cae
FG
4351{
4352 out << *this << std::endl;
4353 for (list<pg_log_entry_t>::const_iterator p = log.begin();
4354 p != log.end();
c07f9fc5 4355 ++p)
7c673cae 4356 out << *p << std::endl;
c07f9fc5
FG
4357 for (const auto& entry : dups) {
4358 out << " dup entry: " << entry << std::endl;
4359 }
7c673cae
FG
4360 return out;
4361}
4362
4363// -- pg_missing_t --
4364
4365ostream& operator<<(ostream& out, const pg_missing_item& i)
4366{
4367 out << i.need;
4368 if (i.have != eversion_t())
4369 out << "(" << i.have << ")";
c07f9fc5 4370 out << " flags = " << i.flag_str();
7c673cae
FG
4371 return out;
4372}
4373
4374// -- object_copy_cursor_t --
4375
4376void object_copy_cursor_t::encode(bufferlist& bl) const
4377{
4378 ENCODE_START(1, 1, bl);
4379 ::encode(attr_complete, bl);
4380 ::encode(data_offset, bl);
4381 ::encode(data_complete, bl);
4382 ::encode(omap_offset, bl);
4383 ::encode(omap_complete, bl);
4384 ENCODE_FINISH(bl);
4385}
4386
4387void object_copy_cursor_t::decode(bufferlist::iterator &bl)
4388{
4389 DECODE_START(1, bl);
4390 ::decode(attr_complete, bl);
4391 ::decode(data_offset, bl);
4392 ::decode(data_complete, bl);
4393 ::decode(omap_offset, bl);
4394 ::decode(omap_complete, bl);
4395 DECODE_FINISH(bl);
4396}
4397
4398void object_copy_cursor_t::dump(Formatter *f) const
4399{
4400 f->dump_unsigned("attr_complete", (int)attr_complete);
4401 f->dump_unsigned("data_offset", data_offset);
4402 f->dump_unsigned("data_complete", (int)data_complete);
4403 f->dump_string("omap_offset", omap_offset);
4404 f->dump_unsigned("omap_complete", (int)omap_complete);
4405}
4406
4407void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
4408{
4409 o.push_back(new object_copy_cursor_t);
4410 o.push_back(new object_copy_cursor_t);
4411 o.back()->attr_complete = true;
4412 o.back()->data_offset = 123;
4413 o.push_back(new object_copy_cursor_t);
4414 o.back()->attr_complete = true;
4415 o.back()->data_complete = true;
4416 o.back()->omap_offset = "foo";
4417 o.push_back(new object_copy_cursor_t);
4418 o.back()->attr_complete = true;
4419 o.back()->data_complete = true;
4420 o.back()->omap_complete = true;
4421}
4422
4423// -- object_copy_data_t --
4424
4425void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const
4426{
4427 ENCODE_START(7, 5, bl);
4428 ::encode(size, bl);
4429 ::encode(mtime, bl);
4430 ::encode(attrs, bl);
4431 ::encode(data, bl);
4432 ::encode(omap_data, bl);
4433 ::encode(cursor, bl);
4434 ::encode(omap_header, bl);
4435 ::encode(snaps, bl);
4436 ::encode(snap_seq, bl);
4437 ::encode(flags, bl);
4438 ::encode(data_digest, bl);
4439 ::encode(omap_digest, bl);
4440 ::encode(reqids, bl);
4441 ::encode(truncate_seq, bl);
4442 ::encode(truncate_size, bl);
4443 ENCODE_FINISH(bl);
4444}
4445
4446void object_copy_data_t::decode(bufferlist::iterator& bl)
4447{
4448 DECODE_START(7, bl);
4449 if (struct_v < 5) {
4450 // old
4451 ::decode(size, bl);
4452 ::decode(mtime, bl);
4453 {
4454 string category;
4455 ::decode(category, bl); // no longer used
4456 }
4457 ::decode(attrs, bl);
4458 ::decode(data, bl);
4459 {
4460 map<string,bufferlist> omap;
4461 ::decode(omap, bl);
4462 omap_data.clear();
4463 if (!omap.empty())
4464 ::encode(omap, omap_data);
4465 }
4466 ::decode(cursor, bl);
4467 if (struct_v >= 2)
4468 ::decode(omap_header, bl);
4469 if (struct_v >= 3) {
4470 ::decode(snaps, bl);
4471 ::decode(snap_seq, bl);
4472 } else {
4473 snaps.clear();
4474 snap_seq = 0;
4475 }
4476 if (struct_v >= 4) {
4477 ::decode(flags, bl);
4478 ::decode(data_digest, bl);
4479 ::decode(omap_digest, bl);
4480 }
4481 } else {
4482 // current
4483 ::decode(size, bl);
4484 ::decode(mtime, bl);
4485 ::decode(attrs, bl);
4486 ::decode(data, bl);
4487 ::decode(omap_data, bl);
4488 ::decode(cursor, bl);
4489 ::decode(omap_header, bl);
4490 ::decode(snaps, bl);
4491 ::decode(snap_seq, bl);
4492 if (struct_v >= 4) {
4493 ::decode(flags, bl);
4494 ::decode(data_digest, bl);
4495 ::decode(omap_digest, bl);
4496 }
4497 if (struct_v >= 6) {
4498 ::decode(reqids, bl);
4499 }
4500 if (struct_v >= 7) {
4501 ::decode(truncate_seq, bl);
4502 ::decode(truncate_size, bl);
4503 }
4504 }
4505 DECODE_FINISH(bl);
4506}
4507
4508void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o)
4509{
4510 o.push_back(new object_copy_data_t());
4511
4512 list<object_copy_cursor_t*> cursors;
4513 object_copy_cursor_t::generate_test_instances(cursors);
4514 list<object_copy_cursor_t*>::iterator ci = cursors.begin();
4515 o.back()->cursor = **(ci++);
4516
4517 o.push_back(new object_copy_data_t());
4518 o.back()->cursor = **(ci++);
4519
4520 o.push_back(new object_copy_data_t());
4521 o.back()->size = 1234;
4522 o.back()->mtime.set_from_double(1234);
4523 bufferptr bp("there", 5);
4524 bufferlist bl;
4525 bl.push_back(bp);
4526 o.back()->attrs["hello"] = bl;
4527 bufferptr bp2("not", 3);
4528 bufferlist bl2;
4529 bl2.push_back(bp2);
4530 map<string,bufferlist> omap;
4531 omap["why"] = bl2;
4532 ::encode(omap, o.back()->omap_data);
4533 bufferptr databp("iamsomedatatocontain", 20);
4534 o.back()->data.push_back(databp);
4535 o.back()->omap_header.append("this is an omap header");
4536 o.back()->snaps.push_back(123);
4537 o.back()->reqids.push_back(make_pair(osd_reqid_t(), version_t()));
4538}
4539
4540void object_copy_data_t::dump(Formatter *f) const
4541{
4542 f->open_object_section("cursor");
4543 cursor.dump(f);
4544 f->close_section(); // cursor
4545 f->dump_int("size", size);
4546 f->dump_stream("mtime") << mtime;
4547 /* we should really print out the attrs here, but bufferlist
4548 const-correctness prevents that */
4549 f->dump_int("attrs_size", attrs.size());
4550 f->dump_int("flags", flags);
4551 f->dump_unsigned("data_digest", data_digest);
4552 f->dump_unsigned("omap_digest", omap_digest);
4553 f->dump_int("omap_data_length", omap_data.length());
4554 f->dump_int("omap_header_length", omap_header.length());
4555 f->dump_int("data_length", data.length());
4556 f->open_array_section("snaps");
4557 for (vector<snapid_t>::const_iterator p = snaps.begin();
4558 p != snaps.end(); ++p)
4559 f->dump_unsigned("snap", *p);
4560 f->close_section();
4561 f->open_array_section("reqids");
31f18b77 4562 for (auto p = reqids.begin();
7c673cae
FG
4563 p != reqids.end();
4564 ++p) {
4565 f->open_object_section("extra_reqid");
4566 f->dump_stream("reqid") << p->first;
4567 f->dump_stream("user_version") << p->second;
4568 f->close_section();
4569 }
4570 f->close_section();
4571}
4572
4573// -- pg_create_t --
4574
4575void pg_create_t::encode(bufferlist &bl) const
4576{
4577 ENCODE_START(1, 1, bl);
4578 ::encode(created, bl);
4579 ::encode(parent, bl);
4580 ::encode(split_bits, bl);
4581 ENCODE_FINISH(bl);
4582}
4583
4584void pg_create_t::decode(bufferlist::iterator &bl)
4585{
4586 DECODE_START(1, bl);
4587 ::decode(created, bl);
4588 ::decode(parent, bl);
4589 ::decode(split_bits, bl);
4590 DECODE_FINISH(bl);
4591}
4592
4593void pg_create_t::dump(Formatter *f) const
4594{
4595 f->dump_unsigned("created", created);
4596 f->dump_stream("parent") << parent;
4597 f->dump_int("split_bits", split_bits);
4598}
4599
4600void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
4601{
4602 o.push_back(new pg_create_t);
4603 o.push_back(new pg_create_t(1, pg_t(3, 4, -1), 2));
4604}
4605
4606
4607// -- pg_hit_set_info_t --
4608
4609void pg_hit_set_info_t::encode(bufferlist& bl) const
4610{
4611 ENCODE_START(2, 1, bl);
4612 ::encode(begin, bl);
4613 ::encode(end, bl);
4614 ::encode(version, bl);
4615 ::encode(using_gmt, bl);
4616 ENCODE_FINISH(bl);
4617}
4618
4619void pg_hit_set_info_t::decode(bufferlist::iterator& p)
4620{
4621 DECODE_START(2, p);
4622 ::decode(begin, p);
4623 ::decode(end, p);
4624 ::decode(version, p);
4625 if (struct_v >= 2) {
4626 ::decode(using_gmt, p);
4627 } else {
4628 using_gmt = false;
4629 }
4630 DECODE_FINISH(p);
4631}
4632
4633void pg_hit_set_info_t::dump(Formatter *f) const
4634{
4635 f->dump_stream("begin") << begin;
4636 f->dump_stream("end") << end;
4637 f->dump_stream("version") << version;
4638 f->dump_stream("using_gmt") << using_gmt;
4639}
4640
4641void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
4642{
4643 ls.push_back(new pg_hit_set_info_t);
4644 ls.push_back(new pg_hit_set_info_t);
4645 ls.back()->begin = utime_t(1, 2);
4646 ls.back()->end = utime_t(3, 4);
4647}
4648
4649
4650// -- pg_hit_set_history_t --
4651
4652void pg_hit_set_history_t::encode(bufferlist& bl) const
4653{
4654 ENCODE_START(1, 1, bl);
4655 ::encode(current_last_update, bl);
4656 {
4657 utime_t dummy_stamp;
4658 ::encode(dummy_stamp, bl);
4659 }
4660 {
4661 pg_hit_set_info_t dummy_info;
4662 ::encode(dummy_info, bl);
4663 }
4664 ::encode(history, bl);
4665 ENCODE_FINISH(bl);
4666}
4667
4668void pg_hit_set_history_t::decode(bufferlist::iterator& p)
4669{
4670 DECODE_START(1, p);
4671 ::decode(current_last_update, p);
4672 {
4673 utime_t dummy_stamp;
4674 ::decode(dummy_stamp, p);
4675 }
4676 {
4677 pg_hit_set_info_t dummy_info;
4678 ::decode(dummy_info, p);
4679 }
4680 ::decode(history, p);
4681 DECODE_FINISH(p);
4682}
4683
4684void pg_hit_set_history_t::dump(Formatter *f) const
4685{
4686 f->dump_stream("current_last_update") << current_last_update;
4687 f->open_array_section("history");
4688 for (list<pg_hit_set_info_t>::const_iterator p = history.begin();
4689 p != history.end(); ++p) {
4690 f->open_object_section("info");
4691 p->dump(f);
4692 f->close_section();
4693 }
4694 f->close_section();
4695}
4696
4697void pg_hit_set_history_t::generate_test_instances(list<pg_hit_set_history_t*>& ls)
4698{
4699 ls.push_back(new pg_hit_set_history_t);
4700 ls.push_back(new pg_hit_set_history_t);
4701 ls.back()->current_last_update = eversion_t(1, 2);
4702 ls.back()->history.push_back(pg_hit_set_info_t());
4703}
4704
4705// -- osd_peer_stat_t --
4706
4707void osd_peer_stat_t::encode(bufferlist& bl) const
4708{
4709 ENCODE_START(1, 1, bl);
4710 ::encode(stamp, bl);
4711 ENCODE_FINISH(bl);
4712}
4713
4714void osd_peer_stat_t::decode(bufferlist::iterator& bl)
4715{
4716 DECODE_START(1, bl);
4717 ::decode(stamp, bl);
4718 DECODE_FINISH(bl);
4719}
4720
4721void osd_peer_stat_t::dump(Formatter *f) const
4722{
4723 f->dump_stream("stamp") << stamp;
4724}
4725
4726void osd_peer_stat_t::generate_test_instances(list<osd_peer_stat_t*>& o)
4727{
4728 o.push_back(new osd_peer_stat_t);
4729 o.push_back(new osd_peer_stat_t);
4730 o.back()->stamp = utime_t(1, 2);
4731}
4732
4733ostream& operator<<(ostream& out, const osd_peer_stat_t &stat)
4734{
4735 return out << "stat(" << stat.stamp << ")";
4736}
4737
4738
4739// -- OSDSuperblock --
4740
4741void OSDSuperblock::encode(bufferlist &bl) const
4742{
4743 ENCODE_START(8, 5, bl);
4744 ::encode(cluster_fsid, bl);
4745 ::encode(whoami, bl);
4746 ::encode(current_epoch, bl);
4747 ::encode(oldest_map, bl);
4748 ::encode(newest_map, bl);
4749 ::encode(weight, bl);
4750 compat_features.encode(bl);
4751 ::encode(clean_thru, bl);
4752 ::encode(mounted, bl);
4753 ::encode(osd_fsid, bl);
4754 ::encode((epoch_t)0, bl); // epoch_t last_epoch_marked_full
4755 ::encode((uint32_t)0, bl); // map<int64_t,epoch_t> pool_last_epoch_marked_full
4756 ENCODE_FINISH(bl);
4757}
4758
4759void OSDSuperblock::decode(bufferlist::iterator &bl)
4760{
4761 DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl);
4762 if (struct_v < 3) {
4763 string magic;
4764 ::decode(magic, bl);
4765 }
4766 ::decode(cluster_fsid, bl);
4767 ::decode(whoami, bl);
4768 ::decode(current_epoch, bl);
4769 ::decode(oldest_map, bl);
4770 ::decode(newest_map, bl);
4771 ::decode(weight, bl);
4772 if (struct_v >= 2) {
4773 compat_features.decode(bl);
4774 } else { //upgrade it!
4775 compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
4776 }
4777 ::decode(clean_thru, bl);
4778 ::decode(mounted, bl);
4779 if (struct_v >= 4)
4780 ::decode(osd_fsid, bl);
4781 if (struct_v >= 6) {
4782 epoch_t last_map_marked_full;
4783 ::decode(last_map_marked_full, bl);
4784 }
4785 if (struct_v >= 7) {
4786 map<int64_t,epoch_t> pool_last_map_marked_full;
4787 ::decode(pool_last_map_marked_full, bl);
4788 }
4789 DECODE_FINISH(bl);
4790}
4791
4792void OSDSuperblock::dump(Formatter *f) const
4793{
4794 f->dump_stream("cluster_fsid") << cluster_fsid;
4795 f->dump_stream("osd_fsid") << osd_fsid;
4796 f->dump_int("whoami", whoami);
4797 f->dump_int("current_epoch", current_epoch);
4798 f->dump_int("oldest_map", oldest_map);
4799 f->dump_int("newest_map", newest_map);
4800 f->dump_float("weight", weight);
4801 f->open_object_section("compat");
4802 compat_features.dump(f);
4803 f->close_section();
4804 f->dump_int("clean_thru", clean_thru);
4805 f->dump_int("last_epoch_mounted", mounted);
4806}
4807
4808void OSDSuperblock::generate_test_instances(list<OSDSuperblock*>& o)
4809{
4810 OSDSuperblock z;
4811 o.push_back(new OSDSuperblock(z));
4812 memset(&z.cluster_fsid, 1, sizeof(z.cluster_fsid));
4813 memset(&z.osd_fsid, 2, sizeof(z.osd_fsid));
4814 z.whoami = 3;
4815 z.current_epoch = 4;
4816 z.oldest_map = 5;
4817 z.newest_map = 9;
4818 z.mounted = 8;
4819 z.clean_thru = 7;
4820 o.push_back(new OSDSuperblock(z));
4821 o.push_back(new OSDSuperblock(z));
4822}
4823
4824// -- SnapSet --
4825
4826void SnapSet::encode(bufferlist& bl) const
4827{
4828 ENCODE_START(3, 2, bl);
4829 ::encode(seq, bl);
4830 ::encode(head_exists, bl);
4831 ::encode(snaps, bl);
4832 ::encode(clones, bl);
4833 ::encode(clone_overlap, bl);
4834 ::encode(clone_size, bl);
4835 ::encode(clone_snaps, bl);
4836 ENCODE_FINISH(bl);
4837}
4838
4839void SnapSet::decode(bufferlist::iterator& bl)
4840{
4841 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
4842 ::decode(seq, bl);
4843 ::decode(head_exists, bl);
4844 ::decode(snaps, bl);
4845 ::decode(clones, bl);
4846 ::decode(clone_overlap, bl);
4847 ::decode(clone_size, bl);
4848 if (struct_v >= 3) {
4849 ::decode(clone_snaps, bl);
4850 } else {
4851 clone_snaps.clear();
4852 }
4853 DECODE_FINISH(bl);
4854}
4855
4856void SnapSet::dump(Formatter *f) const
4857{
4858 SnapContext sc(seq, snaps);
4859 f->open_object_section("snap_context");
4860 sc.dump(f);
4861 f->close_section();
4862 f->dump_int("head_exists", head_exists);
4863 f->open_array_section("clones");
4864 for (vector<snapid_t>::const_iterator p = clones.begin(); p != clones.end(); ++p) {
4865 f->open_object_section("clone");
4866 f->dump_unsigned("snap", *p);
4867 f->dump_unsigned("size", clone_size.find(*p)->second);
4868 f->dump_stream("overlap") << clone_overlap.find(*p)->second;
4869 auto q = clone_snaps.find(*p);
4870 if (q != clone_snaps.end()) {
4871 f->open_array_section("snaps");
4872 for (auto s : q->second) {
4873 f->dump_unsigned("snap", s);
4874 }
4875 f->close_section();
4876 }
4877 f->close_section();
4878 }
4879 f->close_section();
4880}
4881
4882void SnapSet::generate_test_instances(list<SnapSet*>& o)
4883{
4884 o.push_back(new SnapSet);
4885 o.push_back(new SnapSet);
4886 o.back()->head_exists = true;
4887 o.back()->seq = 123;
4888 o.back()->snaps.push_back(123);
4889 o.back()->snaps.push_back(12);
4890 o.push_back(new SnapSet);
4891 o.back()->head_exists = true;
4892 o.back()->seq = 123;
4893 o.back()->snaps.push_back(123);
4894 o.back()->snaps.push_back(12);
4895 o.back()->clones.push_back(12);
4896 o.back()->clone_size[12] = 12345;
4897 o.back()->clone_overlap[12];
4898 o.back()->clone_snaps[12] = {12, 10, 8};
4899}
4900
4901ostream& operator<<(ostream& out, const SnapSet& cs)
4902{
4903 if (cs.is_legacy()) {
4904 out << cs.seq << "=" << cs.snaps << ":"
4905 << cs.clones
4906 << (cs.head_exists ? "+head":"");
4907 if (!cs.clone_snaps.empty()) {
4908 out << "+stray_clone_snaps=" << cs.clone_snaps;
4909 }
4910 return out;
4911 } else {
4912 return out << cs.seq << "=" << cs.snaps << ":"
4913 << cs.clone_snaps;
4914 }
4915}
4916
4917void SnapSet::from_snap_set(const librados::snap_set_t& ss, bool legacy)
4918{
4919 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
4920 // correct: it will not include snaps that still logically exist
4921 // but for which there was no clone that is defined. For all
4922 // practical purposes this doesn't matter, since we only use that
4923 // information to clone on the OSD, and we have already moved
4924 // forward past that part of the object history.
4925
4926 seq = ss.seq;
4927 set<snapid_t> _snaps;
4928 set<snapid_t> _clones;
4929 head_exists = false;
4930 for (vector<librados::clone_info_t>::const_iterator p = ss.clones.begin();
4931 p != ss.clones.end();
4932 ++p) {
4933 if (p->cloneid == librados::SNAP_HEAD) {
4934 head_exists = true;
4935 } else {
4936 _clones.insert(p->cloneid);
4937 _snaps.insert(p->snaps.begin(), p->snaps.end());
4938 clone_size[p->cloneid] = p->size;
4939 clone_overlap[p->cloneid]; // the entry must exist, even if it's empty.
4940 for (vector<pair<uint64_t, uint64_t> >::const_iterator q =
4941 p->overlap.begin(); q != p->overlap.end(); ++q)
4942 clone_overlap[p->cloneid].insert(q->first, q->second);
4943 if (!legacy) {
4944 // p->snaps is ascending; clone_snaps is descending
4945 vector<snapid_t>& v = clone_snaps[p->cloneid];
4946 for (auto q = p->snaps.rbegin(); q != p->snaps.rend(); ++q) {
4947 v.push_back(*q);
4948 }
4949 }
4950 }
4951 }
4952
4953 // ascending
4954 clones.clear();
4955 clones.reserve(_clones.size());
4956 for (set<snapid_t>::iterator p = _clones.begin(); p != _clones.end(); ++p)
4957 clones.push_back(*p);
4958
4959 // descending
4960 snaps.clear();
4961 snaps.reserve(_snaps.size());
4962 for (set<snapid_t>::reverse_iterator p = _snaps.rbegin();
4963 p != _snaps.rend(); ++p)
4964 snaps.push_back(*p);
4965}
4966
4967uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
4968{
4969 assert(clone_size.count(clone));
4970 uint64_t size = clone_size.find(clone)->second;
4971 assert(clone_overlap.count(clone));
4972 const interval_set<uint64_t> &overlap = clone_overlap.find(clone)->second;
4973 for (interval_set<uint64_t>::const_iterator i = overlap.begin();
4974 i != overlap.end();
4975 ++i) {
4976 assert(size >= i.get_len());
4977 size -= i.get_len();
4978 }
4979 return size;
4980}
4981
4982void SnapSet::filter(const pg_pool_t &pinfo)
4983{
4984 vector<snapid_t> oldsnaps;
4985 oldsnaps.swap(snaps);
4986 for (vector<snapid_t>::const_iterator i = oldsnaps.begin();
4987 i != oldsnaps.end();
4988 ++i) {
4989 if (!pinfo.is_removed_snap(*i))
4990 snaps.push_back(*i);
4991 }
4992}
4993
4994SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
4995{
4996 SnapSet ss = *this;
4997 ss.filter(pinfo);
4998 return ss;
4999}
5000
5001// -- watch_info_t --
5002
5003void watch_info_t::encode(bufferlist& bl, uint64_t features) const
5004{
5005 ENCODE_START(4, 3, bl);
5006 ::encode(cookie, bl);
5007 ::encode(timeout_seconds, bl);
5008 ::encode(addr, bl, features);
5009 ENCODE_FINISH(bl);
5010}
5011
5012void watch_info_t::decode(bufferlist::iterator& bl)
5013{
5014 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
5015 ::decode(cookie, bl);
5016 if (struct_v < 2) {
5017 uint64_t ver;
5018 ::decode(ver, bl);
5019 }
5020 ::decode(timeout_seconds, bl);
5021 if (struct_v >= 4) {
5022 ::decode(addr, bl);
5023 }
5024 DECODE_FINISH(bl);
5025}
5026
5027void watch_info_t::dump(Formatter *f) const
5028{
5029 f->dump_unsigned("cookie", cookie);
5030 f->dump_unsigned("timeout_seconds", timeout_seconds);
5031 f->open_object_section("addr");
5032 addr.dump(f);
5033 f->close_section();
5034}
5035
5036void watch_info_t::generate_test_instances(list<watch_info_t*>& o)
5037{
5038 o.push_back(new watch_info_t);
5039 o.push_back(new watch_info_t);
5040 o.back()->cookie = 123;
5041 o.back()->timeout_seconds = 99;
5042 entity_addr_t ea;
5043 ea.set_type(entity_addr_t::TYPE_LEGACY);
5044 ea.set_nonce(1);
5045 ea.set_family(AF_INET);
5046 ea.set_in4_quad(0, 127);
5047 ea.set_in4_quad(1, 0);
5048 ea.set_in4_quad(2, 1);
5049 ea.set_in4_quad(3, 2);
5050 ea.set_port(2);
5051 o.back()->addr = ea;
5052}
5053
31f18b77
FG
5054// -- object_manifest_t --
5055
5056void object_manifest_t::encode(bufferlist& bl) const
5057{
5058 ENCODE_START(1, 1, bl);
5059 ::encode(type, bl);
5060 switch (type) {
5061 case TYPE_NONE: break;
5062 case TYPE_REDIRECT:
5063 ::encode(redirect_target, bl);
5064 break;
5065 default:
5066 ceph_abort();
5067 }
5068 ENCODE_FINISH(bl);
5069}
5070
5071void object_manifest_t::decode(bufferlist::iterator& bl)
5072{
5073 DECODE_START(1, bl);
5074 ::decode(type, bl);
5075 switch (type) {
5076 case TYPE_NONE: break;
5077 case TYPE_REDIRECT:
5078 ::decode(redirect_target, bl);
5079 break;
5080 default:
5081 ceph_abort();
5082 }
5083 DECODE_FINISH(bl);
5084}
5085
5086void object_manifest_t::dump(Formatter *f) const
5087{
5088 f->dump_unsigned("type", type);
5089 f->open_object_section("redirect_target");
5090 redirect_target.dump(f);
5091 f->close_section();
5092}
5093
5094void object_manifest_t::generate_test_instances(list<object_manifest_t*>& o)
5095{
5096 o.push_back(new object_manifest_t());
5097 o.back()->type = TYPE_REDIRECT;
5098}
5099
5100ostream& operator<<(ostream& out, const object_manifest_t& om)
5101{
5102 return out << "type:" << om.type << " redirect_target:" << om.redirect_target;
5103}
7c673cae
FG
5104
5105// -- object_info_t --
5106
5107void object_info_t::copy_user_bits(const object_info_t& other)
5108{
5109 // these bits are copied from head->clone.
5110 size = other.size;
5111 mtime = other.mtime;
5112 local_mtime = other.local_mtime;
5113 last_reqid = other.last_reqid;
5114 truncate_seq = other.truncate_seq;
5115 truncate_size = other.truncate_size;
5116 flags = other.flags;
5117 user_version = other.user_version;
5118 data_digest = other.data_digest;
5119 omap_digest = other.omap_digest;
5120}
5121
5122ps_t object_info_t::legacy_object_locator_to_ps(const object_t &oid,
5123 const object_locator_t &loc) {
5124 ps_t ps;
5125 if (loc.key.length())
5126 // Hack, we don't have the osd map, so we don't really know the hash...
5127 ps = ceph_str_hash(CEPH_STR_HASH_RJENKINS, loc.key.c_str(),
5128 loc.key.length());
5129 else
5130 ps = ceph_str_hash(CEPH_STR_HASH_RJENKINS, oid.name.c_str(),
5131 oid.name.length());
5132 return ps;
5133}
5134
5135void object_info_t::encode(bufferlist& bl, uint64_t features) const
5136{
5137 object_locator_t myoloc(soid);
5138 map<entity_name_t, watch_info_t> old_watchers;
5139 for (map<pair<uint64_t, entity_name_t>, watch_info_t>::const_iterator i =
5140 watchers.begin();
5141 i != watchers.end();
5142 ++i) {
5143 old_watchers.insert(make_pair(i->first.second, i->second));
5144 }
31f18b77 5145 ENCODE_START(17, 8, bl);
7c673cae
FG
5146 ::encode(soid, bl);
5147 ::encode(myoloc, bl); //Retained for compatibility
5148 ::encode((__u32)0, bl); // was category, no longer used
5149 ::encode(version, bl);
5150 ::encode(prior_version, bl);
5151 ::encode(last_reqid, bl);
5152 ::encode(size, bl);
5153 ::encode(mtime, bl);
5154 if (soid.snap == CEPH_NOSNAP)
5155 ::encode(osd_reqid_t(), bl); // used to be wrlock_by
5156 else
5157 ::encode(legacy_snaps, bl);
5158 ::encode(truncate_seq, bl);
5159 ::encode(truncate_size, bl);
5160 ::encode(is_lost(), bl);
5161 ::encode(old_watchers, bl, features);
5162 /* shenanigans to avoid breaking backwards compatibility in the disk format.
5163 * When we can, switch this out for simply putting the version_t on disk. */
5164 eversion_t user_eversion(0, user_version);
5165 ::encode(user_eversion, bl);
5166 ::encode(test_flag(FLAG_USES_TMAP), bl);
5167 ::encode(watchers, bl, features);
5168 __u32 _flags = flags;
5169 ::encode(_flags, bl);
5170 ::encode(local_mtime, bl);
5171 ::encode(data_digest, bl);
5172 ::encode(omap_digest, bl);
5173 ::encode(expected_object_size, bl);
5174 ::encode(expected_write_size, bl);
5175 ::encode(alloc_hint_flags, bl);
31f18b77
FG
5176 if (has_manifest()) {
5177 ::encode(manifest, bl);
5178 }
7c673cae
FG
5179 ENCODE_FINISH(bl);
5180}
5181
5182void object_info_t::decode(bufferlist::iterator& bl)
5183{
5184 object_locator_t myoloc;
31f18b77 5185 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl);
7c673cae
FG
5186 map<entity_name_t, watch_info_t> old_watchers;
5187 ::decode(soid, bl);
5188 ::decode(myoloc, bl);
5189 {
5190 string category;
5191 ::decode(category, bl); // no longer used
5192 }
5193 ::decode(version, bl);
5194 ::decode(prior_version, bl);
5195 ::decode(last_reqid, bl);
5196 ::decode(size, bl);
5197 ::decode(mtime, bl);
5198 if (soid.snap == CEPH_NOSNAP) {
5199 osd_reqid_t wrlock_by;
5200 ::decode(wrlock_by, bl);
5201 } else {
5202 ::decode(legacy_snaps, bl);
5203 }
5204 ::decode(truncate_seq, bl);
5205 ::decode(truncate_size, bl);
5206
5207 // if this is struct_v >= 13, we will overwrite this
5208 // below since this field is just here for backwards
5209 // compatibility
5210 __u8 lo;
5211 ::decode(lo, bl);
5212 flags = (flag_t)lo;
5213
5214 ::decode(old_watchers, bl);
5215 eversion_t user_eversion;
5216 ::decode(user_eversion, bl);
5217 user_version = user_eversion.version;
5218
5219 if (struct_v >= 9) {
5220 bool uses_tmap = false;
5221 ::decode(uses_tmap, bl);
5222 if (uses_tmap)
5223 set_flag(FLAG_USES_TMAP);
5224 } else {
5225 set_flag(FLAG_USES_TMAP);
5226 }
5227 if (struct_v < 10)
5228 soid.pool = myoloc.pool;
5229 if (struct_v >= 11) {
5230 ::decode(watchers, bl);
5231 } else {
5232 for (map<entity_name_t, watch_info_t>::iterator i = old_watchers.begin();
5233 i != old_watchers.end();
5234 ++i) {
5235 watchers.insert(
5236 make_pair(
5237 make_pair(i->second.cookie, i->first), i->second));
5238 }
5239 }
5240 if (struct_v >= 13) {
5241 __u32 _flags;
5242 ::decode(_flags, bl);
5243 flags = (flag_t)_flags;
5244 }
5245 if (struct_v >= 14) {
5246 ::decode(local_mtime, bl);
5247 } else {
5248 local_mtime = utime_t();
5249 }
5250 if (struct_v >= 15) {
5251 ::decode(data_digest, bl);
5252 ::decode(omap_digest, bl);
5253 } else {
5254 data_digest = omap_digest = -1;
5255 clear_flag(FLAG_DATA_DIGEST);
5256 clear_flag(FLAG_OMAP_DIGEST);
5257 }
5258 if (struct_v >= 16) {
5259 ::decode(expected_object_size, bl);
5260 ::decode(expected_write_size, bl);
5261 ::decode(alloc_hint_flags, bl);
5262 } else {
5263 expected_object_size = 0;
5264 expected_write_size = 0;
5265 alloc_hint_flags = 0;
5266 }
31f18b77
FG
5267 if (struct_v >= 17) {
5268 if (has_manifest()) {
5269 ::decode(manifest, bl);
5270 }
5271 }
7c673cae
FG
5272 DECODE_FINISH(bl);
5273}
5274
5275void object_info_t::dump(Formatter *f) const
5276{
5277 f->open_object_section("oid");
5278 soid.dump(f);
5279 f->close_section();
5280 f->dump_stream("version") << version;
5281 f->dump_stream("prior_version") << prior_version;
5282 f->dump_stream("last_reqid") << last_reqid;
5283 f->dump_unsigned("user_version", user_version);
5284 f->dump_unsigned("size", size);
5285 f->dump_stream("mtime") << mtime;
5286 f->dump_stream("local_mtime") << local_mtime;
5287 f->dump_unsigned("lost", (int)is_lost());
5288 f->dump_unsigned("flags", (int)flags);
5289 f->open_array_section("legacy_snaps");
5290 for (auto s : legacy_snaps) {
5291 f->dump_unsigned("snap", s);
5292 }
5293 f->close_section();
5294 f->dump_unsigned("truncate_seq", truncate_seq);
5295 f->dump_unsigned("truncate_size", truncate_size);
5296 f->dump_unsigned("data_digest", data_digest);
5297 f->dump_unsigned("omap_digest", omap_digest);
5298 f->dump_unsigned("expected_object_size", expected_object_size);
5299 f->dump_unsigned("expected_write_size", expected_write_size);
5300 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
31f18b77 5301 f->dump_object("manifest", manifest);
7c673cae
FG
5302 f->open_object_section("watchers");
5303 for (map<pair<uint64_t, entity_name_t>,watch_info_t>::const_iterator p =
5304 watchers.begin(); p != watchers.end(); ++p) {
5305 stringstream ss;
5306 ss << p->first.second;
5307 f->open_object_section(ss.str().c_str());
5308 p->second.dump(f);
5309 f->close_section();
5310 }
5311 f->close_section();
5312}
5313
5314void object_info_t::generate_test_instances(list<object_info_t*>& o)
5315{
5316 o.push_back(new object_info_t());
5317
5318 // fixme
5319}
5320
5321
5322ostream& operator<<(ostream& out, const object_info_t& oi)
5323{
5324 out << oi.soid << "(" << oi.version
5325 << " " << oi.last_reqid;
5326 if (oi.soid.snap != CEPH_NOSNAP && !oi.legacy_snaps.empty())
5327 out << " " << oi.legacy_snaps;
5328 if (oi.flags)
5329 out << " " << oi.get_flag_string();
5330 out << " s " << oi.size;
5331 out << " uv " << oi.user_version;
5332 if (oi.is_data_digest())
5333 out << " dd " << std::hex << oi.data_digest << std::dec;
5334 if (oi.is_omap_digest())
5335 out << " od " << std::hex << oi.omap_digest << std::dec;
5336 out << " alloc_hint [" << oi.expected_object_size
5337 << " " << oi.expected_write_size
5338 << " " << oi.alloc_hint_flags << "]";
31f18b77
FG
5339 if (oi.has_manifest())
5340 out << " " << oi.manifest;
7c673cae
FG
5341
5342 out << ")";
5343 return out;
5344}
5345
5346// -- ObjectRecovery --
5347void ObjectRecoveryProgress::encode(bufferlist &bl) const
5348{
5349 ENCODE_START(1, 1, bl);
5350 ::encode(first, bl);
5351 ::encode(data_complete, bl);
5352 ::encode(data_recovered_to, bl);
5353 ::encode(omap_recovered_to, bl);
5354 ::encode(omap_complete, bl);
5355 ENCODE_FINISH(bl);
5356}
5357
5358void ObjectRecoveryProgress::decode(bufferlist::iterator &bl)
5359{
5360 DECODE_START(1, bl);
5361 ::decode(first, bl);
5362 ::decode(data_complete, bl);
5363 ::decode(data_recovered_to, bl);
5364 ::decode(omap_recovered_to, bl);
5365 ::decode(omap_complete, bl);
5366 DECODE_FINISH(bl);
5367}
5368
5369ostream &operator<<(ostream &out, const ObjectRecoveryProgress &prog)
5370{
5371 return prog.print(out);
5372}
5373
5374void ObjectRecoveryProgress::generate_test_instances(
5375 list<ObjectRecoveryProgress*>& o)
5376{
5377 o.push_back(new ObjectRecoveryProgress);
5378 o.back()->first = false;
5379 o.back()->data_complete = true;
5380 o.back()->omap_complete = true;
5381 o.back()->data_recovered_to = 100;
5382
5383 o.push_back(new ObjectRecoveryProgress);
5384 o.back()->first = true;
5385 o.back()->data_complete = false;
5386 o.back()->omap_complete = false;
5387 o.back()->data_recovered_to = 0;
5388}
5389
5390ostream &ObjectRecoveryProgress::print(ostream &out) const
5391{
5392 return out << "ObjectRecoveryProgress("
5393 << ( first ? "" : "!" ) << "first, "
5394 << "data_recovered_to:" << data_recovered_to
5395 << ", data_complete:" << ( data_complete ? "true" : "false" )
5396 << ", omap_recovered_to:" << omap_recovered_to
5397 << ", omap_complete:" << ( omap_complete ? "true" : "false" )
224ce89b 5398 << ", error:" << ( error ? "true" : "false" )
7c673cae
FG
5399 << ")";
5400}
5401
5402void ObjectRecoveryProgress::dump(Formatter *f) const
5403{
5404 f->dump_int("first?", first);
5405 f->dump_int("data_complete?", data_complete);
5406 f->dump_unsigned("data_recovered_to", data_recovered_to);
5407 f->dump_int("omap_complete?", omap_complete);
5408 f->dump_string("omap_recovered_to", omap_recovered_to);
5409}
5410
5411void ObjectRecoveryInfo::encode(bufferlist &bl, uint64_t features) const
5412{
5413 ENCODE_START(2, 1, bl);
5414 ::encode(soid, bl);
5415 ::encode(version, bl);
5416 ::encode(size, bl);
5417 ::encode(oi, bl, features);
5418 ::encode(ss, bl);
5419 ::encode(copy_subset, bl);
5420 ::encode(clone_subset, bl);
5421 ENCODE_FINISH(bl);
5422}
5423
5424void ObjectRecoveryInfo::decode(bufferlist::iterator &bl,
5425 int64_t pool)
5426{
5427 DECODE_START(2, bl);
5428 ::decode(soid, bl);
5429 ::decode(version, bl);
5430 ::decode(size, bl);
5431 ::decode(oi, bl);
5432 ::decode(ss, bl);
5433 ::decode(copy_subset, bl);
5434 ::decode(clone_subset, bl);
5435 DECODE_FINISH(bl);
5436
5437 if (struct_v < 2) {
5438 if (!soid.is_max() && soid.pool == -1)
5439 soid.pool = pool;
5440 map<hobject_t, interval_set<uint64_t>> tmp;
5441 tmp.swap(clone_subset);
5442 for (map<hobject_t, interval_set<uint64_t>>::iterator i = tmp.begin();
5443 i != tmp.end();
5444 ++i) {
5445 hobject_t first(i->first);
5446 if (!first.is_max() && first.pool == -1)
5447 first.pool = pool;
5448 clone_subset[first].swap(i->second);
5449 }
5450 }
5451}
5452
5453void ObjectRecoveryInfo::generate_test_instances(
5454 list<ObjectRecoveryInfo*>& o)
5455{
5456 o.push_back(new ObjectRecoveryInfo);
5457 o.back()->soid = hobject_t(sobject_t("key", CEPH_NOSNAP));
5458 o.back()->version = eversion_t(0,0);
5459 o.back()->size = 100;
5460}
5461
5462
5463void ObjectRecoveryInfo::dump(Formatter *f) const
5464{
5465 f->dump_stream("object") << soid;
5466 f->dump_stream("at_version") << version;
5467 f->dump_stream("size") << size;
5468 {
5469 f->open_object_section("object_info");
5470 oi.dump(f);
5471 f->close_section();
5472 }
5473 {
5474 f->open_object_section("snapset");
5475 ss.dump(f);
5476 f->close_section();
5477 }
5478 f->dump_stream("copy_subset") << copy_subset;
5479 f->dump_stream("clone_subset") << clone_subset;
5480}
5481
5482ostream& operator<<(ostream& out, const ObjectRecoveryInfo &inf)
5483{
5484 return inf.print(out);
5485}
5486
5487ostream &ObjectRecoveryInfo::print(ostream &out) const
5488{
5489 return out << "ObjectRecoveryInfo("
5490 << soid << "@" << version
5491 << ", size: " << size
5492 << ", copy_subset: " << copy_subset
5493 << ", clone_subset: " << clone_subset
5494 << ", snapset: " << ss
5495 << ")";
5496}
5497
5498// -- PushReplyOp --
5499void PushReplyOp::generate_test_instances(list<PushReplyOp*> &o)
5500{
5501 o.push_back(new PushReplyOp);
5502 o.push_back(new PushReplyOp);
5503 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5504 o.push_back(new PushReplyOp);
5505 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5506}
5507
5508void PushReplyOp::encode(bufferlist &bl) const
5509{
5510 ENCODE_START(1, 1, bl);
5511 ::encode(soid, bl);
5512 ENCODE_FINISH(bl);
5513}
5514
5515void PushReplyOp::decode(bufferlist::iterator &bl)
5516{
5517 DECODE_START(1, bl);
5518 ::decode(soid, bl);
5519 DECODE_FINISH(bl);
5520}
5521
5522void PushReplyOp::dump(Formatter *f) const
5523{
5524 f->dump_stream("soid") << soid;
5525}
5526
5527ostream &PushReplyOp::print(ostream &out) const
5528{
5529 return out
5530 << "PushReplyOp(" << soid
5531 << ")";
5532}
5533
5534ostream& operator<<(ostream& out, const PushReplyOp &op)
5535{
5536 return op.print(out);
5537}
5538
5539uint64_t PushReplyOp::cost(CephContext *cct) const
5540{
5541
5542 return cct->_conf->osd_push_per_object_cost +
5543 cct->_conf->osd_recovery_max_chunk;
5544}
5545
5546// -- PullOp --
5547void PullOp::generate_test_instances(list<PullOp*> &o)
5548{
5549 o.push_back(new PullOp);
5550 o.push_back(new PullOp);
5551 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5552 o.back()->recovery_info.version = eversion_t(3, 10);
5553 o.push_back(new PullOp);
5554 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5555 o.back()->recovery_info.version = eversion_t(0, 0);
5556}
5557
5558void PullOp::encode(bufferlist &bl, uint64_t features) const
5559{
5560 ENCODE_START(1, 1, bl);
5561 ::encode(soid, bl);
5562 ::encode(recovery_info, bl, features);
5563 ::encode(recovery_progress, bl);
5564 ENCODE_FINISH(bl);
5565}
5566
5567void PullOp::decode(bufferlist::iterator &bl)
5568{
5569 DECODE_START(1, bl);
5570 ::decode(soid, bl);
5571 ::decode(recovery_info, bl);
5572 ::decode(recovery_progress, bl);
5573 DECODE_FINISH(bl);
5574}
5575
5576void PullOp::dump(Formatter *f) const
5577{
5578 f->dump_stream("soid") << soid;
5579 {
5580 f->open_object_section("recovery_info");
5581 recovery_info.dump(f);
5582 f->close_section();
5583 }
5584 {
5585 f->open_object_section("recovery_progress");
5586 recovery_progress.dump(f);
5587 f->close_section();
5588 }
5589}
5590
5591ostream &PullOp::print(ostream &out) const
5592{
5593 return out
5594 << "PullOp(" << soid
5595 << ", recovery_info: " << recovery_info
5596 << ", recovery_progress: " << recovery_progress
5597 << ")";
5598}
5599
5600ostream& operator<<(ostream& out, const PullOp &op)
5601{
5602 return op.print(out);
5603}
5604
5605uint64_t PullOp::cost(CephContext *cct) const
5606{
5607 return cct->_conf->osd_push_per_object_cost +
5608 cct->_conf->osd_recovery_max_chunk;
5609}
5610
5611// -- PushOp --
5612void PushOp::generate_test_instances(list<PushOp*> &o)
5613{
5614 o.push_back(new PushOp);
5615 o.push_back(new PushOp);
5616 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5617 o.back()->version = eversion_t(3, 10);
5618 o.push_back(new PushOp);
5619 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5620 o.back()->version = eversion_t(0, 0);
5621}
5622
5623void PushOp::encode(bufferlist &bl, uint64_t features) const
5624{
5625 ENCODE_START(1, 1, bl);
5626 ::encode(soid, bl);
5627 ::encode(version, bl);
5628 ::encode(data, bl);
5629 ::encode(data_included, bl);
5630 ::encode(omap_header, bl);
5631 ::encode(omap_entries, bl);
5632 ::encode(attrset, bl);
5633 ::encode(recovery_info, bl, features);
5634 ::encode(after_progress, bl);
5635 ::encode(before_progress, bl);
5636 ENCODE_FINISH(bl);
5637}
5638
5639void PushOp::decode(bufferlist::iterator &bl)
5640{
5641 DECODE_START(1, bl);
5642 ::decode(soid, bl);
5643 ::decode(version, bl);
5644 ::decode(data, bl);
5645 ::decode(data_included, bl);
5646 ::decode(omap_header, bl);
5647 ::decode(omap_entries, bl);
5648 ::decode(attrset, bl);
5649 ::decode(recovery_info, bl);
5650 ::decode(after_progress, bl);
5651 ::decode(before_progress, bl);
5652 DECODE_FINISH(bl);
5653}
5654
5655void PushOp::dump(Formatter *f) const
5656{
5657 f->dump_stream("soid") << soid;
5658 f->dump_stream("version") << version;
5659 f->dump_int("data_len", data.length());
5660 f->dump_stream("data_included") << data_included;
5661 f->dump_int("omap_header_len", omap_header.length());
5662 f->dump_int("omap_entries_len", omap_entries.size());
5663 f->dump_int("attrset_len", attrset.size());
5664 {
5665 f->open_object_section("recovery_info");
5666 recovery_info.dump(f);
5667 f->close_section();
5668 }
5669 {
5670 f->open_object_section("after_progress");
5671 after_progress.dump(f);
5672 f->close_section();
5673 }
5674 {
5675 f->open_object_section("before_progress");
5676 before_progress.dump(f);
5677 f->close_section();
5678 }
5679}
5680
5681ostream &PushOp::print(ostream &out) const
5682{
5683 return out
5684 << "PushOp(" << soid
5685 << ", version: " << version
5686 << ", data_included: " << data_included
5687 << ", data_size: " << data.length()
5688 << ", omap_header_size: " << omap_header.length()
5689 << ", omap_entries_size: " << omap_entries.size()
5690 << ", attrset_size: " << attrset.size()
5691 << ", recovery_info: " << recovery_info
5692 << ", after_progress: " << after_progress
5693 << ", before_progress: " << before_progress
5694 << ")";
5695}
5696
5697ostream& operator<<(ostream& out, const PushOp &op)
5698{
5699 return op.print(out);
5700}
5701
5702uint64_t PushOp::cost(CephContext *cct) const
5703{
5704 uint64_t cost = data_included.size();
5705 for (map<string, bufferlist>::const_iterator i =
5706 omap_entries.begin();
5707 i != omap_entries.end();
5708 ++i) {
5709 cost += i->second.length();
5710 }
5711 cost += cct->_conf->osd_push_per_object_cost;
5712 return cost;
5713}
5714
5715// -- ScrubMap --
5716
5717void ScrubMap::merge_incr(const ScrubMap &l)
5718{
5719 assert(valid_through == l.incr_since);
5720 valid_through = l.valid_through;
5721
5722 for (map<hobject_t,object>::const_iterator p = l.objects.begin();
5723 p != l.objects.end();
5724 ++p){
5725 if (p->second.negative) {
5726 map<hobject_t,object>::iterator q = objects.find(p->first);
5727 if (q != objects.end()) {
5728 objects.erase(q);
5729 }
5730 } else {
5731 objects[p->first] = p->second;
5732 }
5733 }
5734}
5735
5736void ScrubMap::encode(bufferlist& bl) const
5737{
5738 ENCODE_START(3, 2, bl);
5739 ::encode(objects, bl);
5740 ::encode((__u32)0, bl); // used to be attrs; now deprecated
5741 bufferlist old_logbl; // not used
5742 ::encode(old_logbl, bl);
5743 ::encode(valid_through, bl);
5744 ::encode(incr_since, bl);
5745 ENCODE_FINISH(bl);
5746}
5747
5748void ScrubMap::decode(bufferlist::iterator& bl, int64_t pool)
5749{
5750 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
5751 ::decode(objects, bl);
5752 {
5753 map<string,string> attrs; // deprecated
5754 ::decode(attrs, bl);
5755 }
5756 bufferlist old_logbl; // not used
5757 ::decode(old_logbl, bl);
5758 ::decode(valid_through, bl);
5759 ::decode(incr_since, bl);
5760 DECODE_FINISH(bl);
5761
5762 // handle hobject_t upgrade
5763 if (struct_v < 3) {
5764 map<hobject_t, object> tmp;
5765 tmp.swap(objects);
5766 for (map<hobject_t, object>::iterator i = tmp.begin();
5767 i != tmp.end();
5768 ++i) {
5769 hobject_t first(i->first);
5770 if (!first.is_max() && first.pool == -1)
5771 first.pool = pool;
5772 objects[first] = i->second;
5773 }
5774 }
5775}
5776
5777void ScrubMap::dump(Formatter *f) const
5778{
5779 f->dump_stream("valid_through") << valid_through;
5780 f->dump_stream("incremental_since") << incr_since;
5781 f->open_array_section("objects");
5782 for (map<hobject_t,object>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
5783 f->open_object_section("object");
5784 f->dump_string("name", p->first.oid.name);
5785 f->dump_unsigned("hash", p->first.get_hash());
5786 f->dump_string("key", p->first.get_key());
5787 f->dump_int("snapid", p->first.snap);
5788 p->second.dump(f);
5789 f->close_section();
5790 }
5791 f->close_section();
5792}
5793
5794void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
5795{
5796 o.push_back(new ScrubMap);
5797 o.push_back(new ScrubMap);
5798 o.back()->valid_through = eversion_t(1, 2);
5799 o.back()->incr_since = eversion_t(3, 4);
5800 list<object*> obj;
5801 object::generate_test_instances(obj);
5802 o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
5803 obj.pop_back();
5804 o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
5805}
5806
5807// -- ScrubMap::object --
5808
5809void ScrubMap::object::encode(bufferlist& bl) const
5810{
5811 bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
5812 ENCODE_START(8, 7, bl);
5813 ::encode(size, bl);
5814 ::encode(negative, bl);
5815 ::encode(attrs, bl);
5816 ::encode(digest, bl);
5817 ::encode(digest_present, bl);
5818 ::encode((uint32_t)0, bl); // obsolete nlinks
5819 ::encode((uint32_t)0, bl); // snapcolls
5820 ::encode(omap_digest, bl);
5821 ::encode(omap_digest_present, bl);
5822 ::encode(compat_read_error, bl);
5823 ::encode(stat_error, bl);
5824 ::encode(read_error, bl);
5825 ::encode(ec_hash_mismatch, bl);
5826 ::encode(ec_size_mismatch, bl);
5827 ENCODE_FINISH(bl);
5828}
5829
5830void ScrubMap::object::decode(bufferlist::iterator& bl)
5831{
5832 DECODE_START(8, bl);
5833 ::decode(size, bl);
5834 bool tmp, compat_read_error = false;
5835 ::decode(tmp, bl);
5836 negative = tmp;
5837 ::decode(attrs, bl);
5838 ::decode(digest, bl);
5839 ::decode(tmp, bl);
5840 digest_present = tmp;
5841 {
5842 uint32_t nlinks;
5843 ::decode(nlinks, bl);
5844 set<snapid_t> snapcolls;
5845 ::decode(snapcolls, bl);
5846 }
5847 ::decode(omap_digest, bl);
5848 ::decode(tmp, bl);
5849 omap_digest_present = tmp;
5850 ::decode(compat_read_error, bl);
5851 ::decode(tmp, bl);
5852 stat_error = tmp;
5853 if (struct_v >= 8) {
5854 ::decode(tmp, bl);
5855 read_error = tmp;
5856 ::decode(tmp, bl);
5857 ec_hash_mismatch = tmp;
5858 ::decode(tmp, bl);
5859 ec_size_mismatch = tmp;
5860 }
5861 // If older encoder found a read_error, set read_error
5862 if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
5863 read_error = true;
5864 DECODE_FINISH(bl);
5865}
5866
5867void ScrubMap::object::dump(Formatter *f) const
5868{
5869 f->dump_int("size", size);
5870 f->dump_int("negative", negative);
5871 f->open_array_section("attrs");
5872 for (map<string,bufferptr>::const_iterator p = attrs.begin(); p != attrs.end(); ++p) {
5873 f->open_object_section("attr");
5874 f->dump_string("name", p->first);
5875 f->dump_int("length", p->second.length());
5876 f->close_section();
5877 }
5878 f->close_section();
5879}
5880
5881void ScrubMap::object::generate_test_instances(list<object*>& o)
5882{
5883 o.push_back(new object);
5884 o.push_back(new object);
5885 o.back()->negative = true;
5886 o.push_back(new object);
5887 o.back()->size = 123;
5888 o.back()->attrs["foo"] = buffer::copy("foo", 3);
5889 o.back()->attrs["bar"] = buffer::copy("barval", 6);
5890}
5891
5892// -- OSDOp --
5893
5894ostream& operator<<(ostream& out, const OSDOp& op)
5895{
5896 out << ceph_osd_op_name(op.op.op);
5897 if (ceph_osd_op_type_data(op.op.op)) {
5898 // data extent
5899 switch (op.op.op) {
5900 case CEPH_OSD_OP_ASSERT_VER:
5901 out << " v" << op.op.assert_ver.ver;
5902 break;
5903 case CEPH_OSD_OP_TRUNCATE:
5904 out << " " << op.op.extent.offset;
5905 break;
5906 case CEPH_OSD_OP_MASKTRUNC:
5907 case CEPH_OSD_OP_TRIMTRUNC:
5908 out << " " << op.op.extent.truncate_seq << "@"
5909 << (int64_t)op.op.extent.truncate_size;
5910 break;
5911 case CEPH_OSD_OP_ROLLBACK:
5912 out << " " << snapid_t(op.op.snap.snapid);
5913 break;
5914 case CEPH_OSD_OP_WATCH:
5915 out << " " << ceph_osd_watch_op_name(op.op.watch.op)
5916 << " cookie " << op.op.watch.cookie;
5917 if (op.op.watch.gen)
5918 out << " gen " << op.op.watch.gen;
5919 break;
5920 case CEPH_OSD_OP_NOTIFY:
5921 case CEPH_OSD_OP_NOTIFY_ACK:
5922 out << " cookie " << op.op.notify.cookie;
5923 break;
5924 case CEPH_OSD_OP_COPY_GET:
5925 out << " max " << op.op.copy_get.max;
5926 break;
5927 case CEPH_OSD_OP_COPY_FROM:
5928 out << " ver " << op.op.copy_from.src_version;
5929 break;
5930 case CEPH_OSD_OP_SETALLOCHINT:
5931 out << " object_size " << op.op.alloc_hint.expected_object_size
5932 << " write_size " << op.op.alloc_hint.expected_write_size;
5933 break;
5934 case CEPH_OSD_OP_READ:
5935 case CEPH_OSD_OP_SPARSE_READ:
5936 case CEPH_OSD_OP_SYNC_READ:
5937 case CEPH_OSD_OP_WRITE:
5938 case CEPH_OSD_OP_WRITEFULL:
5939 case CEPH_OSD_OP_ZERO:
5940 case CEPH_OSD_OP_APPEND:
5941 case CEPH_OSD_OP_MAPEXT:
5942 out << " " << op.op.extent.offset << "~" << op.op.extent.length;
5943 if (op.op.extent.truncate_seq)
5944 out << " [" << op.op.extent.truncate_seq << "@"
5945 << (int64_t)op.op.extent.truncate_size << "]";
5946 if (op.op.flags)
5947 out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
5948 default:
5949 // don't show any arg info
5950 break;
5951 }
5952 } else if (ceph_osd_op_type_attr(op.op.op)) {
5953 // xattr name
5954 if (op.op.xattr.name_len && op.indata.length()) {
5955 out << " ";
5956 op.indata.write(0, op.op.xattr.name_len, out);
5957 }
5958 if (op.op.xattr.value_len)
5959 out << " (" << op.op.xattr.value_len << ")";
5960 if (op.op.op == CEPH_OSD_OP_CMPXATTR)
5961 out << " op " << (int)op.op.xattr.cmp_op
5962 << " mode " << (int)op.op.xattr.cmp_mode;
5963 } else if (ceph_osd_op_type_exec(op.op.op)) {
5964 // class.method
5965 if (op.op.cls.class_len && op.indata.length()) {
5966 out << " ";
5967 op.indata.write(0, op.op.cls.class_len, out);
5968 out << ".";
5969 op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
5970 }
5971 } else if (ceph_osd_op_type_pg(op.op.op)) {
5972 switch (op.op.op) {
5973 case CEPH_OSD_OP_PGLS:
5974 case CEPH_OSD_OP_PGLS_FILTER:
5975 case CEPH_OSD_OP_PGNLS:
5976 case CEPH_OSD_OP_PGNLS_FILTER:
5977 out << " start_epoch " << op.op.pgls.start_epoch;
5978 break;
5979 case CEPH_OSD_OP_PG_HITSET_LS:
5980 break;
5981 case CEPH_OSD_OP_PG_HITSET_GET:
5982 out << " " << utime_t(op.op.hit_set_get.stamp);
5983 break;
5984 case CEPH_OSD_OP_SCRUBLS:
5985 break;
5986 }
5987 }
5988 return out;
5989}
5990
5991
5992void OSDOp::split_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& in)
5993{
5994 bufferlist::iterator datap = in.begin();
5995 for (unsigned i = 0; i < ops.size(); i++) {
5996 if (ops[i].op.payload_len) {
5997 datap.copy(ops[i].op.payload_len, ops[i].indata);
5998 }
5999 }
6000}
6001
6002void OSDOp::merge_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& out)
6003{
6004 for (unsigned i = 0; i < ops.size(); i++) {
6005 if (ops[i].indata.length()) {
6006 ops[i].op.payload_len = ops[i].indata.length();
6007 out.append(ops[i].indata);
6008 }
6009 }
6010}
6011
6012void OSDOp::split_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& in)
6013{
6014 bufferlist::iterator datap = in.begin();
6015 for (unsigned i = 0; i < ops.size(); i++) {
6016 if (ops[i].op.payload_len) {
6017 datap.copy(ops[i].op.payload_len, ops[i].outdata);
6018 }
6019 }
6020}
6021
6022void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& out)
6023{
6024 for (unsigned i = 0; i < ops.size(); i++) {
6025 if (ops[i].outdata.length()) {
6026 ops[i].op.payload_len = ops[i].outdata.length();
6027 out.append(ops[i].outdata);
6028 }
6029 }
6030}
6031
6032bool store_statfs_t::operator==(const store_statfs_t& other) const
6033{
6034 return total == other.total
6035 && available == other.available
6036 && allocated == other.allocated
6037 && stored == other.stored
6038 && compressed == other.compressed
6039 && compressed_allocated == other.compressed_allocated
6040 && compressed_original == other.compressed_original;
6041}
6042
6043void store_statfs_t::dump(Formatter *f) const
6044{
6045 f->dump_int("total", total);
6046 f->dump_int("available", available);
6047 f->dump_int("allocated", allocated);
6048 f->dump_int("stored", stored);
6049 f->dump_int("compressed", compressed);
6050 f->dump_int("compressed_allocated", compressed_allocated);
6051 f->dump_int("compressed_original", compressed_original);
6052}
6053
6054ostream& operator<<(ostream& out, const store_statfs_t &s)
6055{
6056 out << std::hex
6057 << "store_statfs(0x" << s.available
6058 << "/0x" << s.total
6059 << ", stored 0x" << s.stored
6060 << "/0x" << s.allocated
6061 << ", compress 0x" << s.compressed
6062 << "/0x" << s.compressed_allocated
6063 << "/0x" << s.compressed_original
6064 << std::dec
6065 << ")";
6066 return out;
6067}
224ce89b
WB
6068
6069void OSDOp::clear_data(vector<OSDOp>& ops)
6070{
6071 for (unsigned i = 0; i < ops.size(); i++) {
6072 OSDOp& op = ops[i];
6073 op.outdata.clear();
6074 if (ceph_osd_op_type_attr(op.op.op) &&
6075 op.op.xattr.name_len &&
6076 op.indata.length() >= op.op.xattr.name_len) {
6077 bufferptr bp(op.op.xattr.name_len);
6078 bufferlist bl;
6079 bl.append(bp);
6080 bl.copy_in(0, op.op.xattr.name_len, op.indata);
6081 op.indata.claim(bl);
6082 } else if (ceph_osd_op_type_exec(op.op.op) &&
6083 op.op.cls.class_len &&
6084 op.indata.length() >
6085 (op.op.cls.class_len + op.op.cls.method_len)) {
6086 __u8 len = op.op.cls.class_len + op.op.cls.method_len;
6087 bufferptr bp(len);
6088 bufferlist bl;
6089 bl.append(bp);
6090 bl.copy_in(0, len, op.indata);
6091 op.indata.claim(bl);
6092 } else {
6093 op.indata.clear();
6094 }
6095 }
6096}
6097