1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2011 New Dream Network
 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
 *
 * Author: Loic Dachary <loic@dachary.org>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 */
18 #include <boost/assign/list_of.hpp>
20 #include "osd_types.h"
21 #include "include/ceph_features.h"
23 #include "crush/hash.h"
27 #include "PGBackend.h"
29 const char *ceph_osd_flag_name(unsigned flag
)
32 case CEPH_OSD_FLAG_ACK
: return "ack";
33 case CEPH_OSD_FLAG_ONNVRAM
: return "onnvram";
34 case CEPH_OSD_FLAG_ONDISK
: return "ondisk";
35 case CEPH_OSD_FLAG_RETRY
: return "retry";
36 case CEPH_OSD_FLAG_READ
: return "read";
37 case CEPH_OSD_FLAG_WRITE
: return "write";
38 case CEPH_OSD_FLAG_ORDERSNAP
: return "ordersnap";
39 case CEPH_OSD_FLAG_PEERSTAT_OLD
: return "peerstat_old";
40 case CEPH_OSD_FLAG_BALANCE_READS
: return "balance_reads";
41 case CEPH_OSD_FLAG_PARALLELEXEC
: return "parallelexec";
42 case CEPH_OSD_FLAG_PGOP
: return "pgop";
43 case CEPH_OSD_FLAG_EXEC
: return "exec";
44 case CEPH_OSD_FLAG_EXEC_PUBLIC
: return "exec_public";
45 case CEPH_OSD_FLAG_LOCALIZE_READS
: return "localize_reads";
46 case CEPH_OSD_FLAG_RWORDERED
: return "rwordered";
47 case CEPH_OSD_FLAG_IGNORE_CACHE
: return "ignore_cache";
48 case CEPH_OSD_FLAG_SKIPRWLOCKS
: return "skiprwlocks";
49 case CEPH_OSD_FLAG_IGNORE_OVERLAY
: return "ignore_overlay";
50 case CEPH_OSD_FLAG_FLUSH
: return "flush";
51 case CEPH_OSD_FLAG_MAP_SNAP_CLONE
: return "map_snap_clone";
52 case CEPH_OSD_FLAG_ENFORCE_SNAPC
: return "enforce_snapc";
53 case CEPH_OSD_FLAG_REDIRECTED
: return "redirected";
54 case CEPH_OSD_FLAG_KNOWN_REDIR
: return "known_if_redirected";
55 case CEPH_OSD_FLAG_FULL_TRY
: return "full_try";
56 case CEPH_OSD_FLAG_FULL_FORCE
: return "full_force";
57 default: return "???";
61 string
ceph_osd_flag_string(unsigned flags
)
64 for (unsigned i
=0; i
<32; ++i
) {
65 if (flags
& (1u<<i
)) {
68 s
+= ceph_osd_flag_name(1u << i
);
76 const char * ceph_osd_op_flag_name(unsigned flag
)
81 case CEPH_OSD_OP_FLAG_EXCL
:
84 case CEPH_OSD_OP_FLAG_FAILOK
:
87 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM
:
88 name
= "fadvise_random";
90 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL
:
91 name
= "fadvise_sequential";
93 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED
:
94 name
= "favise_willneed";
96 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED
:
97 name
= "fadvise_dontneed";
99 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE
:
100 name
= "fadvise_nocache";
109 string
ceph_osd_op_flag_string(unsigned flags
)
112 for (unsigned i
=0; i
<32; ++i
) {
113 if (flags
& (1u<<i
)) {
116 s
+= ceph_osd_op_flag_name(1u << i
);
124 string
ceph_osd_alloc_hint_flag_string(unsigned flags
)
127 for (unsigned i
=0; i
<32; ++i
) {
128 if (flags
& (1u<<i
)) {
131 s
+= ceph_osd_alloc_hint_flag_name(1u << i
);
139 void pg_shard_t::encode(bufferlist
&bl
) const
141 ENCODE_START(1, 1, bl
);
146 void pg_shard_t::decode(bufferlist::iterator
&bl
)
154 ostream
&operator<<(ostream
&lhs
, const pg_shard_t
&rhs
)
156 if (rhs
.is_undefined())
158 if (rhs
.shard
== shard_id_t::NO_SHARD
)
159 return lhs
<< rhs
.osd
;
160 return lhs
<< rhs
.osd
<< '(' << (unsigned)(rhs
.shard
) << ')';
164 void osd_reqid_t::dump(Formatter
*f
) const
166 f
->dump_stream("name") << name
;
167 f
->dump_int("inc", inc
);
168 f
->dump_unsigned("tid", tid
);
171 void osd_reqid_t::generate_test_instances(list
<osd_reqid_t
*>& o
)
173 o
.push_back(new osd_reqid_t
);
174 o
.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
177 // -- object_locator_t --
179 void object_locator_t::encode(bufferlist
& bl
) const
181 // verify that nobody's corrupted the locator
182 assert(hash
== -1 || key
.empty());
183 __u8 encode_compat
= 3;
184 ENCODE_START(6, encode_compat
, bl
);
186 int32_t preferred
= -1; // tell old code there is no preferred osd (-1).
187 ::encode(preferred
, bl
);
189 ::encode(nspace
, bl
);
192 encode_compat
= MAX(encode_compat
, 6); // need to interpret the hash
193 ENCODE_FINISH_NEW_COMPAT(bl
, encode_compat
);
196 void object_locator_t::decode(bufferlist::iterator
& p
)
198 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p
);
208 ::decode(preferred
, p
);
218 // verify that nobody's corrupted the locator
219 assert(hash
== -1 || key
.empty());
222 void object_locator_t::dump(Formatter
*f
) const
224 f
->dump_int("pool", pool
);
225 f
->dump_string("key", key
);
226 f
->dump_string("namespace", nspace
);
227 f
->dump_int("hash", hash
);
230 void object_locator_t::generate_test_instances(list
<object_locator_t
*>& o
)
232 o
.push_back(new object_locator_t
);
233 o
.push_back(new object_locator_t(123));
234 o
.push_back(new object_locator_t(123, 876));
235 o
.push_back(new object_locator_t(1, "n2"));
236 o
.push_back(new object_locator_t(1234, "", "key"));
237 o
.push_back(new object_locator_t(12, "n1", "key2"));
240 // -- request_redirect_t --
241 void request_redirect_t::encode(bufferlist
& bl
) const
243 ENCODE_START(1, 1, bl
);
244 ::encode(redirect_locator
, bl
);
245 ::encode(redirect_object
, bl
);
246 ::encode(osd_instructions
, bl
);
250 void request_redirect_t::decode(bufferlist::iterator
& bl
)
253 ::decode(redirect_locator
, bl
);
254 ::decode(redirect_object
, bl
);
255 ::decode(osd_instructions
, bl
);
259 void request_redirect_t::dump(Formatter
*f
) const
261 f
->dump_string("object", redirect_object
);
262 f
->open_object_section("locator");
263 redirect_locator
.dump(f
);
264 f
->close_section(); // locator
267 void request_redirect_t::generate_test_instances(list
<request_redirect_t
*>& o
)
269 object_locator_t
loc(1, "redir_obj");
270 o
.push_back(new request_redirect_t());
271 o
.push_back(new request_redirect_t(loc
, 0));
272 o
.push_back(new request_redirect_t(loc
, "redir_obj"));
273 o
.push_back(new request_redirect_t(loc
));
276 void objectstore_perf_stat_t::dump(Formatter
*f
) const
278 f
->dump_unsigned("commit_latency_ms", os_commit_latency
);
279 f
->dump_unsigned("apply_latency_ms", os_apply_latency
);
282 void objectstore_perf_stat_t::encode(bufferlist
&bl
) const
284 ENCODE_START(1, 1, bl
);
285 ::encode(os_commit_latency
, bl
);
286 ::encode(os_apply_latency
, bl
);
290 void objectstore_perf_stat_t::decode(bufferlist::iterator
&bl
)
293 ::decode(os_commit_latency
, bl
);
294 ::decode(os_apply_latency
, bl
);
298 void objectstore_perf_stat_t::generate_test_instances(std::list
<objectstore_perf_stat_t
*>& o
)
300 o
.push_back(new objectstore_perf_stat_t());
301 o
.push_back(new objectstore_perf_stat_t());
302 o
.back()->os_commit_latency
= 20;
303 o
.back()->os_apply_latency
= 30;
307 void osd_stat_t::dump(Formatter
*f
) const
309 f
->dump_unsigned("kb", kb
);
310 f
->dump_unsigned("kb_used", kb_used
);
311 f
->dump_unsigned("kb_avail", kb_avail
);
312 f
->open_array_section("hb_peers");
313 for (auto p
: hb_peers
)
314 f
->dump_int("osd", p
);
316 f
->dump_int("snap_trim_queue_len", snap_trim_queue_len
);
317 f
->dump_int("num_snap_trimming", num_snap_trimming
);
318 f
->open_object_section("op_queue_age_hist");
319 op_queue_age_hist
.dump(f
);
321 f
->open_object_section("perf_stat");
322 os_perf_stat
.dump(f
);
326 void osd_stat_t::encode(bufferlist
&bl
) const
328 ENCODE_START(5, 2, bl
);
330 ::encode(kb_used
, bl
);
331 ::encode(kb_avail
, bl
);
332 ::encode(snap_trim_queue_len
, bl
);
333 ::encode(num_snap_trimming
, bl
);
334 ::encode(hb_peers
, bl
);
335 ::encode((uint32_t)0, bl
);
336 ::encode(op_queue_age_hist
, bl
);
337 ::encode(os_perf_stat
, bl
);
341 void osd_stat_t::decode(bufferlist::iterator
&bl
)
343 DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl
);
345 ::decode(kb_used
, bl
);
346 ::decode(kb_avail
, bl
);
347 ::decode(snap_trim_queue_len
, bl
);
348 ::decode(num_snap_trimming
, bl
);
349 ::decode(hb_peers
, bl
);
350 vector
<int> num_hb_out
;
351 ::decode(num_hb_out
, bl
);
353 ::decode(op_queue_age_hist
, bl
);
355 ::decode(os_perf_stat
, bl
);
359 void osd_stat_t::generate_test_instances(std::list
<osd_stat_t
*>& o
)
361 o
.push_back(new osd_stat_t
);
363 o
.push_back(new osd_stat_t
);
365 o
.back()->kb_used
= 2;
366 o
.back()->kb_avail
= 3;
367 o
.back()->hb_peers
.push_back(7);
368 o
.back()->snap_trim_queue_len
= 8;
369 o
.back()->num_snap_trimming
= 99;
374 int pg_t::print(char *o
, int maxlen
) const
376 if (preferred() >= 0)
377 return snprintf(o
, maxlen
, "%llu.%xp%d", (unsigned long long)pool(), ps(), preferred());
379 return snprintf(o
, maxlen
, "%llu.%x", (unsigned long long)pool(), ps());
382 bool pg_t::parse(const char *s
)
387 int r
= sscanf(s
, "%llu.%xp%d", (long long unsigned *)&ppool
, &pseed
, &pref
);
399 bool spg_t::parse(const char *s
)
401 pgid
.set_preferred(-1);
402 shard
= shard_id_t::NO_SHARD
;
407 int r
= sscanf(s
, "%llu.%x", (long long unsigned *)&ppool
, &pseed
);
410 pgid
.set_pool(ppool
);
413 const char *p
= strchr(s
, 'p');
415 r
= sscanf(p
, "p%d", &pref
);
417 pgid
.set_preferred(pref
);
425 r
= sscanf(p
, "s%d", &pshard
);
427 shard
= shard_id_t(pshard
);
435 char *spg_t::calc_name(char *buf
, const char *suffix_backwords
) const
437 while (*suffix_backwords
)
438 *--buf
= *suffix_backwords
++;
440 if (!is_no_shard()) {
441 buf
= ritoa
<uint8_t, 10>((uint8_t)shard
.id
, buf
);
445 return pgid
.calc_name(buf
, "");
448 ostream
& operator<<(ostream
& out
, const spg_t
&pg
)
450 char buf
[spg_t::calc_name_buf_size
];
451 buf
[spg_t::calc_name_buf_size
- 1] = '\0';
452 out
<< pg
.calc_name(buf
+ spg_t::calc_name_buf_size
- 1, "");
456 pg_t
pg_t::get_ancestor(unsigned old_pg_num
) const
458 int old_bits
= cbits(old_pg_num
);
459 int old_mask
= (1 << old_bits
) - 1;
461 ret
.m_seed
= ceph_stable_mod(m_seed
, old_pg_num
, old_mask
);
465 bool pg_t::is_split(unsigned old_pg_num
, unsigned new_pg_num
, set
<pg_t
> *children
) const
467 assert(m_seed
< old_pg_num
);
468 if (new_pg_num
<= old_pg_num
)
473 unsigned old_bits
= cbits(old_pg_num
);
474 unsigned old_mask
= (1 << old_bits
) - 1;
475 for (unsigned n
= 1; ; n
++) {
476 unsigned next_bit
= (n
<< (old_bits
-1));
477 unsigned s
= next_bit
| m_seed
;
479 if (s
< old_pg_num
|| s
== m_seed
)
483 if ((unsigned)ceph_stable_mod(s
, old_pg_num
, old_mask
) == m_seed
) {
486 children
->insert(pg_t(s
, m_pool
, m_preferred
));
492 int old_bits
= cbits(old_pg_num
);
493 int old_mask
= (1 << old_bits
) - 1;
494 for (unsigned x
= old_pg_num
; x
< new_pg_num
; ++x
) {
495 unsigned o
= ceph_stable_mod(x
, old_pg_num
, old_mask
);
498 children
->insert(pg_t(x
, m_pool
, m_preferred
));
505 unsigned pg_t::get_split_bits(unsigned pg_num
) const {
510 // Find unique p such that pg_num \in [2^(p-1), 2^p)
511 unsigned p
= cbits(pg_num
);
512 assert(p
); // silence coverity #751330
514 if ((m_seed
% (1<<(p
-1))) < (pg_num
% (1<<(p
-1))))
520 pg_t
pg_t::get_parent() const
522 unsigned bits
= cbits(m_seed
);
525 retval
.m_seed
&= ~((~0)<<(bits
- 1));
529 hobject_t
pg_t::get_hobj_start() const
531 return hobject_t(object_t(), string(), CEPH_NOSNAP
, m_seed
, m_pool
,
535 hobject_t
pg_t::get_hobj_end(unsigned pg_num
) const
537 // note: this assumes a bitwise sort; with the legacy nibblewise
538 // sort a PG did not always cover a single contiguous range of the
539 // (bit-reversed) hash range.
540 unsigned bits
= get_split_bits(pg_num
);
541 uint64_t rev_start
= hobject_t::_reverse_bits(m_seed
);
542 uint64_t rev_end
= (rev_start
| (0xffffffff >> bits
)) + 1;
543 if (rev_end
>= 0x100000000) {
544 assert(rev_end
== 0x100000000);
545 return hobject_t::get_max();
547 return hobject_t(object_t(), string(), CEPH_NOSNAP
,
548 hobject_t::_reverse_bits(rev_end
), m_pool
,
553 void pg_t::dump(Formatter
*f
) const
555 f
->dump_unsigned("pool", m_pool
);
556 f
->dump_unsigned("seed", m_seed
);
557 f
->dump_int("preferred_osd", m_preferred
);
560 void pg_t::generate_test_instances(list
<pg_t
*>& o
)
562 o
.push_back(new pg_t
);
563 o
.push_back(new pg_t(1, 2, -1));
564 o
.push_back(new pg_t(13123, 3, -1));
565 o
.push_back(new pg_t(131223, 4, 23));
568 char *pg_t::calc_name(char *buf
, const char *suffix_backwords
) const
570 while (*suffix_backwords
)
571 *--buf
= *suffix_backwords
++;
573 if (m_preferred
>= 0)
576 buf
= ritoa
<uint32_t, 16>(m_seed
, buf
);
580 return ritoa
<uint64_t, 10>(m_pool
, buf
);
583 ostream
& operator<<(ostream
& out
, const pg_t
&pg
)
585 char buf
[pg_t::calc_name_buf_size
];
586 buf
[pg_t::calc_name_buf_size
- 1] = '\0';
587 out
<< pg
.calc_name(buf
+ pg_t::calc_name_buf_size
- 1, "");
594 void coll_t::calc_str()
598 strcpy(_str_buff
, "meta");
602 _str_buff
[spg_t::calc_name_buf_size
- 1] = '\0';
603 _str
= pgid
.calc_name(_str_buff
+ spg_t::calc_name_buf_size
- 1, "daeh_");
606 _str_buff
[spg_t::calc_name_buf_size
- 1] = '\0';
607 _str
= pgid
.calc_name(_str_buff
+ spg_t::calc_name_buf_size
- 1, "PMET_");
610 assert(0 == "unknown collection type");
614 bool coll_t::parse(const std::string
& s
)
624 if (s
.find("_head") == s
.length() - 5 &&
625 pgid
.parse(s
.substr(0, s
.length() - 5))) {
632 if (s
.find("_TEMP") == s
.length() - 5 &&
633 pgid
.parse(s
.substr(0, s
.length() - 5))) {
643 void coll_t::encode(bufferlist
& bl
) const
645 // when changing this, remember to update encoded_size() too.
647 // can't express this as v2...
649 ::encode(struct_v
, bl
);
650 ::encode(to_str(), bl
);
653 ::encode(struct_v
, bl
);
654 ::encode((__u8
)type
, bl
);
656 snapid_t snap
= CEPH_NOSNAP
;
661 size_t coll_t::encoded_size() const
663 size_t r
= sizeof(__u8
);
676 r
+= sizeof(ceph_le32
) + 2 * sizeof(__u8
);
678 r
+= sizeof(__u8
) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
682 r
+= sizeof(uint64_t);
688 void coll_t::decode(bufferlist::iterator
& bl
)
691 ::decode(struct_v
, bl
);
700 if (pgid
== spg_t() && snap
== 0) {
716 type
= (type_t
)_type
;
725 bool ok
= parse(str
);
727 throw std::domain_error(std::string("unable to parse pg ") + str
);
734 oss
<< "coll_t::decode(): don't know how to decode version "
736 throw std::domain_error(oss
.str());
741 void coll_t::dump(Formatter
*f
) const
743 f
->dump_unsigned("type_id", (unsigned)type
);
744 if (type
!= TYPE_META
)
745 f
->dump_stream("pgid") << pgid
;
746 f
->dump_string("name", to_str());
749 void coll_t::generate_test_instances(list
<coll_t
*>& o
)
751 o
.push_back(new coll_t());
752 o
.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD
)));
753 o
.push_back(new coll_t(o
.back()->get_temp()));
754 o
.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
755 o
.push_back(new coll_t(o
.back()->get_temp()));
756 o
.push_back(new coll_t());
761 std::string
pg_vector_string(const vector
<int32_t> &a
)
765 for (vector
<int32_t>::const_iterator i
= a
.begin(); i
!= a
.end(); ++i
) {
768 if (*i
!= CRUSH_ITEM_NONE
)
777 std::string
pg_state_string(int state
)
780 if (state
& PG_STATE_STALE
)
782 if (state
& PG_STATE_CREATING
)
784 if (state
& PG_STATE_ACTIVE
)
786 if (state
& PG_STATE_ACTIVATING
)
787 oss
<< "activating+";
788 if (state
& PG_STATE_CLEAN
)
790 if (state
& PG_STATE_RECOVERY_WAIT
)
791 oss
<< "recovery_wait+";
792 if (state
& PG_STATE_RECOVERY_TOOFULL
)
793 oss
<< "recovery_toofull+";
794 if (state
& PG_STATE_RECOVERING
)
795 oss
<< "recovering+";
796 if (state
& PG_STATE_DOWN
)
798 if (state
& PG_STATE_UNDERSIZED
)
799 oss
<< "undersized+";
800 if (state
& PG_STATE_DEGRADED
)
802 if (state
& PG_STATE_REMAPPED
)
804 if (state
& PG_STATE_SCRUBBING
)
806 if (state
& PG_STATE_DEEP_SCRUB
)
808 if (state
& PG_STATE_INCONSISTENT
)
809 oss
<< "inconsistent+";
810 if (state
& PG_STATE_PEERING
)
812 if (state
& PG_STATE_REPAIR
)
814 if ((state
& PG_STATE_BACKFILL_WAIT
) &&
815 !(state
&PG_STATE_BACKFILL
))
816 oss
<< "backfill_wait+";
817 if (state
& PG_STATE_BACKFILL
)
818 oss
<< "backfilling+";
819 if (state
& PG_STATE_BACKFILL_TOOFULL
)
820 oss
<< "backfill_toofull+";
821 if (state
& PG_STATE_INCOMPLETE
)
822 oss
<< "incomplete+";
823 if (state
& PG_STATE_PEERED
)
825 if (state
& PG_STATE_SNAPTRIM
)
827 if (state
& PG_STATE_SNAPTRIM_WAIT
)
828 oss
<< "snaptrim_wait+";
829 string
ret(oss
.str());
830 if (ret
.length() > 0)
831 ret
.resize(ret
.length() - 1);
837 int pg_string_state(const std::string
& state
)
840 if (state
== "active")
841 type
= PG_STATE_ACTIVE
;
842 else if (state
== "clean")
843 type
= PG_STATE_CLEAN
;
844 else if (state
== "down")
845 type
= PG_STATE_DOWN
;
846 else if (state
== "scrubbing")
847 type
= PG_STATE_SCRUBBING
;
848 else if (state
== "degraded")
849 type
= PG_STATE_DEGRADED
;
850 else if (state
== "inconsistent")
851 type
= PG_STATE_INCONSISTENT
;
852 else if (state
== "peering")
853 type
= PG_STATE_PEERING
;
854 else if (state
== "repair")
855 type
= PG_STATE_REPAIR
;
856 else if (state
== "recovering")
857 type
= PG_STATE_RECOVERING
;
858 else if (state
== "backfill_wait")
859 type
= PG_STATE_BACKFILL_WAIT
;
860 else if (state
== "incomplete")
861 type
= PG_STATE_INCOMPLETE
;
862 else if (state
== "stale")
863 type
= PG_STATE_STALE
;
864 else if (state
== "remapped")
865 type
= PG_STATE_REMAPPED
;
866 else if (state
== "deep_scrub")
867 type
= PG_STATE_DEEP_SCRUB
;
868 else if (state
== "backfill")
869 type
= PG_STATE_BACKFILL
;
870 else if (state
== "backfill_toofull")
871 type
= PG_STATE_BACKFILL_TOOFULL
;
872 else if (state
== "recovery_wait")
873 type
= PG_STATE_RECOVERY_WAIT
;
874 else if (state
== "recovery_toofull")
875 type
= PG_STATE_RECOVERY_TOOFULL
;
876 else if (state
== "undersized")
877 type
= PG_STATE_UNDERSIZED
;
878 else if (state
== "activating")
879 type
= PG_STATE_ACTIVATING
;
880 else if (state
== "peered")
881 type
= PG_STATE_PEERED
;
882 else if (state
== "snaptrim")
883 type
= PG_STATE_SNAPTRIM
;
884 else if (state
== "snaptrim_wait")
885 type
= PG_STATE_SNAPTRIM_WAIT
;
892 string
eversion_t::get_key_name() const
895 // Below is equivalent of sprintf("%010u.%020llu");
897 ritoa
<uint64_t, 10, 20>(version
, key
+ 31);
899 ritoa
<uint32_t, 10, 10>(epoch
, key
+ 10);
904 // -- pool_snap_info_t --
905 void pool_snap_info_t::dump(Formatter
*f
) const
907 f
->dump_unsigned("snapid", snapid
);
908 f
->dump_stream("stamp") << stamp
;
909 f
->dump_string("name", name
);
912 void pool_snap_info_t::encode(bufferlist
& bl
, uint64_t features
) const
914 if ((features
& CEPH_FEATURE_PGPOOL3
) == 0) {
916 ::encode(struct_v
, bl
);
917 ::encode(snapid
, bl
);
922 ENCODE_START(2, 2, bl
);
923 ::encode(snapid
, bl
);
929 void pool_snap_info_t::decode(bufferlist::iterator
& bl
)
931 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
932 ::decode(snapid
, bl
);
938 void pool_snap_info_t::generate_test_instances(list
<pool_snap_info_t
*>& o
)
940 o
.push_back(new pool_snap_info_t
);
941 o
.push_back(new pool_snap_info_t
);
942 o
.back()->snapid
= 1;
943 o
.back()->stamp
= utime_t(1, 2);
944 o
.back()->name
= "foo";
949 typedef std::map
<std::string
, pool_opts_t::opt_desc_t
> opt_mapping_t
;
950 static opt_mapping_t opt_mapping
= boost::assign::map_list_of
951 ("scrub_min_interval", pool_opts_t::opt_desc_t(
952 pool_opts_t::SCRUB_MIN_INTERVAL
, pool_opts_t::DOUBLE
))
953 ("scrub_max_interval", pool_opts_t::opt_desc_t(
954 pool_opts_t::SCRUB_MAX_INTERVAL
, pool_opts_t::DOUBLE
))
955 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
956 pool_opts_t::DEEP_SCRUB_INTERVAL
, pool_opts_t::DOUBLE
))
957 ("recovery_priority", pool_opts_t::opt_desc_t(
958 pool_opts_t::RECOVERY_PRIORITY
, pool_opts_t::INT
))
959 ("recovery_op_priority", pool_opts_t::opt_desc_t(
960 pool_opts_t::RECOVERY_OP_PRIORITY
, pool_opts_t::INT
))
961 ("scrub_priority", pool_opts_t::opt_desc_t(
962 pool_opts_t::SCRUB_PRIORITY
, pool_opts_t::INT
))
963 ("compression_mode", pool_opts_t::opt_desc_t(
964 pool_opts_t::COMPRESSION_MODE
, pool_opts_t::STR
))
965 ("compression_algorithm", pool_opts_t::opt_desc_t(
966 pool_opts_t::COMPRESSION_ALGORITHM
, pool_opts_t::STR
))
967 ("compression_required_ratio", pool_opts_t::opt_desc_t(
968 pool_opts_t::COMPRESSION_REQUIRED_RATIO
, pool_opts_t::DOUBLE
))
969 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
970 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE
, pool_opts_t::INT
))
971 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
972 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE
, pool_opts_t::INT
))
973 ("csum_type", pool_opts_t::opt_desc_t(
974 pool_opts_t::CSUM_TYPE
, pool_opts_t::INT
))
975 ("csum_max_block", pool_opts_t::opt_desc_t(
976 pool_opts_t::CSUM_MAX_BLOCK
, pool_opts_t::INT
))
977 ("csum_min_block", pool_opts_t::opt_desc_t(
978 pool_opts_t::CSUM_MIN_BLOCK
, pool_opts_t::INT
));
980 bool pool_opts_t::is_opt_name(const std::string
& name
) {
981 return opt_mapping
.find(name
) != opt_mapping
.end();
984 pool_opts_t::opt_desc_t
pool_opts_t::get_opt_desc(const std::string
& name
) {
985 opt_mapping_t::iterator i
= opt_mapping
.find(name
);
986 assert(i
!= opt_mapping
.end());
990 bool pool_opts_t::is_set(pool_opts_t::key_t key
) const {
991 return opts
.find(key
) != opts
.end();
994 const pool_opts_t::value_t
& pool_opts_t::get(pool_opts_t::key_t key
) const {
995 opts_t::const_iterator i
= opts
.find(key
);
996 assert(i
!= opts
.end());
1000 bool pool_opts_t::unset(pool_opts_t::key_t key
) {
1001 return opts
.erase(key
) > 0;
1004 class pool_opts_dumper_t
: public boost::static_visitor
<>
1007 pool_opts_dumper_t(const std::string
& name_
, Formatter
* f_
) :
1008 name(name_
.c_str()), f(f_
) {}
1010 void operator()(std::string s
) const {
1011 f
->dump_string(name
, s
);
1013 void operator()(int i
) const {
1014 f
->dump_int(name
, i
);
1016 void operator()(double d
) const {
1017 f
->dump_float(name
, d
);
1025 void pool_opts_t::dump(const std::string
& name
, Formatter
* f
) const
1027 const opt_desc_t
& desc
= get_opt_desc(name
);
1028 opts_t::const_iterator i
= opts
.find(desc
.key
);
1029 if (i
== opts
.end()) {
1032 boost::apply_visitor(pool_opts_dumper_t(name
, f
), i
->second
);
1035 void pool_opts_t::dump(Formatter
* f
) const
1037 for (opt_mapping_t::iterator i
= opt_mapping
.begin(); i
!= opt_mapping
.end();
1039 const std::string
& name
= i
->first
;
1040 const opt_desc_t
& desc
= i
->second
;
1041 opts_t::const_iterator j
= opts
.find(desc
.key
);
1042 if (j
== opts
.end()) {
1045 boost::apply_visitor(pool_opts_dumper_t(name
, f
), j
->second
);
1049 class pool_opts_encoder_t
: public boost::static_visitor
<>
1052 explicit pool_opts_encoder_t(bufferlist
& bl_
) : bl(bl_
) {}
1054 void operator()(std::string s
) const {
1055 ::encode(static_cast<int32_t>(pool_opts_t::STR
), bl
);
1058 void operator()(int i
) const {
1059 ::encode(static_cast<int32_t>(pool_opts_t::INT
), bl
);
1062 void operator()(double d
) const {
1063 ::encode(static_cast<int32_t>(pool_opts_t::DOUBLE
), bl
);
1071 void pool_opts_t::encode(bufferlist
& bl
) const {
1072 ENCODE_START(1, 1, bl
);
1073 uint32_t n
= static_cast<uint32_t>(opts
.size());
1075 for (opts_t::const_iterator i
= opts
.begin(); i
!= opts
.end(); ++i
) {
1076 ::encode(static_cast<int32_t>(i
->first
), bl
);
1077 boost::apply_visitor(pool_opts_encoder_t(bl
), i
->second
);
1082 void pool_opts_t::decode(bufferlist::iterator
& bl
) {
1083 DECODE_START(1, bl
);
1094 opts
[static_cast<key_t
>(k
)] = s
;
1095 } else if (t
== INT
) {
1098 opts
[static_cast<key_t
>(k
)] = i
;
1099 } else if (t
== DOUBLE
) {
1102 opts
[static_cast<key_t
>(k
)] = d
;
1104 assert(!"invalid type");
1110 ostream
& operator<<(ostream
& out
, const pool_opts_t
& opts
)
1112 for (opt_mapping_t::iterator i
= opt_mapping
.begin(); i
!= opt_mapping
.end();
1114 const std::string
& name
= i
->first
;
1115 const pool_opts_t::opt_desc_t
& desc
= i
->second
;
1116 pool_opts_t::opts_t::const_iterator j
= opts
.opts
.find(desc
.key
);
1117 if (j
== opts
.opts
.end()) {
1120 out
<< " " << name
<< " " << j
->second
;
1127 void pg_pool_t::dump(Formatter
*f
) const
1129 f
->dump_unsigned("flags", get_flags());
1130 f
->dump_string("flags_names", get_flags_string());
1131 f
->dump_int("type", get_type());
1132 f
->dump_int("size", get_size());
1133 f
->dump_int("min_size", get_min_size());
1134 f
->dump_int("crush_ruleset", get_crush_ruleset());
1135 f
->dump_int("object_hash", get_object_hash());
1136 f
->dump_unsigned("pg_num", get_pg_num());
1137 f
->dump_unsigned("pg_placement_num", get_pgp_num());
1138 f
->dump_unsigned("crash_replay_interval", get_crash_replay_interval());
1139 f
->dump_stream("last_change") << get_last_change();
1140 f
->dump_stream("last_force_op_resend") << get_last_force_op_resend();
1141 f
->dump_stream("last_force_op_resend_preluminous")
1142 << get_last_force_op_resend_preluminous();
1143 f
->dump_unsigned("auid", get_auid());
1144 f
->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1145 f
->dump_unsigned("snap_seq", get_snap_seq());
1146 f
->dump_unsigned("snap_epoch", get_snap_epoch());
1147 f
->open_array_section("pool_snaps");
1148 for (map
<snapid_t
, pool_snap_info_t
>::const_iterator p
= snaps
.begin(); p
!= snaps
.end(); ++p
) {
1149 f
->open_object_section("pool_snap_info");
1154 f
->dump_stream("removed_snaps") << removed_snaps
;
1155 f
->dump_unsigned("quota_max_bytes", quota_max_bytes
);
1156 f
->dump_unsigned("quota_max_objects", quota_max_objects
);
1157 f
->open_array_section("tiers");
1158 for (set
<uint64_t>::const_iterator p
= tiers
.begin(); p
!= tiers
.end(); ++p
)
1159 f
->dump_unsigned("pool_id", *p
);
1161 f
->dump_int("tier_of", tier_of
);
1162 f
->dump_int("read_tier", read_tier
);
1163 f
->dump_int("write_tier", write_tier
);
1164 f
->dump_string("cache_mode", get_cache_mode_name());
1165 f
->dump_unsigned("target_max_bytes", target_max_bytes
);
1166 f
->dump_unsigned("target_max_objects", target_max_objects
);
1167 f
->dump_unsigned("cache_target_dirty_ratio_micro",
1168 cache_target_dirty_ratio_micro
);
1169 f
->dump_unsigned("cache_target_dirty_high_ratio_micro",
1170 cache_target_dirty_high_ratio_micro
);
1171 f
->dump_unsigned("cache_target_full_ratio_micro",
1172 cache_target_full_ratio_micro
);
1173 f
->dump_unsigned("cache_min_flush_age", cache_min_flush_age
);
1174 f
->dump_unsigned("cache_min_evict_age", cache_min_evict_age
);
1175 f
->dump_string("erasure_code_profile", erasure_code_profile
);
1176 f
->open_object_section("hit_set_params");
1177 hit_set_params
.dump(f
);
1178 f
->close_section(); // hit_set_params
1179 f
->dump_unsigned("hit_set_period", hit_set_period
);
1180 f
->dump_unsigned("hit_set_count", hit_set_count
);
1181 f
->dump_bool("use_gmt_hitset", use_gmt_hitset
);
1182 f
->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote
);
1183 f
->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote
);
1184 f
->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate
);
1185 f
->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n
);
1186 f
->open_array_section("grade_table");
1187 for (unsigned i
= 0; i
< hit_set_count
; ++i
)
1188 f
->dump_unsigned("value", get_grade(i
));
1190 f
->dump_unsigned("stripe_width", get_stripe_width());
1191 f
->dump_unsigned("expected_num_objects", expected_num_objects
);
1192 f
->dump_bool("fast_read", fast_read
);
1193 f
->open_object_section("options");
1195 f
->close_section(); // options
1198 void pg_pool_t::convert_to_pg_shards(const vector
<int> &from
, set
<pg_shard_t
>* to
) const {
1199 for (size_t i
= 0; i
< from
.size(); ++i
) {
1200 if (from
[i
] != CRUSH_ITEM_NONE
) {
1204 ec_pool() ? shard_id_t(i
) : shard_id_t::NO_SHARD
));
1209 void pg_pool_t::calc_pg_masks()
1211 pg_num_mask
= (1 << cbits(pg_num
-1)) - 1;
1212 pgp_num_mask
= (1 << cbits(pgp_num
-1)) - 1;
1215 unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid
) const
1217 if (pg_num
== pg_num_mask
+ 1)
1218 return pg_num
; // power-of-2 split
1219 unsigned mask
= pg_num_mask
>> 1;
1220 if ((pgid
.ps() & mask
) < (pg_num
& mask
))
1221 return pg_num_mask
+ 1; // smaller bin size (already split)
1223 return (pg_num_mask
+ 1) >> 1; // bigger bin (not yet split)
1227 * we have two snap modes:
1228 * - pool global snaps
1229 * - snap existence/non-existence defined by snaps[] and snap_seq
1230 * - user managed snaps
1231 * - removal governed by removed_snaps
1233 * we know which mode we're using based on whether removed_snaps is empty.
1235 bool pg_pool_t::is_pool_snaps_mode() const
1237 return removed_snaps
.empty() && get_snap_seq() > 0;
1240 bool pg_pool_t::is_unmanaged_snaps_mode() const
1242 return removed_snaps
.size() && get_snap_seq() > 0;
1245 bool pg_pool_t::is_removed_snap(snapid_t s
) const
1247 if (is_pool_snaps_mode())
1248 return s
<= get_snap_seq() && snaps
.count(s
) == 0;
1250 return removed_snaps
.contains(s
);
1254 * build set of known-removed sets from either pool snaps or
1255 * explicit removed_snaps set.
1257 void pg_pool_t::build_removed_snaps(interval_set
<snapid_t
>& rs
) const
1259 if (is_pool_snaps_mode()) {
1261 for (snapid_t s
= 1; s
<= get_snap_seq(); s
= s
+ 1)
1262 if (snaps
.count(s
) == 0)
1269 snapid_t
pg_pool_t::snap_exists(const char *s
) const
1271 for (map
<snapid_t
,pool_snap_info_t
>::const_iterator p
= snaps
.begin();
1274 if (p
->second
.name
== s
)
1275 return p
->second
.snapid
;
1279 void pg_pool_t::add_snap(const char *n
, utime_t stamp
)
1281 assert(!is_unmanaged_snaps_mode());
1282 snapid_t s
= get_snap_seq() + 1;
1284 snaps
[s
].snapid
= s
;
1286 snaps
[s
].stamp
= stamp
;
1289 void pg_pool_t::add_unmanaged_snap(uint64_t& snapid
)
1291 if (removed_snaps
.empty()) {
1292 assert(!is_pool_snaps_mode());
1293 removed_snaps
.insert(snapid_t(1));
1296 snapid
= snap_seq
= snap_seq
+ 1;
1299 void pg_pool_t::remove_snap(snapid_t s
)
1301 assert(snaps
.count(s
));
1303 snap_seq
= snap_seq
+ 1;
1306 void pg_pool_t::remove_unmanaged_snap(snapid_t s
)
1308 assert(is_unmanaged_snaps_mode());
1309 removed_snaps
.insert(s
);
1310 snap_seq
= snap_seq
+ 1;
1311 removed_snaps
.insert(get_snap_seq());
1314 SnapContext
pg_pool_t::get_snap_context() const
1316 vector
<snapid_t
> s(snaps
.size());
1318 for (map
<snapid_t
, pool_snap_info_t
>::const_reverse_iterator p
= snaps
.rbegin();
1322 return SnapContext(get_snap_seq(), s
);
1325 uint32_t pg_pool_t::hash_key(const string
& key
, const string
& ns
) const
1328 return ceph_str_hash(object_hash
, key
.data(), key
.length());
1329 int nsl
= ns
.length();
1330 int len
= key
.length() + nsl
+ 1;
1332 memcpy(&buf
[0], ns
.data(), nsl
);
1334 memcpy(&buf
[nsl
+1], key
.data(), key
.length());
1335 return ceph_str_hash(object_hash
, &buf
[0], len
);
1338 uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v
) const
1340 return ceph_stable_mod(v
, pg_num
, pg_num_mask
);
1344 * map a raw pg (with full precision ps) into an actual pg, for storage
1346 pg_t
pg_pool_t::raw_pg_to_pg(pg_t pg
) const
1348 pg
.set_ps(ceph_stable_mod(pg
.ps(), pg_num
, pg_num_mask
));
1353 * map raw pg (full precision ps) into a placement seed. include
1354 * pool id in that value so that different pools don't use the same
1357 ps_t
pg_pool_t::raw_pg_to_pps(pg_t pg
) const
1359 if (flags
& FLAG_HASHPSPOOL
) {
1360 // Hash the pool id so that pool PGs do not overlap.
1362 crush_hash32_2(CRUSH_HASH_RJENKINS1
,
1363 ceph_stable_mod(pg
.ps(), pgp_num
, pgp_num_mask
),
1366 // Legacy behavior; add ps and pool together. This is not a great
1367 // idea because the PGs from each pool will essentially overlap on
1368 // top of each other: 0.5 == 1.4 == 2.3 == ...
1370 ceph_stable_mod(pg
.ps(), pgp_num
, pgp_num_mask
) +
1375 uint32_t pg_pool_t::get_random_pg_position(pg_t pg
, uint32_t seed
) const
1377 uint32_t r
= crush_hash32_2(CRUSH_HASH_RJENKINS1
, seed
, 123);
1378 if (pg_num
== pg_num_mask
+ 1) {
1381 unsigned smaller_mask
= pg_num_mask
>> 1;
1382 if ((pg
.ps() & smaller_mask
) < (pg_num
& smaller_mask
)) {
1392 void pg_pool_t::encode(bufferlist
& bl
, uint64_t features
) const
1394 if ((features
& CEPH_FEATURE_PGPOOL3
) == 0) {
1395 // this encoding matches the old struct ceph_pg_pool
1397 ::encode(struct_v
, bl
);
1400 ::encode(crush_ruleset
, bl
);
1401 ::encode(object_hash
, bl
);
1402 ::encode(pg_num
, bl
);
1403 ::encode(pgp_num
, bl
);
1404 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1405 ::encode(lpg_num
, bl
);
1406 ::encode(lpgp_num
, bl
);
1407 ::encode(last_change
, bl
);
1408 ::encode(snap_seq
, bl
);
1409 ::encode(snap_epoch
, bl
);
1411 __u32 n
= snaps
.size();
1413 n
= removed_snaps
.num_intervals();
1418 ::encode_nohead(snaps
, bl
, features
);
1419 ::encode_nohead(removed_snaps
, bl
);
1423 if ((features
& CEPH_FEATURE_OSDENC
) == 0) {
1425 ::encode(struct_v
, bl
);
1428 ::encode(crush_ruleset
, bl
);
1429 ::encode(object_hash
, bl
);
1430 ::encode(pg_num
, bl
);
1431 ::encode(pgp_num
, bl
);
1432 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1433 ::encode(lpg_num
, bl
);
1434 ::encode(lpgp_num
, bl
);
1435 ::encode(last_change
, bl
);
1436 ::encode(snap_seq
, bl
);
1437 ::encode(snap_epoch
, bl
);
1438 ::encode(snaps
, bl
, features
);
1439 ::encode(removed_snaps
, bl
);
1441 ::encode(flags
, bl
);
1442 ::encode(crash_replay_interval
, bl
);
1446 if ((features
& CEPH_FEATURE_OSD_POOLRESEND
) == 0) {
1447 // we simply added last_force_op_resend here, which is a fully
1448 // backward compatible change. however, encoding the same map
1449 // differently between monitors triggers scrub noise (even though
1450 // they are decodable without the feature), so let's be pendantic
1452 ENCODE_START(14, 5, bl
);
1455 ::encode(crush_ruleset
, bl
);
1456 ::encode(object_hash
, bl
);
1457 ::encode(pg_num
, bl
);
1458 ::encode(pgp_num
, bl
);
1459 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1460 ::encode(lpg_num
, bl
);
1461 ::encode(lpgp_num
, bl
);
1462 ::encode(last_change
, bl
);
1463 ::encode(snap_seq
, bl
);
1464 ::encode(snap_epoch
, bl
);
1465 ::encode(snaps
, bl
, features
);
1466 ::encode(removed_snaps
, bl
);
1468 ::encode(flags
, bl
);
1469 ::encode(crash_replay_interval
, bl
);
1470 ::encode(min_size
, bl
);
1471 ::encode(quota_max_bytes
, bl
);
1472 ::encode(quota_max_objects
, bl
);
1473 ::encode(tiers
, bl
);
1474 ::encode(tier_of
, bl
);
1475 __u8 c
= cache_mode
;
1477 ::encode(read_tier
, bl
);
1478 ::encode(write_tier
, bl
);
1479 ::encode(properties
, bl
);
1480 ::encode(hit_set_params
, bl
);
1481 ::encode(hit_set_period
, bl
);
1482 ::encode(hit_set_count
, bl
);
1483 ::encode(stripe_width
, bl
);
1484 ::encode(target_max_bytes
, bl
);
1485 ::encode(target_max_objects
, bl
);
1486 ::encode(cache_target_dirty_ratio_micro
, bl
);
1487 ::encode(cache_target_full_ratio_micro
, bl
);
1488 ::encode(cache_min_flush_age
, bl
);
1489 ::encode(cache_min_evict_age
, bl
);
1490 ::encode(erasure_code_profile
, bl
);
1496 if (!(features
& CEPH_FEATURE_NEW_OSDOP_ENCODING
)) {
1497 // this was the first post-hammer thing we added; if it's missing, encode
1502 (CEPH_FEATURE_RESEND_ON_SPLIT
|CEPH_FEATURE_SERVER_JEWEL
)) !=
1503 (CEPH_FEATURE_RESEND_ON_SPLIT
|CEPH_FEATURE_SERVER_JEWEL
)) {
1507 ENCODE_START(v
, 5, bl
);
1510 ::encode(crush_ruleset
, bl
);
1511 ::encode(object_hash
, bl
);
1512 ::encode(pg_num
, bl
);
1513 ::encode(pgp_num
, bl
);
1514 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1515 ::encode(lpg_num
, bl
);
1516 ::encode(lpgp_num
, bl
);
1517 ::encode(last_change
, bl
);
1518 ::encode(snap_seq
, bl
);
1519 ::encode(snap_epoch
, bl
);
1520 ::encode(snaps
, bl
, features
);
1521 ::encode(removed_snaps
, bl
);
1523 ::encode(flags
, bl
);
1524 ::encode(crash_replay_interval
, bl
);
1525 ::encode(min_size
, bl
);
1526 ::encode(quota_max_bytes
, bl
);
1527 ::encode(quota_max_objects
, bl
);
1528 ::encode(tiers
, bl
);
1529 ::encode(tier_of
, bl
);
1530 __u8 c
= cache_mode
;
1532 ::encode(read_tier
, bl
);
1533 ::encode(write_tier
, bl
);
1534 ::encode(properties
, bl
);
1535 ::encode(hit_set_params
, bl
);
1536 ::encode(hit_set_period
, bl
);
1537 ::encode(hit_set_count
, bl
);
1538 ::encode(stripe_width
, bl
);
1539 ::encode(target_max_bytes
, bl
);
1540 ::encode(target_max_objects
, bl
);
1541 ::encode(cache_target_dirty_ratio_micro
, bl
);
1542 ::encode(cache_target_full_ratio_micro
, bl
);
1543 ::encode(cache_min_flush_age
, bl
);
1544 ::encode(cache_min_evict_age
, bl
);
1545 ::encode(erasure_code_profile
, bl
);
1546 ::encode(last_force_op_resend_preluminous
, bl
);
1547 ::encode(min_read_recency_for_promote
, bl
);
1548 ::encode(expected_num_objects
, bl
);
1550 ::encode(cache_target_dirty_high_ratio_micro
, bl
);
1553 ::encode(min_write_recency_for_promote
, bl
);
1556 ::encode(use_gmt_hitset
, bl
);
1559 ::encode(fast_read
, bl
);
1562 ::encode(hit_set_grade_decay_rate
, bl
);
1563 ::encode(hit_set_search_last_n
, bl
);
1569 ::encode(last_force_op_resend
, bl
);
1574 void pg_pool_t::decode(bufferlist::iterator
& bl
)
1576 DECODE_START_LEGACY_COMPAT_LEN(24, 5, 5, bl
);
1579 ::decode(crush_ruleset
, bl
);
1580 ::decode(object_hash
, bl
);
1581 ::decode(pg_num
, bl
);
1582 ::decode(pgp_num
, bl
);
1584 __u32 lpg_num
, lpgp_num
;
1585 ::decode(lpg_num
, bl
);
1586 ::decode(lpgp_num
, bl
);
1588 ::decode(last_change
, bl
);
1589 ::decode(snap_seq
, bl
);
1590 ::decode(snap_epoch
, bl
);
1592 if (struct_v
>= 3) {
1593 ::decode(snaps
, bl
);
1594 ::decode(removed_snaps
, bl
);
1601 ::decode_nohead(n
, snaps
, bl
);
1602 ::decode_nohead(m
, removed_snaps
, bl
);
1605 if (struct_v
>= 4) {
1606 ::decode(flags
, bl
);
1607 ::decode(crash_replay_interval
, bl
);
1611 // if this looks like the 'data' pool, set the
1612 // crash_replay_interval appropriately. unfortunately, we can't
1613 // be precise here. this should be good enough to preserve replay
1614 // on the data pool for the majority of cluster upgrades, though.
1615 if (crush_ruleset
== 0 && auid
== 0)
1616 crash_replay_interval
= 60;
1618 crash_replay_interval
= 0;
1620 if (struct_v
>= 7) {
1621 ::decode(min_size
, bl
);
1623 min_size
= size
- size
/2;
1625 if (struct_v
>= 8) {
1626 ::decode(quota_max_bytes
, bl
);
1627 ::decode(quota_max_objects
, bl
);
1629 if (struct_v
>= 9) {
1630 ::decode(tiers
, bl
);
1631 ::decode(tier_of
, bl
);
1634 cache_mode
= (cache_mode_t
)v
;
1635 ::decode(read_tier
, bl
);
1636 ::decode(write_tier
, bl
);
1638 if (struct_v
>= 10) {
1639 ::decode(properties
, bl
);
1641 if (struct_v
>= 11) {
1642 ::decode(hit_set_params
, bl
);
1643 ::decode(hit_set_period
, bl
);
1644 ::decode(hit_set_count
, bl
);
1647 hit_set_period
= def
.hit_set_period
;
1648 hit_set_count
= def
.hit_set_count
;
1650 if (struct_v
>= 12) {
1651 ::decode(stripe_width
, bl
);
1653 set_stripe_width(0);
1655 if (struct_v
>= 13) {
1656 ::decode(target_max_bytes
, bl
);
1657 ::decode(target_max_objects
, bl
);
1658 ::decode(cache_target_dirty_ratio_micro
, bl
);
1659 ::decode(cache_target_full_ratio_micro
, bl
);
1660 ::decode(cache_min_flush_age
, bl
);
1661 ::decode(cache_min_evict_age
, bl
);
1663 target_max_bytes
= 0;
1664 target_max_objects
= 0;
1665 cache_target_dirty_ratio_micro
= 0;
1666 cache_target_full_ratio_micro
= 0;
1667 cache_min_flush_age
= 0;
1668 cache_min_evict_age
= 0;
1670 if (struct_v
>= 14) {
1671 ::decode(erasure_code_profile
, bl
);
1673 if (struct_v
>= 15) {
1674 ::decode(last_force_op_resend_preluminous
, bl
);
1676 last_force_op_resend_preluminous
= 0;
1678 if (struct_v
>= 16) {
1679 ::decode(min_read_recency_for_promote
, bl
);
1681 min_read_recency_for_promote
= 1;
1683 if (struct_v
>= 17) {
1684 ::decode(expected_num_objects
, bl
);
1686 expected_num_objects
= 0;
1688 if (struct_v
>= 19) {
1689 ::decode(cache_target_dirty_high_ratio_micro
, bl
);
1691 cache_target_dirty_high_ratio_micro
= cache_target_dirty_ratio_micro
;
1693 if (struct_v
>= 20) {
1694 ::decode(min_write_recency_for_promote
, bl
);
1696 min_write_recency_for_promote
= 1;
1698 if (struct_v
>= 21) {
1699 ::decode(use_gmt_hitset
, bl
);
1701 use_gmt_hitset
= false;
1703 if (struct_v
>= 22) {
1704 ::decode(fast_read
, bl
);
1708 if (struct_v
>= 23) {
1709 ::decode(hit_set_grade_decay_rate
, bl
);
1710 ::decode(hit_set_search_last_n
, bl
);
1712 hit_set_grade_decay_rate
= 0;
1713 hit_set_search_last_n
= 1;
1715 if (struct_v
>= 24) {
1718 if (struct_v
>= 25) {
1719 ::decode(last_force_op_resend
, bl
);
1721 last_force_op_resend
= last_force_op_resend_preluminous
;
1728 void pg_pool_t::generate_test_instances(list
<pg_pool_t
*>& o
)
1731 o
.push_back(new pg_pool_t(a
));
1733 a
.type
= TYPE_REPLICATED
;
1735 a
.crush_ruleset
= 3;
1740 a
.last_force_op_resend
= 123823;
1741 a
.last_force_op_resend_preluminous
= 123824;
1745 a
.crash_replay_interval
= 13;
1746 a
.quota_max_bytes
= 473;
1747 a
.quota_max_objects
= 474;
1748 o
.push_back(new pg_pool_t(a
));
1750 a
.snaps
[3].name
= "asdf";
1751 a
.snaps
[3].snapid
= 3;
1752 a
.snaps
[3].stamp
= utime_t(123, 4);
1753 a
.snaps
[6].name
= "qwer";
1754 a
.snaps
[6].snapid
= 6;
1755 a
.snaps
[6].stamp
= utime_t(23423, 4);
1756 o
.push_back(new pg_pool_t(a
));
1758 a
.removed_snaps
.insert(2); // not quite valid to combine with snaps!
1759 a
.quota_max_bytes
= 2473;
1760 a
.quota_max_objects
= 4374;
1764 a
.cache_mode
= CACHEMODE_WRITEBACK
;
1767 a
.hit_set_params
= HitSet::Params(new BloomHitSet::Params
);
1768 a
.hit_set_period
= 3600;
1769 a
.hit_set_count
= 8;
1770 a
.min_read_recency_for_promote
= 1;
1771 a
.min_write_recency_for_promote
= 1;
1772 a
.hit_set_grade_decay_rate
= 50;
1773 a
.hit_set_search_last_n
= 1;
1774 a
.calc_grade_table();
1775 a
.set_stripe_width(12345);
1776 a
.target_max_bytes
= 1238132132;
1777 a
.target_max_objects
= 1232132;
1778 a
.cache_target_dirty_ratio_micro
= 187232;
1779 a
.cache_target_dirty_high_ratio_micro
= 309856;
1780 a
.cache_target_full_ratio_micro
= 987222;
1781 a
.cache_min_flush_age
= 231;
1782 a
.cache_min_evict_age
= 2321;
1783 a
.erasure_code_profile
= "profile in osdmap";
1784 a
.expected_num_objects
= 123456;
1785 a
.fast_read
= false;
1786 o
.push_back(new pg_pool_t(a
));
1789 ostream
& operator<<(ostream
& out
, const pg_pool_t
& p
)
1791 out
<< p
.get_type_name()
1792 << " size " << p
.get_size()
1793 << " min_size " << p
.get_min_size()
1794 << " crush_ruleset " << p
.get_crush_ruleset()
1795 << " object_hash " << p
.get_object_hash_name()
1796 << " pg_num " << p
.get_pg_num()
1797 << " pgp_num " << p
.get_pgp_num()
1798 << " last_change " << p
.get_last_change();
1799 if (p
.get_last_force_op_resend() ||
1800 p
.get_last_force_op_resend_preluminous())
1801 out
<< " lfor " << p
.get_last_force_op_resend() << "/"
1802 << p
.get_last_force_op_resend_preluminous();
1804 out
<< " owner " << p
.get_auid();
1806 out
<< " flags " << p
.get_flags_string();
1807 if (p
.crash_replay_interval
)
1808 out
<< " crash_replay_interval " << p
.crash_replay_interval
;
1809 if (p
.quota_max_bytes
)
1810 out
<< " max_bytes " << p
.quota_max_bytes
;
1811 if (p
.quota_max_objects
)
1812 out
<< " max_objects " << p
.quota_max_objects
;
1813 if (!p
.tiers
.empty())
1814 out
<< " tiers " << p
.tiers
;
1816 out
<< " tier_of " << p
.tier_of
;
1817 if (p
.has_read_tier())
1818 out
<< " read_tier " << p
.read_tier
;
1819 if (p
.has_write_tier())
1820 out
<< " write_tier " << p
.write_tier
;
1822 out
<< " cache_mode " << p
.get_cache_mode_name();
1823 if (p
.target_max_bytes
)
1824 out
<< " target_bytes " << p
.target_max_bytes
;
1825 if (p
.target_max_objects
)
1826 out
<< " target_objects " << p
.target_max_objects
;
1827 if (p
.hit_set_params
.get_type() != HitSet::TYPE_NONE
) {
1828 out
<< " hit_set " << p
.hit_set_params
1829 << " " << p
.hit_set_period
<< "s"
1830 << " x" << p
.hit_set_count
<< " decay_rate "
1831 << p
.hit_set_grade_decay_rate
1832 << " search_last_n " << p
.hit_set_search_last_n
;
1834 if (p
.min_read_recency_for_promote
)
1835 out
<< " min_read_recency_for_promote " << p
.min_read_recency_for_promote
;
1836 if (p
.min_write_recency_for_promote
)
1837 out
<< " min_write_recency_for_promote " << p
.min_write_recency_for_promote
;
1838 out
<< " stripe_width " << p
.get_stripe_width();
1839 if (p
.expected_num_objects
)
1840 out
<< " expected_num_objects " << p
.expected_num_objects
;
1842 out
<< " fast_read " << p
.fast_read
;
1848 // -- object_stat_sum_t --
1850 void object_stat_sum_t::dump(Formatter
*f
) const
1852 f
->dump_int("num_bytes", num_bytes
);
1853 f
->dump_int("num_objects", num_objects
);
1854 f
->dump_int("num_object_clones", num_object_clones
);
1855 f
->dump_int("num_object_copies", num_object_copies
);
1856 f
->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary
);
1857 f
->dump_int("num_objects_missing", num_objects_missing
);
1858 f
->dump_int("num_objects_degraded", num_objects_degraded
);
1859 f
->dump_int("num_objects_misplaced", num_objects_misplaced
);
1860 f
->dump_int("num_objects_unfound", num_objects_unfound
);
1861 f
->dump_int("num_objects_dirty", num_objects_dirty
);
1862 f
->dump_int("num_whiteouts", num_whiteouts
);
1863 f
->dump_int("num_read", num_rd
);
1864 f
->dump_int("num_read_kb", num_rd_kb
);
1865 f
->dump_int("num_write", num_wr
);
1866 f
->dump_int("num_write_kb", num_wr_kb
);
1867 f
->dump_int("num_scrub_errors", num_scrub_errors
);
1868 f
->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors
);
1869 f
->dump_int("num_deep_scrub_errors", num_deep_scrub_errors
);
1870 f
->dump_int("num_objects_recovered", num_objects_recovered
);
1871 f
->dump_int("num_bytes_recovered", num_bytes_recovered
);
1872 f
->dump_int("num_keys_recovered", num_keys_recovered
);
1873 f
->dump_int("num_objects_omap", num_objects_omap
);
1874 f
->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive
);
1875 f
->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive
);
1876 f
->dump_int("num_flush", num_flush
);
1877 f
->dump_int("num_flush_kb", num_flush_kb
);
1878 f
->dump_int("num_evict", num_evict
);
1879 f
->dump_int("num_evict_kb", num_evict_kb
);
1880 f
->dump_int("num_promote", num_promote
);
1881 f
->dump_int("num_flush_mode_high", num_flush_mode_high
);
1882 f
->dump_int("num_flush_mode_low", num_flush_mode_low
);
1883 f
->dump_int("num_evict_mode_some", num_evict_mode_some
);
1884 f
->dump_int("num_evict_mode_full", num_evict_mode_full
);
1885 f
->dump_int("num_objects_pinned", num_objects_pinned
);
1886 f
->dump_int("num_legacy_snapsets", num_legacy_snapsets
);
1889 void object_stat_sum_t::encode(bufferlist
& bl
) const
1891 ENCODE_START(16, 14, bl
);
1892 #if defined(CEPH_LITTLE_ENDIAN)
1893 bl
.append((char *)(&num_bytes
), sizeof(object_stat_sum_t
));
1895 ::encode(num_bytes
, bl
);
1896 ::encode(num_objects
, bl
);
1897 ::encode(num_object_clones
, bl
);
1898 ::encode(num_object_copies
, bl
);
1899 ::encode(num_objects_missing_on_primary
, bl
);
1900 ::encode(num_objects_degraded
, bl
);
1901 ::encode(num_objects_unfound
, bl
);
1902 ::encode(num_rd
, bl
);
1903 ::encode(num_rd_kb
, bl
);
1904 ::encode(num_wr
, bl
);
1905 ::encode(num_wr_kb
, bl
);
1906 ::encode(num_scrub_errors
, bl
);
1907 ::encode(num_objects_recovered
, bl
);
1908 ::encode(num_bytes_recovered
, bl
);
1909 ::encode(num_keys_recovered
, bl
);
1910 ::encode(num_shallow_scrub_errors
, bl
);
1911 ::encode(num_deep_scrub_errors
, bl
);
1912 ::encode(num_objects_dirty
, bl
);
1913 ::encode(num_whiteouts
, bl
);
1914 ::encode(num_objects_omap
, bl
);
1915 ::encode(num_objects_hit_set_archive
, bl
);
1916 ::encode(num_objects_misplaced
, bl
);
1917 ::encode(num_bytes_hit_set_archive
, bl
);
1918 ::encode(num_flush
, bl
);
1919 ::encode(num_flush_kb
, bl
);
1920 ::encode(num_evict
, bl
);
1921 ::encode(num_evict_kb
, bl
);
1922 ::encode(num_promote
, bl
);
1923 ::encode(num_flush_mode_high
, bl
);
1924 ::encode(num_flush_mode_low
, bl
);
1925 ::encode(num_evict_mode_some
, bl
);
1926 ::encode(num_evict_mode_full
, bl
);
1927 ::encode(num_objects_pinned
, bl
);
1928 ::encode(num_objects_missing
, bl
);
1929 ::encode(num_legacy_snapsets
, bl
);
1934 void object_stat_sum_t::decode(bufferlist::iterator
& bl
)
1936 bool decode_finish
= false;
1937 DECODE_START(16, bl
);
1938 #if defined(CEPH_LITTLE_ENDIAN)
1939 if (struct_v
>= 16) {
1940 bl
.copy(sizeof(object_stat_sum_t
), (char*)(&num_bytes
));
1941 decode_finish
= true;
1944 if (!decode_finish
) {
1945 ::decode(num_bytes
, bl
);
1946 ::decode(num_objects
, bl
);
1947 ::decode(num_object_clones
, bl
);
1948 ::decode(num_object_copies
, bl
);
1949 ::decode(num_objects_missing_on_primary
, bl
);
1950 ::decode(num_objects_degraded
, bl
);
1951 ::decode(num_objects_unfound
, bl
);
1952 ::decode(num_rd
, bl
);
1953 ::decode(num_rd_kb
, bl
);
1954 ::decode(num_wr
, bl
);
1955 ::decode(num_wr_kb
, bl
);
1956 ::decode(num_scrub_errors
, bl
);
1957 ::decode(num_objects_recovered
, bl
);
1958 ::decode(num_bytes_recovered
, bl
);
1959 ::decode(num_keys_recovered
, bl
);
1960 ::decode(num_shallow_scrub_errors
, bl
);
1961 ::decode(num_deep_scrub_errors
, bl
);
1962 ::decode(num_objects_dirty
, bl
);
1963 ::decode(num_whiteouts
, bl
);
1964 ::decode(num_objects_omap
, bl
);
1965 ::decode(num_objects_hit_set_archive
, bl
);
1966 ::decode(num_objects_misplaced
, bl
);
1967 ::decode(num_bytes_hit_set_archive
, bl
);
1968 ::decode(num_flush
, bl
);
1969 ::decode(num_flush_kb
, bl
);
1970 ::decode(num_evict
, bl
);
1971 ::decode(num_evict_kb
, bl
);
1972 ::decode(num_promote
, bl
);
1973 ::decode(num_flush_mode_high
, bl
);
1974 ::decode(num_flush_mode_low
, bl
);
1975 ::decode(num_evict_mode_some
, bl
);
1976 ::decode(num_evict_mode_full
, bl
);
1977 ::decode(num_objects_pinned
, bl
);
1978 ::decode(num_objects_missing
, bl
);
1979 if (struct_v
>= 16) {
1980 ::decode(num_legacy_snapsets
, bl
);
1982 num_legacy_snapsets
= num_object_clones
; // upper bound
1988 void object_stat_sum_t::generate_test_instances(list
<object_stat_sum_t
*>& o
)
1990 object_stat_sum_t a
;
1994 a
.num_object_clones
= 4;
1995 a
.num_object_copies
= 5;
1996 a
.num_objects_missing_on_primary
= 6;
1997 a
.num_objects_missing
= 123;
1998 a
.num_objects_degraded
= 7;
1999 a
.num_objects_unfound
= 8;
2000 a
.num_rd
= 9; a
.num_rd_kb
= 10;
2001 a
.num_wr
= 11; a
.num_wr_kb
= 12;
2002 a
.num_objects_recovered
= 14;
2003 a
.num_bytes_recovered
= 15;
2004 a
.num_keys_recovered
= 16;
2005 a
.num_deep_scrub_errors
= 17;
2006 a
.num_shallow_scrub_errors
= 18;
2007 a
.num_scrub_errors
= a
.num_deep_scrub_errors
+ a
.num_shallow_scrub_errors
;
2008 a
.num_objects_dirty
= 21;
2009 a
.num_whiteouts
= 22;
2010 a
.num_objects_misplaced
= 1232;
2011 a
.num_objects_hit_set_archive
= 2;
2012 a
.num_bytes_hit_set_archive
= 27;
2018 a
.num_flush_mode_high
= 0;
2019 a
.num_flush_mode_low
= 1;
2020 a
.num_evict_mode_some
= 1;
2021 a
.num_evict_mode_full
= 0;
2022 a
.num_objects_pinned
= 20;
2023 o
.push_back(new object_stat_sum_t(a
));
2026 void object_stat_sum_t::add(const object_stat_sum_t
& o
)
2028 num_bytes
+= o
.num_bytes
;
2029 num_objects
+= o
.num_objects
;
2030 num_object_clones
+= o
.num_object_clones
;
2031 num_object_copies
+= o
.num_object_copies
;
2032 num_objects_missing_on_primary
+= o
.num_objects_missing_on_primary
;
2033 num_objects_missing
+= o
.num_objects_missing
;
2034 num_objects_degraded
+= o
.num_objects_degraded
;
2035 num_objects_misplaced
+= o
.num_objects_misplaced
;
2037 num_rd_kb
+= o
.num_rd_kb
;
2039 num_wr_kb
+= o
.num_wr_kb
;
2040 num_objects_unfound
+= o
.num_objects_unfound
;
2041 num_scrub_errors
+= o
.num_scrub_errors
;
2042 num_shallow_scrub_errors
+= o
.num_shallow_scrub_errors
;
2043 num_deep_scrub_errors
+= o
.num_deep_scrub_errors
;
2044 num_objects_recovered
+= o
.num_objects_recovered
;
2045 num_bytes_recovered
+= o
.num_bytes_recovered
;
2046 num_keys_recovered
+= o
.num_keys_recovered
;
2047 num_objects_dirty
+= o
.num_objects_dirty
;
2048 num_whiteouts
+= o
.num_whiteouts
;
2049 num_objects_omap
+= o
.num_objects_omap
;
2050 num_objects_hit_set_archive
+= o
.num_objects_hit_set_archive
;
2051 num_bytes_hit_set_archive
+= o
.num_bytes_hit_set_archive
;
2052 num_flush
+= o
.num_flush
;
2053 num_flush_kb
+= o
.num_flush_kb
;
2054 num_evict
+= o
.num_evict
;
2055 num_evict_kb
+= o
.num_evict_kb
;
2056 num_promote
+= o
.num_promote
;
2057 num_flush_mode_high
+= o
.num_flush_mode_high
;
2058 num_flush_mode_low
+= o
.num_flush_mode_low
;
2059 num_evict_mode_some
+= o
.num_evict_mode_some
;
2060 num_evict_mode_full
+= o
.num_evict_mode_full
;
2061 num_objects_pinned
+= o
.num_objects_pinned
;
2062 num_legacy_snapsets
+= o
.num_legacy_snapsets
;
2065 void object_stat_sum_t::sub(const object_stat_sum_t
& o
)
2067 num_bytes
-= o
.num_bytes
;
2068 num_objects
-= o
.num_objects
;
2069 num_object_clones
-= o
.num_object_clones
;
2070 num_object_copies
-= o
.num_object_copies
;
2071 num_objects_missing_on_primary
-= o
.num_objects_missing_on_primary
;
2072 num_objects_missing
-= o
.num_objects_missing
;
2073 num_objects_degraded
-= o
.num_objects_degraded
;
2074 num_objects_misplaced
-= o
.num_objects_misplaced
;
2076 num_rd_kb
-= o
.num_rd_kb
;
2078 num_wr_kb
-= o
.num_wr_kb
;
2079 num_objects_unfound
-= o
.num_objects_unfound
;
2080 num_scrub_errors
-= o
.num_scrub_errors
;
2081 num_shallow_scrub_errors
-= o
.num_shallow_scrub_errors
;
2082 num_deep_scrub_errors
-= o
.num_deep_scrub_errors
;
2083 num_objects_recovered
-= o
.num_objects_recovered
;
2084 num_bytes_recovered
-= o
.num_bytes_recovered
;
2085 num_keys_recovered
-= o
.num_keys_recovered
;
2086 num_objects_dirty
-= o
.num_objects_dirty
;
2087 num_whiteouts
-= o
.num_whiteouts
;
2088 num_objects_omap
-= o
.num_objects_omap
;
2089 num_objects_hit_set_archive
-= o
.num_objects_hit_set_archive
;
2090 num_bytes_hit_set_archive
-= o
.num_bytes_hit_set_archive
;
2091 num_flush
-= o
.num_flush
;
2092 num_flush_kb
-= o
.num_flush_kb
;
2093 num_evict
-= o
.num_evict
;
2094 num_evict_kb
-= o
.num_evict_kb
;
2095 num_promote
-= o
.num_promote
;
2096 num_flush_mode_high
-= o
.num_flush_mode_high
;
2097 num_flush_mode_low
-= o
.num_flush_mode_low
;
2098 num_evict_mode_some
-= o
.num_evict_mode_some
;
2099 num_evict_mode_full
-= o
.num_evict_mode_full
;
2100 num_objects_pinned
-= o
.num_objects_pinned
;
2101 num_legacy_snapsets
-= o
.num_legacy_snapsets
;
2104 bool operator==(const object_stat_sum_t
& l
, const object_stat_sum_t
& r
)
2107 l
.num_bytes
== r
.num_bytes
&&
2108 l
.num_objects
== r
.num_objects
&&
2109 l
.num_object_clones
== r
.num_object_clones
&&
2110 l
.num_object_copies
== r
.num_object_copies
&&
2111 l
.num_objects_missing_on_primary
== r
.num_objects_missing_on_primary
&&
2112 l
.num_objects_missing
== r
.num_objects_missing
&&
2113 l
.num_objects_degraded
== r
.num_objects_degraded
&&
2114 l
.num_objects_misplaced
== r
.num_objects_misplaced
&&
2115 l
.num_objects_unfound
== r
.num_objects_unfound
&&
2116 l
.num_rd
== r
.num_rd
&&
2117 l
.num_rd_kb
== r
.num_rd_kb
&&
2118 l
.num_wr
== r
.num_wr
&&
2119 l
.num_wr_kb
== r
.num_wr_kb
&&
2120 l
.num_scrub_errors
== r
.num_scrub_errors
&&
2121 l
.num_shallow_scrub_errors
== r
.num_shallow_scrub_errors
&&
2122 l
.num_deep_scrub_errors
== r
.num_deep_scrub_errors
&&
2123 l
.num_objects_recovered
== r
.num_objects_recovered
&&
2124 l
.num_bytes_recovered
== r
.num_bytes_recovered
&&
2125 l
.num_keys_recovered
== r
.num_keys_recovered
&&
2126 l
.num_objects_dirty
== r
.num_objects_dirty
&&
2127 l
.num_whiteouts
== r
.num_whiteouts
&&
2128 l
.num_objects_omap
== r
.num_objects_omap
&&
2129 l
.num_objects_hit_set_archive
== r
.num_objects_hit_set_archive
&&
2130 l
.num_bytes_hit_set_archive
== r
.num_bytes_hit_set_archive
&&
2131 l
.num_flush
== r
.num_flush
&&
2132 l
.num_flush_kb
== r
.num_flush_kb
&&
2133 l
.num_evict
== r
.num_evict
&&
2134 l
.num_evict_kb
== r
.num_evict_kb
&&
2135 l
.num_promote
== r
.num_promote
&&
2136 l
.num_flush_mode_high
== r
.num_flush_mode_high
&&
2137 l
.num_flush_mode_low
== r
.num_flush_mode_low
&&
2138 l
.num_evict_mode_some
== r
.num_evict_mode_some
&&
2139 l
.num_evict_mode_full
== r
.num_evict_mode_full
&&
2140 l
.num_objects_pinned
== r
.num_objects_pinned
&&
2141 l
.num_legacy_snapsets
== r
.num_legacy_snapsets
;
2144 // -- object_stat_collection_t --
2146 void object_stat_collection_t::dump(Formatter
*f
) const
2148 f
->open_object_section("stat_sum");
2153 void object_stat_collection_t::encode(bufferlist
& bl
) const
2155 ENCODE_START(2, 2, bl
);
2157 ::encode((__u32
)0, bl
);
2161 void object_stat_collection_t::decode(bufferlist::iterator
& bl
)
2163 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
2166 map
<string
,object_stat_sum_t
> cat_sum
;
2167 ::decode(cat_sum
, bl
);
2172 void object_stat_collection_t::generate_test_instances(list
<object_stat_collection_t
*>& o
)
2174 object_stat_collection_t a
;
2175 o
.push_back(new object_stat_collection_t(a
));
2176 list
<object_stat_sum_t
*> l
;
2177 object_stat_sum_t::generate_test_instances(l
);
2178 for (list
<object_stat_sum_t
*>::iterator p
= l
.begin(); p
!= l
.end(); ++p
) {
2180 o
.push_back(new object_stat_collection_t(a
));
2187 bool pg_stat_t::is_acting_osd(int32_t osd
, bool primary
) const
2189 if (primary
&& osd
== acting_primary
) {
2191 } else if (!primary
) {
2192 for(vector
<int32_t>::const_iterator it
= acting
.begin();
2193 it
!= acting
.end(); ++it
)
2202 void pg_stat_t::dump(Formatter
*f
) const
2204 f
->dump_stream("version") << version
;
2205 f
->dump_stream("reported_seq") << reported_seq
;
2206 f
->dump_stream("reported_epoch") << reported_epoch
;
2207 f
->dump_string("state", pg_state_string(state
));
2208 f
->dump_stream("last_fresh") << last_fresh
;
2209 f
->dump_stream("last_change") << last_change
;
2210 f
->dump_stream("last_active") << last_active
;
2211 f
->dump_stream("last_peered") << last_peered
;
2212 f
->dump_stream("last_clean") << last_clean
;
2213 f
->dump_stream("last_became_active") << last_became_active
;
2214 f
->dump_stream("last_became_peered") << last_became_peered
;
2215 f
->dump_stream("last_unstale") << last_unstale
;
2216 f
->dump_stream("last_undegraded") << last_undegraded
;
2217 f
->dump_stream("last_fullsized") << last_fullsized
;
2218 f
->dump_unsigned("mapping_epoch", mapping_epoch
);
2219 f
->dump_stream("log_start") << log_start
;
2220 f
->dump_stream("ondisk_log_start") << ondisk_log_start
;
2221 f
->dump_unsigned("created", created
);
2222 f
->dump_unsigned("last_epoch_clean", last_epoch_clean
);
2223 f
->dump_stream("parent") << parent
;
2224 f
->dump_unsigned("parent_split_bits", parent_split_bits
);
2225 f
->dump_stream("last_scrub") << last_scrub
;
2226 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
2227 f
->dump_stream("last_deep_scrub") << last_deep_scrub
;
2228 f
->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp
;
2229 f
->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp
;
2230 f
->dump_int("log_size", log_size
);
2231 f
->dump_int("ondisk_log_size", ondisk_log_size
);
2232 f
->dump_bool("stats_invalid", stats_invalid
);
2233 f
->dump_bool("dirty_stats_invalid", dirty_stats_invalid
);
2234 f
->dump_bool("omap_stats_invalid", omap_stats_invalid
);
2235 f
->dump_bool("hitset_stats_invalid", hitset_stats_invalid
);
2236 f
->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid
);
2237 f
->dump_bool("pin_stats_invalid", pin_stats_invalid
);
2239 f
->open_array_section("up");
2240 for (vector
<int32_t>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
)
2241 f
->dump_int("osd", *p
);
2243 f
->open_array_section("acting");
2244 for (vector
<int32_t>::const_iterator p
= acting
.begin(); p
!= acting
.end(); ++p
)
2245 f
->dump_int("osd", *p
);
2247 f
->open_array_section("blocked_by");
2248 for (vector
<int32_t>::const_iterator p
= blocked_by
.begin();
2249 p
!= blocked_by
.end(); ++p
)
2250 f
->dump_int("osd", *p
);
2252 f
->dump_int("up_primary", up_primary
);
2253 f
->dump_int("acting_primary", acting_primary
);
2256 void pg_stat_t::dump_brief(Formatter
*f
) const
2258 f
->dump_string("state", pg_state_string(state
));
2259 f
->open_array_section("up");
2260 for (vector
<int32_t>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
)
2261 f
->dump_int("osd", *p
);
2263 f
->open_array_section("acting");
2264 for (vector
<int32_t>::const_iterator p
= acting
.begin(); p
!= acting
.end(); ++p
)
2265 f
->dump_int("osd", *p
);
2267 f
->dump_int("up_primary", up_primary
);
2268 f
->dump_int("acting_primary", acting_primary
);
2271 void pg_stat_t::encode(bufferlist
&bl
) const
2273 ENCODE_START(22, 22, bl
);
2274 ::encode(version
, bl
);
2275 ::encode(reported_seq
, bl
);
2276 ::encode(reported_epoch
, bl
);
2277 ::encode(state
, bl
);
2278 ::encode(log_start
, bl
);
2279 ::encode(ondisk_log_start
, bl
);
2280 ::encode(created
, bl
);
2281 ::encode(last_epoch_clean
, bl
);
2282 ::encode(parent
, bl
);
2283 ::encode(parent_split_bits
, bl
);
2284 ::encode(last_scrub
, bl
);
2285 ::encode(last_scrub_stamp
, bl
);
2286 ::encode(stats
, bl
);
2287 ::encode(log_size
, bl
);
2288 ::encode(ondisk_log_size
, bl
);
2290 ::encode(acting
, bl
);
2291 ::encode(last_fresh
, bl
);
2292 ::encode(last_change
, bl
);
2293 ::encode(last_active
, bl
);
2294 ::encode(last_clean
, bl
);
2295 ::encode(last_unstale
, bl
);
2296 ::encode(mapping_epoch
, bl
);
2297 ::encode(last_deep_scrub
, bl
);
2298 ::encode(last_deep_scrub_stamp
, bl
);
2299 ::encode(stats_invalid
, bl
);
2300 ::encode(last_clean_scrub_stamp
, bl
);
2301 ::encode(last_became_active
, bl
);
2302 ::encode(dirty_stats_invalid
, bl
);
2303 ::encode(up_primary
, bl
);
2304 ::encode(acting_primary
, bl
);
2305 ::encode(omap_stats_invalid
, bl
);
2306 ::encode(hitset_stats_invalid
, bl
);
2307 ::encode(blocked_by
, bl
);
2308 ::encode(last_undegraded
, bl
);
2309 ::encode(last_fullsized
, bl
);
2310 ::encode(hitset_bytes_stats_invalid
, bl
);
2311 ::encode(last_peered
, bl
);
2312 ::encode(last_became_peered
, bl
);
2313 ::encode(pin_stats_invalid
, bl
);
2317 void pg_stat_t::decode(bufferlist::iterator
&bl
)
2320 DECODE_START(22, bl
);
2321 ::decode(version
, bl
);
2322 ::decode(reported_seq
, bl
);
2323 ::decode(reported_epoch
, bl
);
2324 ::decode(state
, bl
);
2325 ::decode(log_start
, bl
);
2326 ::decode(ondisk_log_start
, bl
);
2327 ::decode(created
, bl
);
2328 ::decode(last_epoch_clean
, bl
);
2329 ::decode(parent
, bl
);
2330 ::decode(parent_split_bits
, bl
);
2331 ::decode(last_scrub
, bl
);
2332 ::decode(last_scrub_stamp
, bl
);
2333 ::decode(stats
, bl
);
2334 ::decode(log_size
, bl
);
2335 ::decode(ondisk_log_size
, bl
);
2337 ::decode(acting
, bl
);
2338 ::decode(last_fresh
, bl
);
2339 ::decode(last_change
, bl
);
2340 ::decode(last_active
, bl
);
2341 ::decode(last_clean
, bl
);
2342 ::decode(last_unstale
, bl
);
2343 ::decode(mapping_epoch
, bl
);
2344 ::decode(last_deep_scrub
, bl
);
2345 ::decode(last_deep_scrub_stamp
, bl
);
2347 stats_invalid
= tmp
;
2348 ::decode(last_clean_scrub_stamp
, bl
);
2349 ::decode(last_became_active
, bl
);
2351 dirty_stats_invalid
= tmp
;
2352 ::decode(up_primary
, bl
);
2353 ::decode(acting_primary
, bl
);
2355 omap_stats_invalid
= tmp
;
2357 hitset_stats_invalid
= tmp
;
2358 ::decode(blocked_by
, bl
);
2359 ::decode(last_undegraded
, bl
);
2360 ::decode(last_fullsized
, bl
);
2362 hitset_bytes_stats_invalid
= tmp
;
2363 ::decode(last_peered
, bl
);
2364 ::decode(last_became_peered
, bl
);
2366 pin_stats_invalid
= tmp
;
2370 void pg_stat_t::generate_test_instances(list
<pg_stat_t
*>& o
)
2373 o
.push_back(new pg_stat_t(a
));
2375 a
.version
= eversion_t(1, 3);
2376 a
.reported_epoch
= 1;
2379 a
.mapping_epoch
= 998;
2380 a
.last_fresh
= utime_t(1002, 1);
2381 a
.last_change
= utime_t(1002, 2);
2382 a
.last_active
= utime_t(1002, 3);
2383 a
.last_clean
= utime_t(1002, 4);
2384 a
.last_unstale
= utime_t(1002, 5);
2385 a
.last_undegraded
= utime_t(1002, 7);
2386 a
.last_fullsized
= utime_t(1002, 8);
2387 a
.log_start
= eversion_t(1, 4);
2388 a
.ondisk_log_start
= eversion_t(1, 5);
2390 a
.last_epoch_clean
= 7;
2391 a
.parent
= pg_t(1, 2, 3);
2392 a
.parent_split_bits
= 12;
2393 a
.last_scrub
= eversion_t(9, 10);
2394 a
.last_scrub_stamp
= utime_t(11, 12);
2395 a
.last_deep_scrub
= eversion_t(13, 14);
2396 a
.last_deep_scrub_stamp
= utime_t(15, 16);
2397 a
.last_clean_scrub_stamp
= utime_t(17, 18);
2398 list
<object_stat_collection_t
*> l
;
2399 object_stat_collection_t::generate_test_instances(l
);
2400 a
.stats
= *l
.back();
2402 a
.ondisk_log_size
= 88;
2403 a
.up
.push_back(123);
2405 a
.acting
.push_back(456);
2406 a
.acting_primary
= 456;
2407 o
.push_back(new pg_stat_t(a
));
2409 a
.up
.push_back(124);
2411 a
.acting
.push_back(124);
2412 a
.acting_primary
= 124;
2413 a
.blocked_by
.push_back(155);
2414 a
.blocked_by
.push_back(156);
2415 o
.push_back(new pg_stat_t(a
));
2418 bool operator==(const pg_stat_t
& l
, const pg_stat_t
& r
)
2421 l
.version
== r
.version
&&
2422 l
.reported_seq
== r
.reported_seq
&&
2423 l
.reported_epoch
== r
.reported_epoch
&&
2424 l
.state
== r
.state
&&
2425 l
.last_fresh
== r
.last_fresh
&&
2426 l
.last_change
== r
.last_change
&&
2427 l
.last_active
== r
.last_active
&&
2428 l
.last_peered
== r
.last_peered
&&
2429 l
.last_clean
== r
.last_clean
&&
2430 l
.last_unstale
== r
.last_unstale
&&
2431 l
.last_undegraded
== r
.last_undegraded
&&
2432 l
.last_fullsized
== r
.last_fullsized
&&
2433 l
.log_start
== r
.log_start
&&
2434 l
.ondisk_log_start
== r
.ondisk_log_start
&&
2435 l
.created
== r
.created
&&
2436 l
.last_epoch_clean
== r
.last_epoch_clean
&&
2437 l
.parent
== r
.parent
&&
2438 l
.parent_split_bits
== r
.parent_split_bits
&&
2439 l
.last_scrub
== r
.last_scrub
&&
2440 l
.last_deep_scrub
== r
.last_deep_scrub
&&
2441 l
.last_scrub_stamp
== r
.last_scrub_stamp
&&
2442 l
.last_deep_scrub_stamp
== r
.last_deep_scrub_stamp
&&
2443 l
.last_clean_scrub_stamp
== r
.last_clean_scrub_stamp
&&
2444 l
.stats
== r
.stats
&&
2445 l
.stats_invalid
== r
.stats_invalid
&&
2446 l
.log_size
== r
.log_size
&&
2447 l
.ondisk_log_size
== r
.ondisk_log_size
&&
2449 l
.acting
== r
.acting
&&
2450 l
.mapping_epoch
== r
.mapping_epoch
&&
2451 l
.blocked_by
== r
.blocked_by
&&
2452 l
.last_became_active
== r
.last_became_active
&&
2453 l
.last_became_peered
== r
.last_became_peered
&&
2454 l
.dirty_stats_invalid
== r
.dirty_stats_invalid
&&
2455 l
.omap_stats_invalid
== r
.omap_stats_invalid
&&
2456 l
.hitset_stats_invalid
== r
.hitset_stats_invalid
&&
2457 l
.hitset_bytes_stats_invalid
== r
.hitset_bytes_stats_invalid
&&
2458 l
.up_primary
== r
.up_primary
&&
2459 l
.acting_primary
== r
.acting_primary
&&
2460 l
.pin_stats_invalid
== r
.pin_stats_invalid
;
2463 // -- pool_stat_t --
2465 void pool_stat_t::dump(Formatter
*f
) const
2468 f
->dump_int("log_size", log_size
);
2469 f
->dump_int("ondisk_log_size", ondisk_log_size
);
2470 f
->dump_int("up", up
);
2471 f
->dump_int("acting", acting
);
2474 void pool_stat_t::encode(bufferlist
&bl
, uint64_t features
) const
2476 if ((features
& CEPH_FEATURE_OSDENC
) == 0) {
2479 ::encode(stats
, bl
);
2480 ::encode(log_size
, bl
);
2481 ::encode(ondisk_log_size
, bl
);
2485 ENCODE_START(6, 5, bl
);
2486 ::encode(stats
, bl
);
2487 ::encode(log_size
, bl
);
2488 ::encode(ondisk_log_size
, bl
);
2490 ::encode(acting
, bl
);
2494 void pool_stat_t::decode(bufferlist::iterator
&bl
)
2496 DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl
);
2497 if (struct_v
>= 4) {
2498 ::decode(stats
, bl
);
2499 ::decode(log_size
, bl
);
2500 ::decode(ondisk_log_size
, bl
);
2501 if (struct_v
>= 6) {
2503 ::decode(acting
, bl
);
2509 ::decode(stats
.sum
.num_bytes
, bl
);
2511 ::decode(num_kb
, bl
);
2512 ::decode(stats
.sum
.num_objects
, bl
);
2513 ::decode(stats
.sum
.num_object_clones
, bl
);
2514 ::decode(stats
.sum
.num_object_copies
, bl
);
2515 ::decode(stats
.sum
.num_objects_missing_on_primary
, bl
);
2516 ::decode(stats
.sum
.num_objects_degraded
, bl
);
2517 ::decode(log_size
, bl
);
2518 ::decode(ondisk_log_size
, bl
);
2519 if (struct_v
>= 2) {
2520 ::decode(stats
.sum
.num_rd
, bl
);
2521 ::decode(stats
.sum
.num_rd_kb
, bl
);
2522 ::decode(stats
.sum
.num_wr
, bl
);
2523 ::decode(stats
.sum
.num_wr_kb
, bl
);
2525 if (struct_v
>= 3) {
2526 ::decode(stats
.sum
.num_objects_unfound
, bl
);
2532 void pool_stat_t::generate_test_instances(list
<pool_stat_t
*>& o
)
2535 o
.push_back(new pool_stat_t(a
));
2537 list
<object_stat_collection_t
*> l
;
2538 object_stat_collection_t::generate_test_instances(l
);
2539 a
.stats
= *l
.back();
2541 a
.ondisk_log_size
= 456;
2544 o
.push_back(new pool_stat_t(a
));
2548 // -- pg_history_t --
2550 void pg_history_t::encode(bufferlist
&bl
) const
2552 ENCODE_START(8, 4, bl
);
2553 ::encode(epoch_created
, bl
);
2554 ::encode(last_epoch_started
, bl
);
2555 ::encode(last_epoch_clean
, bl
);
2556 ::encode(last_epoch_split
, bl
);
2557 ::encode(same_interval_since
, bl
);
2558 ::encode(same_up_since
, bl
);
2559 ::encode(same_primary_since
, bl
);
2560 ::encode(last_scrub
, bl
);
2561 ::encode(last_scrub_stamp
, bl
);
2562 ::encode(last_deep_scrub
, bl
);
2563 ::encode(last_deep_scrub_stamp
, bl
);
2564 ::encode(last_clean_scrub_stamp
, bl
);
2565 ::encode(last_epoch_marked_full
, bl
);
2566 ::encode(last_interval_started
, bl
);
2567 ::encode(last_interval_clean
, bl
);
2571 void pg_history_t::decode(bufferlist::iterator
&bl
)
2573 DECODE_START_LEGACY_COMPAT_LEN(8, 4, 4, bl
);
2574 ::decode(epoch_created
, bl
);
2575 ::decode(last_epoch_started
, bl
);
2577 ::decode(last_epoch_clean
, bl
);
2579 last_epoch_clean
= last_epoch_started
; // careful, it's a lie!
2580 ::decode(last_epoch_split
, bl
);
2581 ::decode(same_interval_since
, bl
);
2582 ::decode(same_up_since
, bl
);
2583 ::decode(same_primary_since
, bl
);
2584 if (struct_v
>= 2) {
2585 ::decode(last_scrub
, bl
);
2586 ::decode(last_scrub_stamp
, bl
);
2588 if (struct_v
>= 5) {
2589 ::decode(last_deep_scrub
, bl
);
2590 ::decode(last_deep_scrub_stamp
, bl
);
2592 if (struct_v
>= 6) {
2593 ::decode(last_clean_scrub_stamp
, bl
);
2595 if (struct_v
>= 7) {
2596 ::decode(last_epoch_marked_full
, bl
);
2598 if (struct_v
>= 8) {
2599 ::decode(last_interval_started
, bl
);
2600 ::decode(last_interval_clean
, bl
);
2602 if (last_epoch_started
>= same_interval_since
) {
2603 last_interval_started
= same_interval_since
;
2605 last_interval_started
= last_epoch_started
; // best guess
2607 if (last_epoch_clean
>= same_interval_since
) {
2608 last_interval_clean
= same_interval_since
;
2610 last_interval_clean
= last_epoch_clean
; // best guess
2616 void pg_history_t::dump(Formatter
*f
) const
2618 f
->dump_int("epoch_created", epoch_created
);
2619 f
->dump_int("last_epoch_started", last_epoch_started
);
2620 f
->dump_int("last_interval_started", last_interval_started
);
2621 f
->dump_int("last_epoch_clean", last_epoch_clean
);
2622 f
->dump_int("last_interval_clean", last_interval_clean
);
2623 f
->dump_int("last_epoch_split", last_epoch_split
);
2624 f
->dump_int("last_epoch_marked_full", last_epoch_marked_full
);
2625 f
->dump_int("same_up_since", same_up_since
);
2626 f
->dump_int("same_interval_since", same_interval_since
);
2627 f
->dump_int("same_primary_since", same_primary_since
);
2628 f
->dump_stream("last_scrub") << last_scrub
;
2629 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
2630 f
->dump_stream("last_deep_scrub") << last_deep_scrub
;
2631 f
->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp
;
2632 f
->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp
;
2635 void pg_history_t::generate_test_instances(list
<pg_history_t
*>& o
)
2637 o
.push_back(new pg_history_t
);
2638 o
.push_back(new pg_history_t
);
2639 o
.back()->epoch_created
= 1;
2640 o
.back()->last_epoch_started
= 2;
2641 o
.back()->last_interval_started
= 2;
2642 o
.back()->last_epoch_clean
= 3;
2643 o
.back()->last_interval_clean
= 2;
2644 o
.back()->last_epoch_split
= 4;
2645 o
.back()->same_up_since
= 5;
2646 o
.back()->same_interval_since
= 6;
2647 o
.back()->same_primary_since
= 7;
2648 o
.back()->last_scrub
= eversion_t(8, 9);
2649 o
.back()->last_scrub_stamp
= utime_t(10, 11);
2650 o
.back()->last_deep_scrub
= eversion_t(12, 13);
2651 o
.back()->last_deep_scrub_stamp
= utime_t(14, 15);
2652 o
.back()->last_clean_scrub_stamp
= utime_t(16, 17);
2653 o
.back()->last_epoch_marked_full
= 18;
2659 void pg_info_t::encode(bufferlist
&bl
) const
2661 ENCODE_START(32, 26, bl
);
2662 ::encode(pgid
.pgid
, bl
);
2663 ::encode(last_update
, bl
);
2664 ::encode(last_complete
, bl
);
2665 ::encode(log_tail
, bl
);
2666 if (last_backfill_bitwise
&& !last_backfill
.is_max()) {
2667 ::encode(hobject_t(), bl
);
2669 ::encode(last_backfill
, bl
);
2671 ::encode(stats
, bl
);
2673 ::encode(purged_snaps
, bl
);
2674 ::encode(last_epoch_started
, bl
);
2675 ::encode(last_user_version
, bl
);
2676 ::encode(hit_set
, bl
);
2677 ::encode(pgid
.shard
, bl
);
2678 ::encode(last_backfill
, bl
);
2679 ::encode(last_backfill_bitwise
, bl
);
2680 ::encode(last_interval_started
, bl
);
2684 void pg_info_t::decode(bufferlist::iterator
&bl
)
2686 DECODE_START(32, bl
);
2687 ::decode(pgid
.pgid
, bl
);
2688 ::decode(last_update
, bl
);
2689 ::decode(last_complete
, bl
);
2690 ::decode(log_tail
, bl
);
2692 hobject_t old_last_backfill
;
2693 ::decode(old_last_backfill
, bl
);
2695 ::decode(stats
, bl
);
2697 ::decode(purged_snaps
, bl
);
2698 ::decode(last_epoch_started
, bl
);
2699 ::decode(last_user_version
, bl
);
2700 ::decode(hit_set
, bl
);
2701 ::decode(pgid
.shard
, bl
);
2702 ::decode(last_backfill
, bl
);
2703 ::decode(last_backfill_bitwise
, bl
);
2704 if (struct_v
>= 32) {
2705 ::decode(last_interval_started
, bl
);
2707 last_interval_started
= last_epoch_started
;
2714 void pg_info_t::dump(Formatter
*f
) const
2716 f
->dump_stream("pgid") << pgid
;
2717 f
->dump_stream("last_update") << last_update
;
2718 f
->dump_stream("last_complete") << last_complete
;
2719 f
->dump_stream("log_tail") << log_tail
;
2720 f
->dump_int("last_user_version", last_user_version
);
2721 f
->dump_stream("last_backfill") << last_backfill
;
2722 f
->dump_int("last_backfill_bitwise", (int)last_backfill_bitwise
);
2723 f
->open_array_section("purged_snaps");
2724 for (interval_set
<snapid_t
>::const_iterator i
=purged_snaps
.begin();
2725 i
!= purged_snaps
.end();
2727 f
->open_object_section("purged_snap_interval");
2728 f
->dump_stream("start") << i
.get_start();
2729 f
->dump_stream("length") << i
.get_len();
2733 f
->open_object_section("history");
2736 f
->open_object_section("stats");
2740 f
->dump_int("empty", is_empty());
2741 f
->dump_int("dne", dne());
2742 f
->dump_int("incomplete", is_incomplete());
2743 f
->dump_int("last_epoch_started", last_epoch_started
);
2745 f
->open_object_section("hit_set_history");
2750 void pg_info_t::generate_test_instances(list
<pg_info_t
*>& o
)
2752 o
.push_back(new pg_info_t
);
2753 o
.push_back(new pg_info_t
);
2754 list
<pg_history_t
*> h
;
2755 pg_history_t::generate_test_instances(h
);
2756 o
.back()->history
= *h
.back();
2757 o
.back()->pgid
= spg_t(pg_t(1, 2, -1), shard_id_t::NO_SHARD
);
2758 o
.back()->last_update
= eversion_t(3, 4);
2759 o
.back()->last_complete
= eversion_t(5, 6);
2760 o
.back()->last_user_version
= 2;
2761 o
.back()->log_tail
= eversion_t(7, 8);
2762 o
.back()->last_backfill
= hobject_t(object_t("objname"), "key", 123, 456, -1, "");
2763 o
.back()->last_backfill_bitwise
= true;
2766 pg_stat_t::generate_test_instances(s
);
2767 o
.back()->stats
= *s
.back();
2770 list
<pg_hit_set_history_t
*> s
;
2771 pg_hit_set_history_t::generate_test_instances(s
);
2772 o
.back()->hit_set
= *s
.back();
2776 // -- pg_notify_t --
2777 void pg_notify_t::encode(bufferlist
&bl
) const
2779 ENCODE_START(2, 2, bl
);
2780 ::encode(query_epoch
, bl
);
2781 ::encode(epoch_sent
, bl
);
2788 void pg_notify_t::decode(bufferlist::iterator
&bl
)
2790 DECODE_START(2, bl
);
2791 ::decode(query_epoch
, bl
);
2792 ::decode(epoch_sent
, bl
);
2799 void pg_notify_t::dump(Formatter
*f
) const
2801 f
->dump_int("from", from
);
2802 f
->dump_int("to", to
);
2803 f
->dump_unsigned("query_epoch", query_epoch
);
2804 f
->dump_unsigned("epoch_sent", epoch_sent
);
2806 f
->open_object_section("info");
2812 void pg_notify_t::generate_test_instances(list
<pg_notify_t
*>& o
)
2814 o
.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD
, 1, 1, pg_info_t()));
2815 o
.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t()));
2818 ostream
&operator<<(ostream
&lhs
, const pg_notify_t
¬ify
)
2820 lhs
<< "(query:" << notify
.query_epoch
2821 << " sent:" << notify
.epoch_sent
2822 << " " << notify
.info
;
2823 if (notify
.from
!= shard_id_t::NO_SHARD
||
2824 notify
.to
!= shard_id_t::NO_SHARD
)
2825 lhs
<< " " << (unsigned)notify
.from
2826 << "->" << (unsigned)notify
.to
;
2830 // -- pg_interval_t --
2832 void PastIntervals::pg_interval_t::encode(bufferlist
& bl
) const
2834 ENCODE_START(4, 2, bl
);
2835 ::encode(first
, bl
);
2838 ::encode(acting
, bl
);
2839 ::encode(maybe_went_rw
, bl
);
2840 ::encode(primary
, bl
);
2841 ::encode(up_primary
, bl
);
2845 void PastIntervals::pg_interval_t::decode(bufferlist::iterator
& bl
)
2847 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl
);
2848 ::decode(first
, bl
);
2851 ::decode(acting
, bl
);
2852 ::decode(maybe_went_rw
, bl
);
2853 if (struct_v
>= 3) {
2854 ::decode(primary
, bl
);
2857 primary
= acting
[0];
2859 if (struct_v
>= 4) {
2860 ::decode(up_primary
, bl
);
2868 void PastIntervals::pg_interval_t::dump(Formatter
*f
) const
2870 f
->dump_unsigned("first", first
);
2871 f
->dump_unsigned("last", last
);
2872 f
->dump_int("maybe_went_rw", maybe_went_rw
? 1 : 0);
2873 f
->open_array_section("up");
2874 for (vector
<int>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
)
2875 f
->dump_int("osd", *p
);
2877 f
->open_array_section("acting");
2878 for (vector
<int>::const_iterator p
= acting
.begin(); p
!= acting
.end(); ++p
)
2879 f
->dump_int("osd", *p
);
2881 f
->dump_int("primary", primary
);
2882 f
->dump_int("up_primary", up_primary
);
2885 void PastIntervals::pg_interval_t::generate_test_instances(list
<pg_interval_t
*>& o
)
2887 o
.push_back(new pg_interval_t
);
2888 o
.push_back(new pg_interval_t
);
2889 o
.back()->up
.push_back(1);
2890 o
.back()->acting
.push_back(2);
2891 o
.back()->acting
.push_back(3);
2892 o
.back()->first
= 4;
2894 o
.back()->maybe_went_rw
= true;
2897 WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t
)
2899 class pi_simple_rep
: public PastIntervals::interval_rep
{
2900 map
<epoch_t
, PastIntervals::pg_interval_t
> interval_map
;
2904 std::list
<PastIntervals::pg_interval_t
> &&intervals
) {
2905 for (auto &&i
: intervals
)
2906 add_interval(ec_pool
, i
);
2910 pi_simple_rep() = default;
2911 pi_simple_rep(const pi_simple_rep
&) = default;
2912 pi_simple_rep(pi_simple_rep
&&) = default;
2913 pi_simple_rep
&operator=(pi_simple_rep
&&) = default;
2914 pi_simple_rep
&operator=(const pi_simple_rep
&) = default;
2916 size_t size() const override
{ return interval_map
.size(); }
2917 bool empty() const override
{ return interval_map
.empty(); }
2918 void clear() override
{ interval_map
.clear(); }
2919 pair
<epoch_t
, epoch_t
> get_bounds() const override
{
2920 auto iter
= interval_map
.begin();
2921 if (iter
!= interval_map
.end()) {
2922 auto riter
= interval_map
.rbegin();
2925 riter
->second
.last
+ 1);
2927 return make_pair(0, 0);
2930 set
<pg_shard_t
> get_all_participants(
2931 bool ec_pool
) const override
{
2932 set
<pg_shard_t
> all_participants
;
2934 // We need to decide who might have unfound objects that we need
2935 auto p
= interval_map
.rbegin();
2936 auto end
= interval_map
.rend();
2937 for (; p
!= end
; ++p
) {
2938 const PastIntervals::pg_interval_t
&interval(p
->second
);
2939 // If nothing changed, we don't care about this interval.
2940 if (!interval
.maybe_went_rw
)
2944 std::vector
<int>::const_iterator a
= interval
.acting
.begin();
2945 std::vector
<int>::const_iterator a_end
= interval
.acting
.end();
2946 for (; a
!= a_end
; ++a
, ++i
) {
2947 pg_shard_t
shard(*a
, ec_pool
? shard_id_t(i
) : shard_id_t::NO_SHARD
);
2948 if (*a
!= CRUSH_ITEM_NONE
)
2949 all_participants
.insert(shard
);
2952 return all_participants
;
2956 const PastIntervals::pg_interval_t
&interval
) override
{
2957 interval_map
[interval
.first
] = interval
;
2959 unique_ptr
<PastIntervals::interval_rep
> clone() const override
{
2960 return unique_ptr
<PastIntervals::interval_rep
>(new pi_simple_rep(*this));
2962 ostream
&print(ostream
&out
) const override
{
2963 return out
<< interval_map
;
2965 void encode(bufferlist
&bl
) const override
{
2966 ::encode(interval_map
, bl
);
2968 void decode(bufferlist::iterator
&bl
) override
{
2969 ::decode(interval_map
, bl
);
2971 void dump(Formatter
*f
) const override
{
2972 f
->open_array_section("PastIntervals::compat_rep");
2973 for (auto &&i
: interval_map
) {
2974 f
->open_object_section("pg_interval_t");
2975 f
->dump_int("epoch", i
.first
);
2976 f
->open_object_section("interval");
2983 bool is_classic() const override
{
2986 static void generate_test_instances(list
<pi_simple_rep
*> &o
) {
2987 using ival
= PastIntervals::pg_interval_t
;
2988 using ivallst
= std::list
<ival
>;
2992 { ival
{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
2993 , ival
{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
2994 , ival
{{ 2}, { 2}, 31, 35, false, 2, 2}
2995 , ival
{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3000 { ival
{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3001 , ival
{{ 1, 2}, { 1, 2}, 20, 30, true, 1, 1}
3002 , ival
{{ 2}, { 2}, 31, 35, false, 2, 2}
3003 , ival
{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3008 { ival
{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3009 , ival
{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3010 , ival
{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3011 , ival
{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3015 void iterate_mayberw_back_to(
3018 std::function
<void(epoch_t
, const set
<pg_shard_t
> &)> &&f
) const override
{
3019 for (auto i
= interval_map
.rbegin(); i
!= interval_map
.rend(); ++i
) {
3020 if (!i
->second
.maybe_went_rw
)
3022 if (i
->second
.last
< les
)
3024 set
<pg_shard_t
> actingset
;
3025 for (unsigned j
= 0; j
< i
->second
.acting
.size(); ++j
) {
3026 if (i
->second
.acting
[j
] == CRUSH_ITEM_NONE
)
3030 i
->second
.acting
[j
],
3031 ec_pool
? shard_id_t(j
) : shard_id_t::NO_SHARD
));
3033 f(i
->second
.first
, actingset
);
3037 bool has_full_intervals() const override
{ return true; }
3038 void iterate_all_intervals(
3039 std::function
<void(const PastIntervals::pg_interval_t
&)> &&f
3041 for (auto &&i
: interval_map
) {
3045 virtual ~pi_simple_rep() override
{}
3051 * PastIntervals only needs to be able to answer two questions:
3052 * 1) Where should the primary look for unfound objects?
3053 * 2) List a set of subsets of the OSDs such that contacting at least
3054 * one from each subset guarrantees we speak to at least one witness
3055 * of any completed write.
3057 * Crucially, 2) does not require keeping *all* past intervals. Certainly,
3058 * we don't need to keep any where maybe_went_rw would be false. We also
3059 * needn't keep two intervals where the actingset in one is a subset
3060 * of the other (only need to keep the smaller of the two sets). In order
3061 * to accurately trim the set of intervals as last_epoch_started changes
3062 * without rebuilding the set from scratch, we'll retain the larger set
3063 * if it in an older interval.
3065 struct compact_interval_t
{
3068 set
<pg_shard_t
> acting
;
3069 bool supersedes(const compact_interval_t
&other
) {
3070 for (auto &&i
: acting
) {
3071 if (!other
.acting
.count(i
))
3076 void dump(Formatter
*f
) const {
3077 f
->open_object_section("compact_interval_t");
3078 f
->dump_stream("first") << first
;
3079 f
->dump_stream("last") << last
;
3080 f
->dump_stream("acting") << acting
;
3083 void encode(bufferlist
&bl
) const {
3084 ENCODE_START(1, 1, bl
);
3085 ::encode(first
, bl
);
3087 ::encode(acting
, bl
);
3090 void decode(bufferlist::iterator
&bl
) {
3091 DECODE_START(1, bl
);
3092 ::decode(first
, bl
);
3094 ::decode(acting
, bl
);
3097 static void generate_test_instances(list
<compact_interval_t
*> & o
) {
3098 /* Not going to be used, we'll generate pi_compact_rep directly */
3101 ostream
&operator<<(ostream
&o
, const compact_interval_t
&rhs
)
3103 return o
<< "([" << rhs
.first
<< "," << rhs
.last
3104 << "] acting " << rhs
.acting
<< ")";
3106 WRITE_CLASS_ENCODER(compact_interval_t
)
3108 class pi_compact_rep
: public PastIntervals::interval_rep
{
3110 epoch_t last
= 0; // inclusive
3111 set
<pg_shard_t
> all_participants
;
3112 list
<compact_interval_t
> intervals
;
3115 std::list
<PastIntervals::pg_interval_t
> &&intervals
) {
3116 for (auto &&i
: intervals
)
3117 add_interval(ec_pool
, i
);
3120 pi_compact_rep() = default;
3121 pi_compact_rep(const pi_compact_rep
&) = default;
3122 pi_compact_rep(pi_compact_rep
&&) = default;
3123 pi_compact_rep
&operator=(const pi_compact_rep
&) = default;
3124 pi_compact_rep
&operator=(pi_compact_rep
&&) = default;
3126 size_t size() const override
{ return intervals
.size(); }
3127 bool empty() const override
{
3128 return first
> last
|| (first
== 0 && last
== 0);
3130 void clear() override
{
3131 *this = pi_compact_rep();
3133 pair
<epoch_t
, epoch_t
> get_bounds() const override
{
3134 return make_pair(first
, last
+ 1);
3136 set
<pg_shard_t
> get_all_participants(
3137 bool ec_pool
) const override
{
3138 return all_participants
;
3141 bool ec_pool
, const PastIntervals::pg_interval_t
&interval
) override
{
3143 first
= interval
.first
;
3144 assert(interval
.last
> last
);
3145 last
= interval
.last
;
3146 set
<pg_shard_t
> acting
;
3147 for (unsigned i
= 0; i
< interval
.acting
.size(); ++i
) {
3148 if (interval
.acting
[i
] == CRUSH_ITEM_NONE
)
3153 ec_pool
? shard_id_t(i
) : shard_id_t::NO_SHARD
));
3155 all_participants
.insert(acting
.begin(), acting
.end());
3156 if (!interval
.maybe_went_rw
)
3158 intervals
.push_back(
3159 compact_interval_t
{interval
.first
, interval
.last
, acting
});
3160 auto plast
= intervals
.end();
3162 for (auto cur
= intervals
.begin(); cur
!= plast
; ) {
3163 if (plast
->supersedes(*cur
)) {
3164 intervals
.erase(cur
++);
3170 unique_ptr
<PastIntervals::interval_rep
> clone() const override
{
3171 return unique_ptr
<PastIntervals::interval_rep
>(new pi_compact_rep(*this));
3173 ostream
&print(ostream
&out
) const override
{
3174 return out
<< "([" << first
<< "," << last
3175 << "] intervals=" << intervals
<< ")";
3177 void encode(bufferlist
&bl
) const override
{
3178 ENCODE_START(1, 1, bl
);
3179 ::encode(first
, bl
);
3181 ::encode(all_participants
, bl
);
3182 ::encode(intervals
, bl
);
3185 void decode(bufferlist::iterator
&bl
) override
{
3186 DECODE_START(1, bl
);
3187 ::decode(first
, bl
);
3189 ::decode(all_participants
, bl
);
3190 ::decode(intervals
, bl
);
3193 void dump(Formatter
*f
) const override
{
3194 f
->open_object_section("PastIntervals::compact_rep");
3195 f
->dump_stream("first") << first
;
3196 f
->dump_stream("last") << last
;
3197 f
->open_array_section("all_participants");
3198 for (auto& i
: all_participants
) {
3199 f
->dump_object("pg_shard", i
);
3202 f
->open_array_section("intervals");
3203 for (auto &&i
: intervals
) {
3209 bool is_classic() const override
{
3212 static void generate_test_instances(list
<pi_compact_rep
*> &o
) {
3213 using ival
= PastIntervals::pg_interval_t
;
3214 using ivallst
= std::list
<ival
>;
3218 { ival
{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3219 , ival
{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3220 , ival
{{ 2}, { 2}, 31, 35, false, 2, 2}
3221 , ival
{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3226 { ival
{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3227 , ival
{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3228 , ival
{{ 2}, { 2}, 31, 35, false, 2, 2}
3229 , ival
{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3234 { ival
{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3235 , ival
{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3236 , ival
{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3237 , ival
{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3240 void iterate_mayberw_back_to(
3243 std::function
<void(epoch_t
, const set
<pg_shard_t
> &)> &&f
) const override
{
3244 for (auto i
= intervals
.rbegin(); i
!= intervals
.rend(); ++i
) {
3247 f(i
->first
, i
->acting
);
3250 virtual ~pi_compact_rep() override
{}
3252 WRITE_CLASS_ENCODER(pi_compact_rep
)
3254 PastIntervals::PastIntervals(const PastIntervals
&rhs
)
3255 : past_intervals(rhs
.past_intervals
?
3256 rhs
.past_intervals
->clone() :
3259 PastIntervals
&PastIntervals::operator=(const PastIntervals
&rhs
)
3261 PastIntervals
other(rhs
);
3262 ::swap(other
, *this);
3266 ostream
& operator<<(ostream
& out
, const PastIntervals
&i
)
3268 if (i
.past_intervals
) {
3269 return i
.past_intervals
->print(out
);
3271 return out
<< "(empty)";
3275 ostream
& operator<<(ostream
& out
, const PastIntervals::PriorSet
&i
)
3277 return out
<< "PriorSet("
3278 << "ec_pool: " << i
.ec_pool
3279 << ", probe: " << i
.probe
3280 << ", down: " << i
.down
3281 << ", blocked_by: " << i
.blocked_by
3282 << ", pg_down: " << i
.pg_down
3286 void PastIntervals::decode(bufferlist::iterator
&bl
)
3288 DECODE_START(1, bl
);
3295 past_intervals
.reset(new pi_simple_rep
);
3296 past_intervals
->decode(bl
);
3299 past_intervals
.reset(new pi_compact_rep
);
3300 past_intervals
->decode(bl
);
3306 void PastIntervals::decode_classic(bufferlist::iterator
&bl
)
3308 past_intervals
.reset(new pi_simple_rep
);
3309 past_intervals
->decode(bl
);
3312 void PastIntervals::generate_test_instances(list
<PastIntervals
*> &o
)
3315 list
<pi_simple_rep
*> simple
;
3316 pi_simple_rep::generate_test_instances(simple
);
3317 for (auto &&i
: simple
) {
3318 // takes ownership of contents
3319 o
.push_back(new PastIntervals(i
));
3323 list
<pi_compact_rep
*> compact
;
3324 pi_compact_rep::generate_test_instances(compact
);
3325 for (auto &&i
: compact
) {
3326 // takes ownership of contents
3327 o
.push_back(new PastIntervals(i
));
3333 void PastIntervals::update_type(bool ec_pool
, bool compact
)
3336 if (!past_intervals
) {
3337 past_intervals
.reset(new pi_simple_rep
);
3339 // we never convert from compact back to classic
3340 assert(is_classic());
3343 if (!past_intervals
) {
3344 past_intervals
.reset(new pi_compact_rep
);
3345 } else if (is_classic()) {
3346 auto old
= std::move(past_intervals
);
3347 past_intervals
.reset(new pi_compact_rep
);
3348 assert(old
->has_full_intervals());
3349 old
->iterate_all_intervals([&](const pg_interval_t
&i
) {
3350 past_intervals
->add_interval(ec_pool
, i
);
3356 void PastIntervals::update_type_from_map(bool ec_pool
, const OSDMap
&osdmap
)
3358 update_type(ec_pool
, osdmap
.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS
));
3361 bool PastIntervals::is_new_interval(
3362 int old_acting_primary
,
3363 int new_acting_primary
,
3364 const vector
<int> &old_acting
,
3365 const vector
<int> &new_acting
,
3368 const vector
<int> &old_up
,
3369 const vector
<int> &new_up
,
3374 unsigned old_pg_num
,
3375 unsigned new_pg_num
,
3376 bool old_sort_bitwise
,
3377 bool new_sort_bitwise
,
3379 return old_acting_primary
!= new_acting_primary
||
3380 new_acting
!= old_acting
||
3381 old_up_primary
!= new_up_primary
||
3383 old_min_size
!= new_min_size
||
3384 old_size
!= new_size
||
3385 pgid
.is_split(old_pg_num
, new_pg_num
, 0) ||
3386 old_sort_bitwise
!= new_sort_bitwise
;
3389 bool PastIntervals::is_new_interval(
3390 int old_acting_primary
,
3391 int new_acting_primary
,
3392 const vector
<int> &old_acting
,
3393 const vector
<int> &new_acting
,
3396 const vector
<int> &old_up
,
3397 const vector
<int> &new_up
,
3401 return !(lastmap
->get_pools().count(pgid
.pool())) ||
3402 is_new_interval(old_acting_primary
,
3410 lastmap
->get_pools().find(pgid
.pool())->second
.size
,
3411 osdmap
->get_pools().find(pgid
.pool())->second
.size
,
3412 lastmap
->get_pools().find(pgid
.pool())->second
.min_size
,
3413 osdmap
->get_pools().find(pgid
.pool())->second
.min_size
,
3414 lastmap
->get_pg_num(pgid
.pool()),
3415 osdmap
->get_pg_num(pgid
.pool()),
3416 lastmap
->test_flag(CEPH_OSDMAP_SORTBITWISE
),
3417 osdmap
->test_flag(CEPH_OSDMAP_SORTBITWISE
),
3421 bool PastIntervals::check_new_interval(
3422 int old_acting_primary
,
3423 int new_acting_primary
,
3424 const vector
<int> &old_acting
,
3425 const vector
<int> &new_acting
,
3428 const vector
<int> &old_up
,
3429 const vector
<int> &new_up
,
3430 epoch_t same_interval_since
,
3431 epoch_t last_epoch_clean
,
3435 IsPGRecoverablePredicate
*could_have_gone_active
,
3436 PastIntervals
*past_intervals
,
3440 * We have to be careful to gracefully deal with situations like
3441 * so. Say we have a power outage or something that takes out both
3442 * OSDs, but the monitor doesn't mark them down in the same epoch.
3443 * The history may look like
3447 * 3: let's say B dies for good, too (say, from the power spike)
3450 * which makes it look like B may have applied updates to the PG
3451 * that we need in order to proceed. This sucks...
3453 * To minimize the risk of this happening, we CANNOT go active if
3454 * _any_ OSDs in the prior set are down until we send an MOSDAlive
3455 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
3456 * Then, we have something like
3463 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
3473 * -> we must wait for B, bc it was alive through 2, and could have
3474 * written to the pg.
3476 * If B is really dead, then an administrator will need to manually
3477 * intervene by marking the OSD as "lost."
3480 // remember past interval
3481 // NOTE: a change in the up set primary triggers an interval
3482 // change, even though the interval members in the pg_interval_t
3484 assert(past_intervals
);
3485 assert(past_intervals
->past_intervals
);
3486 if (is_new_interval(
3499 i
.first
= same_interval_since
;
3500 i
.last
= osdmap
->get_epoch() - 1;
3501 assert(i
.first
<= i
.last
);
3502 i
.acting
= old_acting
;
3504 i
.primary
= old_acting_primary
;
3505 i
.up_primary
= old_up_primary
;
3507 unsigned num_acting
= 0;
3508 for (vector
<int>::const_iterator p
= i
.acting
.begin(); p
!= i
.acting
.end();
3510 if (*p
!= CRUSH_ITEM_NONE
)
3513 assert(lastmap
->get_pools().count(pgid
.pool()));
3514 const pg_pool_t
& old_pg_pool
= lastmap
->get_pools().find(pgid
.pool())->second
;
3515 set
<pg_shard_t
> old_acting_shards
;
3516 old_pg_pool
.convert_to_pg_shards(old_acting
, &old_acting_shards
);
3520 num_acting
>= old_pg_pool
.min_size
&&
3521 (*could_have_gone_active
)(old_acting_shards
)) {
3523 *out
<< __func__
<< " " << i
3525 << " up_thru " << lastmap
->get_up_thru(i
.primary
)
3526 << " up_from " << lastmap
->get_up_from(i
.primary
)
3527 << " last_epoch_clean " << last_epoch_clean
3529 if (lastmap
->get_up_thru(i
.primary
) >= i
.first
&&
3530 lastmap
->get_up_from(i
.primary
) <= i
.first
) {
3531 i
.maybe_went_rw
= true;
3533 *out
<< __func__
<< " " << i
3534 << " : primary up " << lastmap
->get_up_from(i
.primary
)
3535 << "-" << lastmap
->get_up_thru(i
.primary
)
3536 << " includes interval"
3538 } else if (last_epoch_clean
>= i
.first
&&
3539 last_epoch_clean
<= i
.last
) {
3540 // If the last_epoch_clean is included in this interval, then
3541 // the pg must have been rw (for recovery to have completed).
3542 // This is important because we won't know the _real_
3543 // first_epoch because we stop at last_epoch_clean, and we
3544 // don't want the oldest interval to randomly have
3545 // maybe_went_rw false depending on the relative up_thru vs
3546 // last_epoch_clean timing.
3547 i
.maybe_went_rw
= true;
3549 *out
<< __func__
<< " " << i
3550 << " : includes last_epoch_clean " << last_epoch_clean
3551 << " and presumed to have been rw"
3554 i
.maybe_went_rw
= false;
3556 *out
<< __func__
<< " " << i
3557 << " : primary up " << lastmap
->get_up_from(i
.primary
)
3558 << "-" << lastmap
->get_up_thru(i
.primary
)
3559 << " does not include interval"
3563 i
.maybe_went_rw
= false;
3565 *out
<< __func__
<< " " << i
<< " : acting set is too small" << std::endl
;
3567 past_intervals
->past_intervals
->add_interval(old_pg_pool
.ec_pool(), i
);
3575 // true if the given map affects the prior set
3576 bool PastIntervals::PriorSet::affected_by_map(
3577 const OSDMap
&osdmap
,
3578 const DoutPrefixProvider
*dpp
) const
3580 for (set
<pg_shard_t
>::iterator p
= probe
.begin();
3585 // did someone in the prior set go down?
3586 if (osdmap
.is_down(o
) && down
.count(o
) == 0) {
3587 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " now down" << dendl
;
3591 // did a down osd in cur get (re)marked as lost?
3592 map
<int, epoch_t
>::const_iterator r
= blocked_by
.find(o
);
3593 if (r
!= blocked_by
.end()) {
3594 if (!osdmap
.exists(o
)) {
3595 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " no longer exists" << dendl
;
3598 if (osdmap
.get_info(o
).lost_at
!= r
->second
) {
3599 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " (re)marked as lost" << dendl
;
3605 // did someone in the prior down set go up?
3606 for (set
<int>::const_iterator p
= down
.begin();
3611 if (osdmap
.is_up(o
)) {
3612 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " now up" << dendl
;
3616 // did someone in the prior set get lost or destroyed?
3617 if (!osdmap
.exists(o
)) {
3618 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " no longer exists" << dendl
;
3621 // did a down osd in down get (re)marked as lost?
3622 map
<int, epoch_t
>::const_iterator r
= blocked_by
.find(o
);
3623 if (r
!= blocked_by
.end()) {
3624 if (osdmap
.get_info(o
).lost_at
!= r
->second
) {
3625 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " (re)marked as lost" << dendl
;
3634 ostream
& operator<<(ostream
& out
, const PastIntervals::pg_interval_t
& i
)
3636 out
<< "interval(" << i
.first
<< "-" << i
.last
3637 << " up " << i
.up
<< "(" << i
.up_primary
<< ")"
3638 << " acting " << i
.acting
<< "(" << i
.primary
<< ")";
3639 if (i
.maybe_went_rw
)
3640 out
<< " maybe_went_rw";
3649 void pg_query_t::encode(bufferlist
&bl
, uint64_t features
) const {
3650 ENCODE_START(3, 3, bl
);
3652 ::encode(since
, bl
);
3654 ::encode(epoch_sent
, bl
);
3660 void pg_query_t::decode(bufferlist::iterator
&bl
) {
3661 DECODE_START(3, bl
);
3663 ::decode(since
, bl
);
3665 ::decode(epoch_sent
, bl
);
3671 void pg_query_t::dump(Formatter
*f
) const
3673 f
->dump_int("from", from
);
3674 f
->dump_int("to", to
);
3675 f
->dump_string("type", get_type_name());
3676 f
->dump_stream("since") << since
;
3677 f
->dump_stream("epoch_sent") << epoch_sent
;
3678 f
->open_object_section("history");
3682 void pg_query_t::generate_test_instances(list
<pg_query_t
*>& o
)
3684 o
.push_back(new pg_query_t());
3685 list
<pg_history_t
*> h
;
3686 pg_history_t::generate_test_instances(h
);
3687 o
.push_back(new pg_query_t(pg_query_t::INFO
, shard_id_t(1), shard_id_t(2), *h
.back(), 4));
3688 o
.push_back(new pg_query_t(pg_query_t::MISSING
, shard_id_t(2), shard_id_t(3), *h
.back(), 4));
3689 o
.push_back(new pg_query_t(pg_query_t::LOG
, shard_id_t(0), shard_id_t(0),
3690 eversion_t(4, 5), *h
.back(), 4));
3691 o
.push_back(new pg_query_t(pg_query_t::FULLLOG
,
3692 shard_id_t::NO_SHARD
, shard_id_t::NO_SHARD
,
3696 // -- ObjectModDesc --
3697 void ObjectModDesc::visit(Visitor
*visitor
) const
3699 bufferlist::iterator bp
= bl
.begin();
3702 DECODE_START(max_required_version
, bp
);
3709 visitor
->append(size
);
3713 map
<string
, boost::optional
<bufferlist
> > attrs
;
3714 ::decode(attrs
, bp
);
3715 visitor
->setattrs(attrs
);
3719 version_t old_version
;
3720 ::decode(old_version
, bp
);
3721 visitor
->rmobject(old_version
);
3728 case UPDATE_SNAPS
: {
3729 set
<snapid_t
> snaps
;
3730 ::decode(snaps
, bp
);
3731 visitor
->update_snaps(snaps
);
3735 version_t old_version
;
3736 ::decode(old_version
, bp
);
3737 visitor
->try_rmobject(old_version
);
3740 case ROLLBACK_EXTENTS
: {
3741 vector
<pair
<uint64_t, uint64_t> > extents
;
3744 ::decode(extents
, bp
);
3745 visitor
->rollback_extents(gen
,extents
);
3749 assert(0 == "Invalid rollback code");
3754 assert(0 == "Invalid encoding");
3758 struct DumpVisitor
: public ObjectModDesc::Visitor
{
3760 explicit DumpVisitor(Formatter
*f
) : f(f
) {}
3761 void append(uint64_t old_size
) override
{
3762 f
->open_object_section("op");
3763 f
->dump_string("code", "APPEND");
3764 f
->dump_unsigned("old_size", old_size
);
3767 void setattrs(map
<string
, boost::optional
<bufferlist
> > &attrs
) override
{
3768 f
->open_object_section("op");
3769 f
->dump_string("code", "SETATTRS");
3770 f
->open_array_section("attrs");
3771 for (map
<string
, boost::optional
<bufferlist
> >::iterator i
= attrs
.begin();
3774 f
->dump_string("attr_name", i
->first
);
3779 void rmobject(version_t old_version
) override
{
3780 f
->open_object_section("op");
3781 f
->dump_string("code", "RMOBJECT");
3782 f
->dump_unsigned("old_version", old_version
);
3785 void try_rmobject(version_t old_version
) override
{
3786 f
->open_object_section("op");
3787 f
->dump_string("code", "TRY_RMOBJECT");
3788 f
->dump_unsigned("old_version", old_version
);
3791 void create() override
{
3792 f
->open_object_section("op");
3793 f
->dump_string("code", "CREATE");
3796 void update_snaps(const set
<snapid_t
> &snaps
) override
{
3797 f
->open_object_section("op");
3798 f
->dump_string("code", "UPDATE_SNAPS");
3799 f
->dump_stream("snaps") << snaps
;
3802 void rollback_extents(
3804 const vector
<pair
<uint64_t, uint64_t> > &extents
) override
{
3805 f
->open_object_section("op");
3806 f
->dump_string("code", "ROLLBACK_EXTENTS");
3807 f
->dump_unsigned("gen", gen
);
3808 f
->dump_stream("snaps") << extents
;
3813 void ObjectModDesc::dump(Formatter
*f
) const
3815 f
->open_object_section("object_mod_desc");
3816 f
->dump_bool("can_local_rollback", can_local_rollback
);
3817 f
->dump_bool("rollback_info_completed", rollback_info_completed
);
3819 f
->open_array_section("ops");
3827 void ObjectModDesc::generate_test_instances(list
<ObjectModDesc
*>& o
)
3829 map
<string
, boost::optional
<bufferlist
> > attrs
;
3833 o
.push_back(new ObjectModDesc());
3834 o
.back()->append(100);
3835 o
.back()->setattrs(attrs
);
3836 o
.push_back(new ObjectModDesc());
3837 o
.back()->rmobject(1001);
3838 o
.push_back(new ObjectModDesc());
3840 o
.back()->setattrs(attrs
);
3841 o
.push_back(new ObjectModDesc());
3843 o
.back()->setattrs(attrs
);
3844 o
.back()->mark_unrollbackable();
3845 o
.back()->append(1000);
3848 void ObjectModDesc::encode(bufferlist
&_bl
) const
3850 ENCODE_START(max_required_version
, max_required_version
, _bl
);
3851 ::encode(can_local_rollback
, _bl
);
3852 ::encode(rollback_info_completed
, _bl
);
3856 void ObjectModDesc::decode(bufferlist::iterator
&_bl
)
3858 DECODE_START(2, _bl
);
3859 max_required_version
= struct_v
;
3860 ::decode(can_local_rollback
, _bl
);
3861 ::decode(rollback_info_completed
, _bl
);
3863 // ensure bl does not pin a larger buffer in memory
3868 // -- pg_log_entry_t --
3870 string
pg_log_entry_t::get_key_name() const
3872 return version
.get_key_name();
3875 void pg_log_entry_t::encode_with_checksum(bufferlist
& bl
) const
3877 bufferlist
ebl(sizeof(*this)*2);
3879 __u32 crc
= ebl
.crc32c(0);
3884 void pg_log_entry_t::decode_with_checksum(bufferlist::iterator
& p
)
3890 if (crc
!= bl
.crc32c(0))
3891 throw buffer::malformed_input("bad checksum on pg_log_entry_t");
3892 bufferlist::iterator q
= bl
.begin();
3896 void pg_log_entry_t::encode(bufferlist
&bl
) const
3898 ENCODE_START(11, 4, bl
);
3901 ::encode(version
, bl
);
3904 * Added with reverting_to:
3905 * Previous code used prior_version to encode
3906 * what we now call reverting_to. This will
3907 * allow older code to decode reverting_to
3908 * into prior_version as expected.
3910 if (op
== LOST_REVERT
)
3911 ::encode(reverting_to
, bl
);
3913 ::encode(prior_version
, bl
);
3915 ::encode(reqid
, bl
);
3916 ::encode(mtime
, bl
);
3917 if (op
== LOST_REVERT
)
3918 ::encode(prior_version
, bl
);
3919 ::encode(snaps
, bl
);
3920 ::encode(user_version
, bl
);
3921 ::encode(mod_desc
, bl
);
3922 ::encode(extra_reqids
, bl
);
3924 ::encode(return_code
, bl
);
3928 void pg_log_entry_t::decode(bufferlist::iterator
&bl
)
3930 DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl
);
3934 ::decode(old_soid
, bl
);
3935 soid
.oid
= old_soid
.oid
;
3936 soid
.snap
= old_soid
.snap
;
3937 invalid_hash
= true;
3942 invalid_hash
= true;
3943 ::decode(version
, bl
);
3945 if (struct_v
>= 6 && op
== LOST_REVERT
)
3946 ::decode(reverting_to
, bl
);
3948 ::decode(prior_version
, bl
);
3950 ::decode(reqid
, bl
);
3952 ::decode(mtime
, bl
);
3954 invalid_pool
= true;
3956 if (op
== LOST_REVERT
) {
3957 if (struct_v
>= 6) {
3958 ::decode(prior_version
, bl
);
3960 reverting_to
= prior_version
;
3963 if (struct_v
>= 7 || // for v >= 7, this is for all ops.
3964 op
== CLONE
) { // for v < 7, it's only present for CLONE.
3965 ::decode(snaps
, bl
);
3966 // ensure snaps does not pin a larger buffer in memory
3971 ::decode(user_version
, bl
);
3973 user_version
= version
.version
;
3976 ::decode(mod_desc
, bl
);
3978 mod_desc
.mark_unrollbackable();
3980 ::decode(extra_reqids
, bl
);
3981 if (struct_v
>= 11 && op
== ERROR
)
3982 ::decode(return_code
, bl
);
3986 void pg_log_entry_t::dump(Formatter
*f
) const
3988 f
->dump_string("op", get_op_name());
3989 f
->dump_stream("object") << soid
;
3990 f
->dump_stream("version") << version
;
3991 f
->dump_stream("prior_version") << prior_version
;
3992 f
->dump_stream("reqid") << reqid
;
3993 f
->open_array_section("extra_reqids");
3994 for (vector
<pair
<osd_reqid_t
, version_t
> >::const_iterator p
=
3995 extra_reqids
.begin();
3996 p
!= extra_reqids
.end();
3998 f
->open_object_section("extra_reqid");
3999 f
->dump_stream("reqid") << p
->first
;
4000 f
->dump_stream("user_version") << p
->second
;
4004 f
->dump_stream("mtime") << mtime
;
4005 f
->dump_int("return_code", return_code
);
4006 if (snaps
.length() > 0) {
4008 bufferlist c
= snaps
;
4009 bufferlist::iterator p
= c
.begin();
4015 f
->open_object_section("snaps");
4016 for (vector
<snapid_t
>::iterator p
= v
.begin(); p
!= v
.end(); ++p
)
4017 f
->dump_unsigned("snap", *p
);
4021 f
->open_object_section("mod_desc");
4027 void pg_log_entry_t::generate_test_instances(list
<pg_log_entry_t
*>& o
)
4029 o
.push_back(new pg_log_entry_t());
4030 hobject_t
oid(object_t("objname"), "key", 123, 456, 0, "");
4031 o
.push_back(new pg_log_entry_t(MODIFY
, oid
, eversion_t(1,2), eversion_t(3,4),
4032 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4034 o
.push_back(new pg_log_entry_t(ERROR
, oid
, eversion_t(1,2), eversion_t(3,4),
4035 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4036 utime_t(8,9), -ENOENT
));
4039 ostream
& operator<<(ostream
& out
, const pg_log_entry_t
& e
)
4041 out
<< e
.version
<< " (" << e
.prior_version
<< ") "
4042 << std::left
<< std::setw(8) << e
.get_op_name() << ' '
4043 << e
.soid
<< " by " << e
.reqid
<< " " << e
.mtime
4044 << " " << e
.return_code
;
4045 if (e
.snaps
.length()) {
4046 vector
<snapid_t
> snaps
;
4047 bufferlist c
= e
.snaps
;
4048 bufferlist::iterator p
= c
.begin();
4054 out
<< " snaps " << snaps
;
4062 // out: pg_log_t that only has entries that apply to import_pgid using curmap
4063 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4064 void pg_log_t::filter_log(spg_t import_pgid
, const OSDMap
&curmap
,
4065 const string
&hit_set_namespace
, const pg_log_t
&in
,
4066 pg_log_t
&out
, pg_log_t
&reject
)
4072 for (list
<pg_log_entry_t
>::const_iterator i
= in
.log
.begin();
4073 i
!= in
.log
.end(); ++i
) {
4075 // Reject pg log entries for temporary objects
4076 if (i
->soid
.is_temp()) {
4077 reject
.log
.push_back(*i
);
4081 if (i
->soid
.nspace
!= hit_set_namespace
) {
4082 object_t oid
= i
->soid
.oid
;
4083 object_locator_t
loc(i
->soid
);
4084 pg_t raw_pgid
= curmap
.object_locator_to_pg(oid
, loc
);
4085 pg_t pgid
= curmap
.raw_pg_to_pg(raw_pgid
);
4087 if (import_pgid
.pgid
== pgid
) {
4088 out
.log
.push_back(*i
);
4090 reject
.log
.push_back(*i
);
4093 out
.log
.push_back(*i
);
4098 void pg_log_t::encode(bufferlist
& bl
) const
4100 ENCODE_START(6, 3, bl
);
4104 ::encode(can_rollback_to
, bl
);
4105 ::encode(rollback_info_trimmed_to
, bl
);
4109 void pg_log_t::decode(bufferlist::iterator
&bl
, int64_t pool
)
4111 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, bl
);
4116 ::decode(backlog
, bl
);
4120 ::decode(can_rollback_to
, bl
);
4123 ::decode(rollback_info_trimmed_to
, bl
);
4125 rollback_info_trimmed_to
= tail
;
4128 // handle hobject_t format change
4130 for (list
<pg_log_entry_t
>::iterator i
= log
.begin();
4133 if (!i
->soid
.is_max() && i
->soid
.pool
== -1)
4134 i
->soid
.pool
= pool
;
4139 void pg_log_t::dump(Formatter
*f
) const
4141 f
->dump_stream("head") << head
;
4142 f
->dump_stream("tail") << tail
;
4143 f
->open_array_section("log");
4144 for (list
<pg_log_entry_t
>::const_iterator p
= log
.begin(); p
!= log
.end(); ++p
) {
4145 f
->open_object_section("entry");
4152 void pg_log_t::generate_test_instances(list
<pg_log_t
*>& o
)
4154 o
.push_back(new pg_log_t
);
4156 // this is nonsensical:
4157 o
.push_back(new pg_log_t
);
4158 o
.back()->head
= eversion_t(1,2);
4159 o
.back()->tail
= eversion_t(3,4);
4160 list
<pg_log_entry_t
*> e
;
4161 pg_log_entry_t::generate_test_instances(e
);
4162 for (list
<pg_log_entry_t
*>::iterator p
= e
.begin(); p
!= e
.end(); ++p
)
4163 o
.back()->log
.push_back(**p
);
4166 void pg_log_t::copy_after(const pg_log_t
&other
, eversion_t v
)
4168 can_rollback_to
= other
.can_rollback_to
;
4171 for (list
<pg_log_entry_t
>::const_reverse_iterator i
= other
.log
.rbegin();
4172 i
!= other
.log
.rend();
4174 assert(i
->version
> other
.tail
);
4175 if (i
->version
<= v
) {
4176 // make tail accurate.
4184 void pg_log_t::copy_range(const pg_log_t
&other
, eversion_t from
, eversion_t to
)
4186 can_rollback_to
= other
.can_rollback_to
;
4187 list
<pg_log_entry_t
>::const_reverse_iterator i
= other
.log
.rbegin();
4188 assert(i
!= other
.log
.rend());
4189 while (i
->version
> to
) {
4191 assert(i
!= other
.log
.rend());
4193 assert(i
->version
== to
);
4195 for ( ; i
!= other
.log
.rend(); ++i
) {
4196 if (i
->version
<= from
) {
4204 void pg_log_t::copy_up_to(const pg_log_t
&other
, int max
)
4206 can_rollback_to
= other
.can_rollback_to
;
4210 for (list
<pg_log_entry_t
>::const_reverse_iterator i
= other
.log
.rbegin();
4211 i
!= other
.log
.rend();
4221 ostream
& pg_log_t::print(ostream
& out
) const
4223 out
<< *this << std::endl
;
4224 for (list
<pg_log_entry_t
>::const_iterator p
= log
.begin();
4227 out
<< *p
<< std::endl
;
4231 // -- pg_missing_t --
4233 ostream
& operator<<(ostream
& out
, const pg_missing_item
& i
)
4236 if (i
.have
!= eversion_t())
4237 out
<< "(" << i
.have
<< ")";
4241 // -- object_copy_cursor_t --
4243 void object_copy_cursor_t::encode(bufferlist
& bl
) const
4245 ENCODE_START(1, 1, bl
);
4246 ::encode(attr_complete
, bl
);
4247 ::encode(data_offset
, bl
);
4248 ::encode(data_complete
, bl
);
4249 ::encode(omap_offset
, bl
);
4250 ::encode(omap_complete
, bl
);
4254 void object_copy_cursor_t::decode(bufferlist::iterator
&bl
)
4256 DECODE_START(1, bl
);
4257 ::decode(attr_complete
, bl
);
4258 ::decode(data_offset
, bl
);
4259 ::decode(data_complete
, bl
);
4260 ::decode(omap_offset
, bl
);
4261 ::decode(omap_complete
, bl
);
4265 void object_copy_cursor_t::dump(Formatter
*f
) const
4267 f
->dump_unsigned("attr_complete", (int)attr_complete
);
4268 f
->dump_unsigned("data_offset", data_offset
);
4269 f
->dump_unsigned("data_complete", (int)data_complete
);
4270 f
->dump_string("omap_offset", omap_offset
);
4271 f
->dump_unsigned("omap_complete", (int)omap_complete
);
4274 void object_copy_cursor_t::generate_test_instances(list
<object_copy_cursor_t
*>& o
)
4276 o
.push_back(new object_copy_cursor_t
);
4277 o
.push_back(new object_copy_cursor_t
);
4278 o
.back()->attr_complete
= true;
4279 o
.back()->data_offset
= 123;
4280 o
.push_back(new object_copy_cursor_t
);
4281 o
.back()->attr_complete
= true;
4282 o
.back()->data_complete
= true;
4283 o
.back()->omap_offset
= "foo";
4284 o
.push_back(new object_copy_cursor_t
);
4285 o
.back()->attr_complete
= true;
4286 o
.back()->data_complete
= true;
4287 o
.back()->omap_complete
= true;
4290 // -- object_copy_data_t --
4292 void object_copy_data_t::encode(bufferlist
& bl
, uint64_t features
) const
4294 ENCODE_START(7, 5, bl
);
4296 ::encode(mtime
, bl
);
4297 ::encode(attrs
, bl
);
4299 ::encode(omap_data
, bl
);
4300 ::encode(cursor
, bl
);
4301 ::encode(omap_header
, bl
);
4302 ::encode(snaps
, bl
);
4303 ::encode(snap_seq
, bl
);
4304 ::encode(flags
, bl
);
4305 ::encode(data_digest
, bl
);
4306 ::encode(omap_digest
, bl
);
4307 ::encode(reqids
, bl
);
4308 ::encode(truncate_seq
, bl
);
4309 ::encode(truncate_size
, bl
);
4313 void object_copy_data_t::decode(bufferlist::iterator
& bl
)
4315 DECODE_START(7, bl
);
4319 ::decode(mtime
, bl
);
4322 ::decode(category
, bl
); // no longer used
4324 ::decode(attrs
, bl
);
4327 map
<string
,bufferlist
> omap
;
4331 ::encode(omap
, omap_data
);
4333 ::decode(cursor
, bl
);
4335 ::decode(omap_header
, bl
);
4336 if (struct_v
>= 3) {
4337 ::decode(snaps
, bl
);
4338 ::decode(snap_seq
, bl
);
4343 if (struct_v
>= 4) {
4344 ::decode(flags
, bl
);
4345 ::decode(data_digest
, bl
);
4346 ::decode(omap_digest
, bl
);
4351 ::decode(mtime
, bl
);
4352 ::decode(attrs
, bl
);
4354 ::decode(omap_data
, bl
);
4355 ::decode(cursor
, bl
);
4356 ::decode(omap_header
, bl
);
4357 ::decode(snaps
, bl
);
4358 ::decode(snap_seq
, bl
);
4359 if (struct_v
>= 4) {
4360 ::decode(flags
, bl
);
4361 ::decode(data_digest
, bl
);
4362 ::decode(omap_digest
, bl
);
4364 if (struct_v
>= 6) {
4365 ::decode(reqids
, bl
);
4367 if (struct_v
>= 7) {
4368 ::decode(truncate_seq
, bl
);
4369 ::decode(truncate_size
, bl
);
4375 void object_copy_data_t::generate_test_instances(list
<object_copy_data_t
*>& o
)
4377 o
.push_back(new object_copy_data_t());
4379 list
<object_copy_cursor_t
*> cursors
;
4380 object_copy_cursor_t::generate_test_instances(cursors
);
4381 list
<object_copy_cursor_t
*>::iterator ci
= cursors
.begin();
4382 o
.back()->cursor
= **(ci
++);
4384 o
.push_back(new object_copy_data_t());
4385 o
.back()->cursor
= **(ci
++);
4387 o
.push_back(new object_copy_data_t());
4388 o
.back()->size
= 1234;
4389 o
.back()->mtime
.set_from_double(1234);
4390 bufferptr
bp("there", 5);
4393 o
.back()->attrs
["hello"] = bl
;
4394 bufferptr
bp2("not", 3);
4397 map
<string
,bufferlist
> omap
;
4399 ::encode(omap
, o
.back()->omap_data
);
4400 bufferptr
databp("iamsomedatatocontain", 20);
4401 o
.back()->data
.push_back(databp
);
4402 o
.back()->omap_header
.append("this is an omap header");
4403 o
.back()->snaps
.push_back(123);
4404 o
.back()->reqids
.push_back(make_pair(osd_reqid_t(), version_t()));
4407 void object_copy_data_t::dump(Formatter
*f
) const
4409 f
->open_object_section("cursor");
4411 f
->close_section(); // cursor
4412 f
->dump_int("size", size
);
4413 f
->dump_stream("mtime") << mtime
;
4414 /* we should really print out the attrs here, but bufferlist
4415 const-correctness prevents that */
4416 f
->dump_int("attrs_size", attrs
.size());
4417 f
->dump_int("flags", flags
);
4418 f
->dump_unsigned("data_digest", data_digest
);
4419 f
->dump_unsigned("omap_digest", omap_digest
);
4420 f
->dump_int("omap_data_length", omap_data
.length());
4421 f
->dump_int("omap_header_length", omap_header
.length());
4422 f
->dump_int("data_length", data
.length());
4423 f
->open_array_section("snaps");
4424 for (vector
<snapid_t
>::const_iterator p
= snaps
.begin();
4425 p
!= snaps
.end(); ++p
)
4426 f
->dump_unsigned("snap", *p
);
4428 f
->open_array_section("reqids");
4429 for (vector
<pair
<osd_reqid_t
, version_t
> >::const_iterator p
= reqids
.begin();
4432 f
->open_object_section("extra_reqid");
4433 f
->dump_stream("reqid") << p
->first
;
4434 f
->dump_stream("user_version") << p
->second
;
4440 // -- pg_create_t --
4442 void pg_create_t::encode(bufferlist
&bl
) const
4444 ENCODE_START(1, 1, bl
);
4445 ::encode(created
, bl
);
4446 ::encode(parent
, bl
);
4447 ::encode(split_bits
, bl
);
4451 void pg_create_t::decode(bufferlist::iterator
&bl
)
4453 DECODE_START(1, bl
);
4454 ::decode(created
, bl
);
4455 ::decode(parent
, bl
);
4456 ::decode(split_bits
, bl
);
4460 void pg_create_t::dump(Formatter
*f
) const
4462 f
->dump_unsigned("created", created
);
4463 f
->dump_stream("parent") << parent
;
4464 f
->dump_int("split_bits", split_bits
);
4467 void pg_create_t::generate_test_instances(list
<pg_create_t
*>& o
)
4469 o
.push_back(new pg_create_t
);
4470 o
.push_back(new pg_create_t(1, pg_t(3, 4, -1), 2));
4474 // -- pg_hit_set_info_t --
4476 void pg_hit_set_info_t::encode(bufferlist
& bl
) const
4478 ENCODE_START(2, 1, bl
);
4479 ::encode(begin
, bl
);
4481 ::encode(version
, bl
);
4482 ::encode(using_gmt
, bl
);
4486 void pg_hit_set_info_t::decode(bufferlist::iterator
& p
)
4491 ::decode(version
, p
);
4492 if (struct_v
>= 2) {
4493 ::decode(using_gmt
, p
);
4500 void pg_hit_set_info_t::dump(Formatter
*f
) const
4502 f
->dump_stream("begin") << begin
;
4503 f
->dump_stream("end") << end
;
4504 f
->dump_stream("version") << version
;
4505 f
->dump_stream("using_gmt") << using_gmt
;
4508 void pg_hit_set_info_t::generate_test_instances(list
<pg_hit_set_info_t
*>& ls
)
4510 ls
.push_back(new pg_hit_set_info_t
);
4511 ls
.push_back(new pg_hit_set_info_t
);
4512 ls
.back()->begin
= utime_t(1, 2);
4513 ls
.back()->end
= utime_t(3, 4);
4517 // -- pg_hit_set_history_t --
4519 void pg_hit_set_history_t::encode(bufferlist
& bl
) const
4521 ENCODE_START(1, 1, bl
);
4522 ::encode(current_last_update
, bl
);
4524 utime_t dummy_stamp
;
4525 ::encode(dummy_stamp
, bl
);
4528 pg_hit_set_info_t dummy_info
;
4529 ::encode(dummy_info
, bl
);
4531 ::encode(history
, bl
);
4535 void pg_hit_set_history_t::decode(bufferlist::iterator
& p
)
4538 ::decode(current_last_update
, p
);
4540 utime_t dummy_stamp
;
4541 ::decode(dummy_stamp
, p
);
4544 pg_hit_set_info_t dummy_info
;
4545 ::decode(dummy_info
, p
);
4547 ::decode(history
, p
);
4551 void pg_hit_set_history_t::dump(Formatter
*f
) const
4553 f
->dump_stream("current_last_update") << current_last_update
;
4554 f
->open_array_section("history");
4555 for (list
<pg_hit_set_info_t
>::const_iterator p
= history
.begin();
4556 p
!= history
.end(); ++p
) {
4557 f
->open_object_section("info");
4564 void pg_hit_set_history_t::generate_test_instances(list
<pg_hit_set_history_t
*>& ls
)
4566 ls
.push_back(new pg_hit_set_history_t
);
4567 ls
.push_back(new pg_hit_set_history_t
);
4568 ls
.back()->current_last_update
= eversion_t(1, 2);
4569 ls
.back()->history
.push_back(pg_hit_set_info_t());
4572 // -- osd_peer_stat_t --
4574 void osd_peer_stat_t::encode(bufferlist
& bl
) const
4576 ENCODE_START(1, 1, bl
);
4577 ::encode(stamp
, bl
);
4581 void osd_peer_stat_t::decode(bufferlist::iterator
& bl
)
4583 DECODE_START(1, bl
);
4584 ::decode(stamp
, bl
);
4588 void osd_peer_stat_t::dump(Formatter
*f
) const
4590 f
->dump_stream("stamp") << stamp
;
4593 void osd_peer_stat_t::generate_test_instances(list
<osd_peer_stat_t
*>& o
)
4595 o
.push_back(new osd_peer_stat_t
);
4596 o
.push_back(new osd_peer_stat_t
);
4597 o
.back()->stamp
= utime_t(1, 2);
4600 ostream
& operator<<(ostream
& out
, const osd_peer_stat_t
&stat
)
4602 return out
<< "stat(" << stat
.stamp
<< ")";
4606 // -- OSDSuperblock --
4608 void OSDSuperblock::encode(bufferlist
&bl
) const
4610 ENCODE_START(8, 5, bl
);
4611 ::encode(cluster_fsid
, bl
);
4612 ::encode(whoami
, bl
);
4613 ::encode(current_epoch
, bl
);
4614 ::encode(oldest_map
, bl
);
4615 ::encode(newest_map
, bl
);
4616 ::encode(weight
, bl
);
4617 compat_features
.encode(bl
);
4618 ::encode(clean_thru
, bl
);
4619 ::encode(mounted
, bl
);
4620 ::encode(osd_fsid
, bl
);
4621 ::encode((epoch_t
)0, bl
); // epoch_t last_epoch_marked_full
4622 ::encode((uint32_t)0, bl
); // map<int64_t,epoch_t> pool_last_epoch_marked_full
4626 void OSDSuperblock::decode(bufferlist::iterator
&bl
)
4628 DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl
);
4631 ::decode(magic
, bl
);
4633 ::decode(cluster_fsid
, bl
);
4634 ::decode(whoami
, bl
);
4635 ::decode(current_epoch
, bl
);
4636 ::decode(oldest_map
, bl
);
4637 ::decode(newest_map
, bl
);
4638 ::decode(weight
, bl
);
4639 if (struct_v
>= 2) {
4640 compat_features
.decode(bl
);
4641 } else { //upgrade it!
4642 compat_features
.incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE
);
4644 ::decode(clean_thru
, bl
);
4645 ::decode(mounted
, bl
);
4647 ::decode(osd_fsid
, bl
);
4648 if (struct_v
>= 6) {
4649 epoch_t last_map_marked_full
;
4650 ::decode(last_map_marked_full
, bl
);
4652 if (struct_v
>= 7) {
4653 map
<int64_t,epoch_t
> pool_last_map_marked_full
;
4654 ::decode(pool_last_map_marked_full
, bl
);
4659 void OSDSuperblock::dump(Formatter
*f
) const
4661 f
->dump_stream("cluster_fsid") << cluster_fsid
;
4662 f
->dump_stream("osd_fsid") << osd_fsid
;
4663 f
->dump_int("whoami", whoami
);
4664 f
->dump_int("current_epoch", current_epoch
);
4665 f
->dump_int("oldest_map", oldest_map
);
4666 f
->dump_int("newest_map", newest_map
);
4667 f
->dump_float("weight", weight
);
4668 f
->open_object_section("compat");
4669 compat_features
.dump(f
);
4671 f
->dump_int("clean_thru", clean_thru
);
4672 f
->dump_int("last_epoch_mounted", mounted
);
4675 void OSDSuperblock::generate_test_instances(list
<OSDSuperblock
*>& o
)
4678 o
.push_back(new OSDSuperblock(z
));
4679 memset(&z
.cluster_fsid
, 1, sizeof(z
.cluster_fsid
));
4680 memset(&z
.osd_fsid
, 2, sizeof(z
.osd_fsid
));
4682 z
.current_epoch
= 4;
4687 o
.push_back(new OSDSuperblock(z
));
4688 o
.push_back(new OSDSuperblock(z
));
4693 void SnapSet::encode(bufferlist
& bl
) const
4695 ENCODE_START(3, 2, bl
);
4697 ::encode(head_exists
, bl
);
4698 ::encode(snaps
, bl
);
4699 ::encode(clones
, bl
);
4700 ::encode(clone_overlap
, bl
);
4701 ::encode(clone_size
, bl
);
4702 ::encode(clone_snaps
, bl
);
4706 void SnapSet::decode(bufferlist::iterator
& bl
)
4708 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl
);
4710 ::decode(head_exists
, bl
);
4711 ::decode(snaps
, bl
);
4712 ::decode(clones
, bl
);
4713 ::decode(clone_overlap
, bl
);
4714 ::decode(clone_size
, bl
);
4715 if (struct_v
>= 3) {
4716 ::decode(clone_snaps
, bl
);
4718 clone_snaps
.clear();
4723 void SnapSet::dump(Formatter
*f
) const
4725 SnapContext
sc(seq
, snaps
);
4726 f
->open_object_section("snap_context");
4729 f
->dump_int("head_exists", head_exists
);
4730 f
->open_array_section("clones");
4731 for (vector
<snapid_t
>::const_iterator p
= clones
.begin(); p
!= clones
.end(); ++p
) {
4732 f
->open_object_section("clone");
4733 f
->dump_unsigned("snap", *p
);
4734 f
->dump_unsigned("size", clone_size
.find(*p
)->second
);
4735 f
->dump_stream("overlap") << clone_overlap
.find(*p
)->second
;
4736 auto q
= clone_snaps
.find(*p
);
4737 if (q
!= clone_snaps
.end()) {
4738 f
->open_array_section("snaps");
4739 for (auto s
: q
->second
) {
4740 f
->dump_unsigned("snap", s
);
4749 void SnapSet::generate_test_instances(list
<SnapSet
*>& o
)
4751 o
.push_back(new SnapSet
);
4752 o
.push_back(new SnapSet
);
4753 o
.back()->head_exists
= true;
4754 o
.back()->seq
= 123;
4755 o
.back()->snaps
.push_back(123);
4756 o
.back()->snaps
.push_back(12);
4757 o
.push_back(new SnapSet
);
4758 o
.back()->head_exists
= true;
4759 o
.back()->seq
= 123;
4760 o
.back()->snaps
.push_back(123);
4761 o
.back()->snaps
.push_back(12);
4762 o
.back()->clones
.push_back(12);
4763 o
.back()->clone_size
[12] = 12345;
4764 o
.back()->clone_overlap
[12];
4765 o
.back()->clone_snaps
[12] = {12, 10, 8};
4768 ostream
& operator<<(ostream
& out
, const SnapSet
& cs
)
4770 if (cs
.is_legacy()) {
4771 out
<< cs
.seq
<< "=" << cs
.snaps
<< ":"
4773 << (cs
.head_exists
? "+head":"");
4774 if (!cs
.clone_snaps
.empty()) {
4775 out
<< "+stray_clone_snaps=" << cs
.clone_snaps
;
4779 return out
<< cs
.seq
<< "=" << cs
.snaps
<< ":"
4784 void SnapSet::from_snap_set(const librados::snap_set_t
& ss
, bool legacy
)
4786 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
4787 // correct: it will not include snaps that still logically exist
4788 // but for which there was no clone that is defined. For all
4789 // practical purposes this doesn't matter, since we only use that
4790 // information to clone on the OSD, and we have already moved
4791 // forward past that part of the object history.
4794 set
<snapid_t
> _snaps
;
4795 set
<snapid_t
> _clones
;
4796 head_exists
= false;
4797 for (vector
<librados::clone_info_t
>::const_iterator p
= ss
.clones
.begin();
4798 p
!= ss
.clones
.end();
4800 if (p
->cloneid
== librados::SNAP_HEAD
) {
4803 _clones
.insert(p
->cloneid
);
4804 _snaps
.insert(p
->snaps
.begin(), p
->snaps
.end());
4805 clone_size
[p
->cloneid
] = p
->size
;
4806 clone_overlap
[p
->cloneid
]; // the entry must exist, even if it's empty.
4807 for (vector
<pair
<uint64_t, uint64_t> >::const_iterator q
=
4808 p
->overlap
.begin(); q
!= p
->overlap
.end(); ++q
)
4809 clone_overlap
[p
->cloneid
].insert(q
->first
, q
->second
);
4811 // p->snaps is ascending; clone_snaps is descending
4812 vector
<snapid_t
>& v
= clone_snaps
[p
->cloneid
];
4813 for (auto q
= p
->snaps
.rbegin(); q
!= p
->snaps
.rend(); ++q
) {
4822 clones
.reserve(_clones
.size());
4823 for (set
<snapid_t
>::iterator p
= _clones
.begin(); p
!= _clones
.end(); ++p
)
4824 clones
.push_back(*p
);
4828 snaps
.reserve(_snaps
.size());
4829 for (set
<snapid_t
>::reverse_iterator p
= _snaps
.rbegin();
4830 p
!= _snaps
.rend(); ++p
)
4831 snaps
.push_back(*p
);
4834 uint64_t SnapSet::get_clone_bytes(snapid_t clone
) const
4836 assert(clone_size
.count(clone
));
4837 uint64_t size
= clone_size
.find(clone
)->second
;
4838 assert(clone_overlap
.count(clone
));
4839 const interval_set
<uint64_t> &overlap
= clone_overlap
.find(clone
)->second
;
4840 for (interval_set
<uint64_t>::const_iterator i
= overlap
.begin();
4843 assert(size
>= i
.get_len());
4844 size
-= i
.get_len();
4849 void SnapSet::filter(const pg_pool_t
&pinfo
)
4851 vector
<snapid_t
> oldsnaps
;
4852 oldsnaps
.swap(snaps
);
4853 for (vector
<snapid_t
>::const_iterator i
= oldsnaps
.begin();
4854 i
!= oldsnaps
.end();
4856 if (!pinfo
.is_removed_snap(*i
))
4857 snaps
.push_back(*i
);
4861 SnapSet
SnapSet::get_filtered(const pg_pool_t
&pinfo
) const
4868 // -- watch_info_t --
4870 void watch_info_t::encode(bufferlist
& bl
, uint64_t features
) const
4872 ENCODE_START(4, 3, bl
);
4873 ::encode(cookie
, bl
);
4874 ::encode(timeout_seconds
, bl
);
4875 ::encode(addr
, bl
, features
);
4879 void watch_info_t::decode(bufferlist::iterator
& bl
)
4881 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl
);
4882 ::decode(cookie
, bl
);
4887 ::decode(timeout_seconds
, bl
);
4888 if (struct_v
>= 4) {
4894 void watch_info_t::dump(Formatter
*f
) const
4896 f
->dump_unsigned("cookie", cookie
);
4897 f
->dump_unsigned("timeout_seconds", timeout_seconds
);
4898 f
->open_object_section("addr");
4903 void watch_info_t::generate_test_instances(list
<watch_info_t
*>& o
)
4905 o
.push_back(new watch_info_t
);
4906 o
.push_back(new watch_info_t
);
4907 o
.back()->cookie
= 123;
4908 o
.back()->timeout_seconds
= 99;
4910 ea
.set_type(entity_addr_t::TYPE_LEGACY
);
4912 ea
.set_family(AF_INET
);
4913 ea
.set_in4_quad(0, 127);
4914 ea
.set_in4_quad(1, 0);
4915 ea
.set_in4_quad(2, 1);
4916 ea
.set_in4_quad(3, 2);
4918 o
.back()->addr
= ea
;
4922 // -- object_info_t --
4924 void object_info_t::copy_user_bits(const object_info_t
& other
)
4926 // these bits are copied from head->clone.
4928 mtime
= other
.mtime
;
4929 local_mtime
= other
.local_mtime
;
4930 last_reqid
= other
.last_reqid
;
4931 truncate_seq
= other
.truncate_seq
;
4932 truncate_size
= other
.truncate_size
;
4933 flags
= other
.flags
;
4934 user_version
= other
.user_version
;
4935 data_digest
= other
.data_digest
;
4936 omap_digest
= other
.omap_digest
;
4939 ps_t
object_info_t::legacy_object_locator_to_ps(const object_t
&oid
,
4940 const object_locator_t
&loc
) {
4942 if (loc
.key
.length())
4943 // Hack, we don't have the osd map, so we don't really know the hash...
4944 ps
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, loc
.key
.c_str(),
4947 ps
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, oid
.name
.c_str(),
4952 void object_info_t::encode(bufferlist
& bl
, uint64_t features
) const
4954 object_locator_t
myoloc(soid
);
4955 map
<entity_name_t
, watch_info_t
> old_watchers
;
4956 for (map
<pair
<uint64_t, entity_name_t
>, watch_info_t
>::const_iterator i
=
4958 i
!= watchers
.end();
4960 old_watchers
.insert(make_pair(i
->first
.second
, i
->second
));
4962 ENCODE_START(16, 8, bl
);
4964 ::encode(myoloc
, bl
); //Retained for compatibility
4965 ::encode((__u32
)0, bl
); // was category, no longer used
4966 ::encode(version
, bl
);
4967 ::encode(prior_version
, bl
);
4968 ::encode(last_reqid
, bl
);
4970 ::encode(mtime
, bl
);
4971 if (soid
.snap
== CEPH_NOSNAP
)
4972 ::encode(osd_reqid_t(), bl
); // used to be wrlock_by
4974 ::encode(legacy_snaps
, bl
);
4975 ::encode(truncate_seq
, bl
);
4976 ::encode(truncate_size
, bl
);
4977 ::encode(is_lost(), bl
);
4978 ::encode(old_watchers
, bl
, features
);
4979 /* shenanigans to avoid breaking backwards compatibility in the disk format.
4980 * When we can, switch this out for simply putting the version_t on disk. */
4981 eversion_t
user_eversion(0, user_version
);
4982 ::encode(user_eversion
, bl
);
4983 ::encode(test_flag(FLAG_USES_TMAP
), bl
);
4984 ::encode(watchers
, bl
, features
);
4985 __u32 _flags
= flags
;
4986 ::encode(_flags
, bl
);
4987 ::encode(local_mtime
, bl
);
4988 ::encode(data_digest
, bl
);
4989 ::encode(omap_digest
, bl
);
4990 ::encode(expected_object_size
, bl
);
4991 ::encode(expected_write_size
, bl
);
4992 ::encode(alloc_hint_flags
, bl
);
4996 void object_info_t::decode(bufferlist::iterator
& bl
)
4998 object_locator_t myoloc
;
4999 DECODE_START_LEGACY_COMPAT_LEN(16, 8, 8, bl
);
5000 map
<entity_name_t
, watch_info_t
> old_watchers
;
5002 ::decode(myoloc
, bl
);
5005 ::decode(category
, bl
); // no longer used
5007 ::decode(version
, bl
);
5008 ::decode(prior_version
, bl
);
5009 ::decode(last_reqid
, bl
);
5011 ::decode(mtime
, bl
);
5012 if (soid
.snap
== CEPH_NOSNAP
) {
5013 osd_reqid_t wrlock_by
;
5014 ::decode(wrlock_by
, bl
);
5016 ::decode(legacy_snaps
, bl
);
5018 ::decode(truncate_seq
, bl
);
5019 ::decode(truncate_size
, bl
);
5021 // if this is struct_v >= 13, we will overwrite this
5022 // below since this field is just here for backwards
5028 ::decode(old_watchers
, bl
);
5029 eversion_t user_eversion
;
5030 ::decode(user_eversion
, bl
);
5031 user_version
= user_eversion
.version
;
5033 if (struct_v
>= 9) {
5034 bool uses_tmap
= false;
5035 ::decode(uses_tmap
, bl
);
5037 set_flag(FLAG_USES_TMAP
);
5039 set_flag(FLAG_USES_TMAP
);
5042 soid
.pool
= myoloc
.pool
;
5043 if (struct_v
>= 11) {
5044 ::decode(watchers
, bl
);
5046 for (map
<entity_name_t
, watch_info_t
>::iterator i
= old_watchers
.begin();
5047 i
!= old_watchers
.end();
5051 make_pair(i
->second
.cookie
, i
->first
), i
->second
));
5054 if (struct_v
>= 13) {
5056 ::decode(_flags
, bl
);
5057 flags
= (flag_t
)_flags
;
5059 if (struct_v
>= 14) {
5060 ::decode(local_mtime
, bl
);
5062 local_mtime
= utime_t();
5064 if (struct_v
>= 15) {
5065 ::decode(data_digest
, bl
);
5066 ::decode(omap_digest
, bl
);
5068 data_digest
= omap_digest
= -1;
5069 clear_flag(FLAG_DATA_DIGEST
);
5070 clear_flag(FLAG_OMAP_DIGEST
);
5072 if (struct_v
>= 16) {
5073 ::decode(expected_object_size
, bl
);
5074 ::decode(expected_write_size
, bl
);
5075 ::decode(alloc_hint_flags
, bl
);
5077 expected_object_size
= 0;
5078 expected_write_size
= 0;
5079 alloc_hint_flags
= 0;
5084 void object_info_t::dump(Formatter
*f
) const
5086 f
->open_object_section("oid");
5089 f
->dump_stream("version") << version
;
5090 f
->dump_stream("prior_version") << prior_version
;
5091 f
->dump_stream("last_reqid") << last_reqid
;
5092 f
->dump_unsigned("user_version", user_version
);
5093 f
->dump_unsigned("size", size
);
5094 f
->dump_stream("mtime") << mtime
;
5095 f
->dump_stream("local_mtime") << local_mtime
;
5096 f
->dump_unsigned("lost", (int)is_lost());
5097 f
->dump_unsigned("flags", (int)flags
);
5098 f
->open_array_section("legacy_snaps");
5099 for (auto s
: legacy_snaps
) {
5100 f
->dump_unsigned("snap", s
);
5103 f
->dump_unsigned("truncate_seq", truncate_seq
);
5104 f
->dump_unsigned("truncate_size", truncate_size
);
5105 f
->dump_unsigned("data_digest", data_digest
);
5106 f
->dump_unsigned("omap_digest", omap_digest
);
5107 f
->dump_unsigned("expected_object_size", expected_object_size
);
5108 f
->dump_unsigned("expected_write_size", expected_write_size
);
5109 f
->dump_unsigned("alloc_hint_flags", alloc_hint_flags
);
5110 f
->open_object_section("watchers");
5111 for (map
<pair
<uint64_t, entity_name_t
>,watch_info_t
>::const_iterator p
=
5112 watchers
.begin(); p
!= watchers
.end(); ++p
) {
5114 ss
<< p
->first
.second
;
5115 f
->open_object_section(ss
.str().c_str());
5122 void object_info_t::generate_test_instances(list
<object_info_t
*>& o
)
5124 o
.push_back(new object_info_t());
5130 ostream
& operator<<(ostream
& out
, const object_info_t
& oi
)
5132 out
<< oi
.soid
<< "(" << oi
.version
5133 << " " << oi
.last_reqid
;
5134 if (oi
.soid
.snap
!= CEPH_NOSNAP
&& !oi
.legacy_snaps
.empty())
5135 out
<< " " << oi
.legacy_snaps
;
5137 out
<< " " << oi
.get_flag_string();
5138 out
<< " s " << oi
.size
;
5139 out
<< " uv " << oi
.user_version
;
5140 if (oi
.is_data_digest())
5141 out
<< " dd " << std::hex
<< oi
.data_digest
<< std::dec
;
5142 if (oi
.is_omap_digest())
5143 out
<< " od " << std::hex
<< oi
.omap_digest
<< std::dec
;
5144 out
<< " alloc_hint [" << oi
.expected_object_size
5145 << " " << oi
.expected_write_size
5146 << " " << oi
.alloc_hint_flags
<< "]";
5152 // -- ObjectRecovery --
5153 void ObjectRecoveryProgress::encode(bufferlist
&bl
) const
5155 ENCODE_START(1, 1, bl
);
5156 ::encode(first
, bl
);
5157 ::encode(data_complete
, bl
);
5158 ::encode(data_recovered_to
, bl
);
5159 ::encode(omap_recovered_to
, bl
);
5160 ::encode(omap_complete
, bl
);
5164 void ObjectRecoveryProgress::decode(bufferlist::iterator
&bl
)
5166 DECODE_START(1, bl
);
5167 ::decode(first
, bl
);
5168 ::decode(data_complete
, bl
);
5169 ::decode(data_recovered_to
, bl
);
5170 ::decode(omap_recovered_to
, bl
);
5171 ::decode(omap_complete
, bl
);
5175 ostream
&operator<<(ostream
&out
, const ObjectRecoveryProgress
&prog
)
5177 return prog
.print(out
);
5180 void ObjectRecoveryProgress::generate_test_instances(
5181 list
<ObjectRecoveryProgress
*>& o
)
5183 o
.push_back(new ObjectRecoveryProgress
);
5184 o
.back()->first
= false;
5185 o
.back()->data_complete
= true;
5186 o
.back()->omap_complete
= true;
5187 o
.back()->data_recovered_to
= 100;
5189 o
.push_back(new ObjectRecoveryProgress
);
5190 o
.back()->first
= true;
5191 o
.back()->data_complete
= false;
5192 o
.back()->omap_complete
= false;
5193 o
.back()->data_recovered_to
= 0;
5196 ostream
&ObjectRecoveryProgress::print(ostream
&out
) const
5198 return out
<< "ObjectRecoveryProgress("
5199 << ( first
? "" : "!" ) << "first, "
5200 << "data_recovered_to:" << data_recovered_to
5201 << ", data_complete:" << ( data_complete
? "true" : "false" )
5202 << ", omap_recovered_to:" << omap_recovered_to
5203 << ", omap_complete:" << ( omap_complete
? "true" : "false" )
5207 void ObjectRecoveryProgress::dump(Formatter
*f
) const
5209 f
->dump_int("first?", first
);
5210 f
->dump_int("data_complete?", data_complete
);
5211 f
->dump_unsigned("data_recovered_to", data_recovered_to
);
5212 f
->dump_int("omap_complete?", omap_complete
);
5213 f
->dump_string("omap_recovered_to", omap_recovered_to
);
5216 void ObjectRecoveryInfo::encode(bufferlist
&bl
, uint64_t features
) const
5218 ENCODE_START(2, 1, bl
);
5220 ::encode(version
, bl
);
5222 ::encode(oi
, bl
, features
);
5224 ::encode(copy_subset
, bl
);
5225 ::encode(clone_subset
, bl
);
5229 void ObjectRecoveryInfo::decode(bufferlist::iterator
&bl
,
5232 DECODE_START(2, bl
);
5234 ::decode(version
, bl
);
5238 ::decode(copy_subset
, bl
);
5239 ::decode(clone_subset
, bl
);
5243 if (!soid
.is_max() && soid
.pool
== -1)
5245 map
<hobject_t
, interval_set
<uint64_t>> tmp
;
5246 tmp
.swap(clone_subset
);
5247 for (map
<hobject_t
, interval_set
<uint64_t>>::iterator i
= tmp
.begin();
5250 hobject_t
first(i
->first
);
5251 if (!first
.is_max() && first
.pool
== -1)
5253 clone_subset
[first
].swap(i
->second
);
5258 void ObjectRecoveryInfo::generate_test_instances(
5259 list
<ObjectRecoveryInfo
*>& o
)
5261 o
.push_back(new ObjectRecoveryInfo
);
5262 o
.back()->soid
= hobject_t(sobject_t("key", CEPH_NOSNAP
));
5263 o
.back()->version
= eversion_t(0,0);
5264 o
.back()->size
= 100;
5268 void ObjectRecoveryInfo::dump(Formatter
*f
) const
5270 f
->dump_stream("object") << soid
;
5271 f
->dump_stream("at_version") << version
;
5272 f
->dump_stream("size") << size
;
5274 f
->open_object_section("object_info");
5279 f
->open_object_section("snapset");
5283 f
->dump_stream("copy_subset") << copy_subset
;
5284 f
->dump_stream("clone_subset") << clone_subset
;
5287 ostream
& operator<<(ostream
& out
, const ObjectRecoveryInfo
&inf
)
5289 return inf
.print(out
);
5292 ostream
&ObjectRecoveryInfo::print(ostream
&out
) const
5294 return out
<< "ObjectRecoveryInfo("
5295 << soid
<< "@" << version
5296 << ", size: " << size
5297 << ", copy_subset: " << copy_subset
5298 << ", clone_subset: " << clone_subset
5299 << ", snapset: " << ss
5303 // -- PushReplyOp --
5304 void PushReplyOp::generate_test_instances(list
<PushReplyOp
*> &o
)
5306 o
.push_back(new PushReplyOp
);
5307 o
.push_back(new PushReplyOp
);
5308 o
.back()->soid
= hobject_t(sobject_t("asdf", 2));
5309 o
.push_back(new PushReplyOp
);
5310 o
.back()->soid
= hobject_t(sobject_t("asdf", CEPH_NOSNAP
));
5313 void PushReplyOp::encode(bufferlist
&bl
) const
5315 ENCODE_START(1, 1, bl
);
5320 void PushReplyOp::decode(bufferlist::iterator
&bl
)
5322 DECODE_START(1, bl
);
5327 void PushReplyOp::dump(Formatter
*f
) const
5329 f
->dump_stream("soid") << soid
;
5332 ostream
&PushReplyOp::print(ostream
&out
) const
5335 << "PushReplyOp(" << soid
5339 ostream
& operator<<(ostream
& out
, const PushReplyOp
&op
)
5341 return op
.print(out
);
5344 uint64_t PushReplyOp::cost(CephContext
*cct
) const
5347 return cct
->_conf
->osd_push_per_object_cost
+
5348 cct
->_conf
->osd_recovery_max_chunk
;
5352 void PullOp::generate_test_instances(list
<PullOp
*> &o
)
5354 o
.push_back(new PullOp
);
5355 o
.push_back(new PullOp
);
5356 o
.back()->soid
= hobject_t(sobject_t("asdf", 2));
5357 o
.back()->recovery_info
.version
= eversion_t(3, 10);
5358 o
.push_back(new PullOp
);
5359 o
.back()->soid
= hobject_t(sobject_t("asdf", CEPH_NOSNAP
));
5360 o
.back()->recovery_info
.version
= eversion_t(0, 0);
5363 void PullOp::encode(bufferlist
&bl
, uint64_t features
) const
5365 ENCODE_START(1, 1, bl
);
5367 ::encode(recovery_info
, bl
, features
);
5368 ::encode(recovery_progress
, bl
);
5372 void PullOp::decode(bufferlist::iterator
&bl
)
5374 DECODE_START(1, bl
);
5376 ::decode(recovery_info
, bl
);
5377 ::decode(recovery_progress
, bl
);
5381 void PullOp::dump(Formatter
*f
) const
5383 f
->dump_stream("soid") << soid
;
5385 f
->open_object_section("recovery_info");
5386 recovery_info
.dump(f
);
5390 f
->open_object_section("recovery_progress");
5391 recovery_progress
.dump(f
);
5396 ostream
&PullOp::print(ostream
&out
) const
5399 << "PullOp(" << soid
5400 << ", recovery_info: " << recovery_info
5401 << ", recovery_progress: " << recovery_progress
5405 ostream
& operator<<(ostream
& out
, const PullOp
&op
)
5407 return op
.print(out
);
5410 uint64_t PullOp::cost(CephContext
*cct
) const
5412 return cct
->_conf
->osd_push_per_object_cost
+
5413 cct
->_conf
->osd_recovery_max_chunk
;
5417 void PushOp::generate_test_instances(list
<PushOp
*> &o
)
5419 o
.push_back(new PushOp
);
5420 o
.push_back(new PushOp
);
5421 o
.back()->soid
= hobject_t(sobject_t("asdf", 2));
5422 o
.back()->version
= eversion_t(3, 10);
5423 o
.push_back(new PushOp
);
5424 o
.back()->soid
= hobject_t(sobject_t("asdf", CEPH_NOSNAP
));
5425 o
.back()->version
= eversion_t(0, 0);
5428 void PushOp::encode(bufferlist
&bl
, uint64_t features
) const
5430 ENCODE_START(1, 1, bl
);
5432 ::encode(version
, bl
);
5434 ::encode(data_included
, bl
);
5435 ::encode(omap_header
, bl
);
5436 ::encode(omap_entries
, bl
);
5437 ::encode(attrset
, bl
);
5438 ::encode(recovery_info
, bl
, features
);
5439 ::encode(after_progress
, bl
);
5440 ::encode(before_progress
, bl
);
5444 void PushOp::decode(bufferlist::iterator
&bl
)
5446 DECODE_START(1, bl
);
5448 ::decode(version
, bl
);
5450 ::decode(data_included
, bl
);
5451 ::decode(omap_header
, bl
);
5452 ::decode(omap_entries
, bl
);
5453 ::decode(attrset
, bl
);
5454 ::decode(recovery_info
, bl
);
5455 ::decode(after_progress
, bl
);
5456 ::decode(before_progress
, bl
);
5460 void PushOp::dump(Formatter
*f
) const
5462 f
->dump_stream("soid") << soid
;
5463 f
->dump_stream("version") << version
;
5464 f
->dump_int("data_len", data
.length());
5465 f
->dump_stream("data_included") << data_included
;
5466 f
->dump_int("omap_header_len", omap_header
.length());
5467 f
->dump_int("omap_entries_len", omap_entries
.size());
5468 f
->dump_int("attrset_len", attrset
.size());
5470 f
->open_object_section("recovery_info");
5471 recovery_info
.dump(f
);
5475 f
->open_object_section("after_progress");
5476 after_progress
.dump(f
);
5480 f
->open_object_section("before_progress");
5481 before_progress
.dump(f
);
5486 ostream
&PushOp::print(ostream
&out
) const
5489 << "PushOp(" << soid
5490 << ", version: " << version
5491 << ", data_included: " << data_included
5492 << ", data_size: " << data
.length()
5493 << ", omap_header_size: " << omap_header
.length()
5494 << ", omap_entries_size: " << omap_entries
.size()
5495 << ", attrset_size: " << attrset
.size()
5496 << ", recovery_info: " << recovery_info
5497 << ", after_progress: " << after_progress
5498 << ", before_progress: " << before_progress
5502 ostream
& operator<<(ostream
& out
, const PushOp
&op
)
5504 return op
.print(out
);
5507 uint64_t PushOp::cost(CephContext
*cct
) const
5509 uint64_t cost
= data_included
.size();
5510 for (map
<string
, bufferlist
>::const_iterator i
=
5511 omap_entries
.begin();
5512 i
!= omap_entries
.end();
5514 cost
+= i
->second
.length();
5516 cost
+= cct
->_conf
->osd_push_per_object_cost
;
5522 void ScrubMap::merge_incr(const ScrubMap
&l
)
5524 assert(valid_through
== l
.incr_since
);
5525 valid_through
= l
.valid_through
;
5527 for (map
<hobject_t
,object
>::const_iterator p
= l
.objects
.begin();
5528 p
!= l
.objects
.end();
5530 if (p
->second
.negative
) {
5531 map
<hobject_t
,object
>::iterator q
= objects
.find(p
->first
);
5532 if (q
!= objects
.end()) {
5536 objects
[p
->first
] = p
->second
;
5541 void ScrubMap::encode(bufferlist
& bl
) const
5543 ENCODE_START(3, 2, bl
);
5544 ::encode(objects
, bl
);
5545 ::encode((__u32
)0, bl
); // used to be attrs; now deprecated
5546 bufferlist old_logbl
; // not used
5547 ::encode(old_logbl
, bl
);
5548 ::encode(valid_through
, bl
);
5549 ::encode(incr_since
, bl
);
5553 void ScrubMap::decode(bufferlist::iterator
& bl
, int64_t pool
)
5555 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl
);
5556 ::decode(objects
, bl
);
5558 map
<string
,string
> attrs
; // deprecated
5559 ::decode(attrs
, bl
);
5561 bufferlist old_logbl
; // not used
5562 ::decode(old_logbl
, bl
);
5563 ::decode(valid_through
, bl
);
5564 ::decode(incr_since
, bl
);
5567 // handle hobject_t upgrade
5569 map
<hobject_t
, object
> tmp
;
5571 for (map
<hobject_t
, object
>::iterator i
= tmp
.begin();
5574 hobject_t
first(i
->first
);
5575 if (!first
.is_max() && first
.pool
== -1)
5577 objects
[first
] = i
->second
;
5582 void ScrubMap::dump(Formatter
*f
) const
5584 f
->dump_stream("valid_through") << valid_through
;
5585 f
->dump_stream("incremental_since") << incr_since
;
5586 f
->open_array_section("objects");
5587 for (map
<hobject_t
,object
>::const_iterator p
= objects
.begin(); p
!= objects
.end(); ++p
) {
5588 f
->open_object_section("object");
5589 f
->dump_string("name", p
->first
.oid
.name
);
5590 f
->dump_unsigned("hash", p
->first
.get_hash());
5591 f
->dump_string("key", p
->first
.get_key());
5592 f
->dump_int("snapid", p
->first
.snap
);
5599 void ScrubMap::generate_test_instances(list
<ScrubMap
*>& o
)
5601 o
.push_back(new ScrubMap
);
5602 o
.push_back(new ScrubMap
);
5603 o
.back()->valid_through
= eversion_t(1, 2);
5604 o
.back()->incr_since
= eversion_t(3, 4);
5606 object::generate_test_instances(obj
);
5607 o
.back()->objects
[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj
.back();
5609 o
.back()->objects
[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj
.back();
5612 // -- ScrubMap::object --
5614 void ScrubMap::object::encode(bufferlist
& bl
) const
5616 bool compat_read_error
= read_error
|| ec_hash_mismatch
|| ec_size_mismatch
;
5617 ENCODE_START(8, 7, bl
);
5619 ::encode(negative
, bl
);
5620 ::encode(attrs
, bl
);
5621 ::encode(digest
, bl
);
5622 ::encode(digest_present
, bl
);
5623 ::encode((uint32_t)0, bl
); // obsolete nlinks
5624 ::encode((uint32_t)0, bl
); // snapcolls
5625 ::encode(omap_digest
, bl
);
5626 ::encode(omap_digest_present
, bl
);
5627 ::encode(compat_read_error
, bl
);
5628 ::encode(stat_error
, bl
);
5629 ::encode(read_error
, bl
);
5630 ::encode(ec_hash_mismatch
, bl
);
5631 ::encode(ec_size_mismatch
, bl
);
5635 void ScrubMap::object::decode(bufferlist::iterator
& bl
)
5637 DECODE_START(8, bl
);
5639 bool tmp
, compat_read_error
= false;
5642 ::decode(attrs
, bl
);
5643 ::decode(digest
, bl
);
5645 digest_present
= tmp
;
5648 ::decode(nlinks
, bl
);
5649 set
<snapid_t
> snapcolls
;
5650 ::decode(snapcolls
, bl
);
5652 ::decode(omap_digest
, bl
);
5654 omap_digest_present
= tmp
;
5655 ::decode(compat_read_error
, bl
);
5658 if (struct_v
>= 8) {
5662 ec_hash_mismatch
= tmp
;
5664 ec_size_mismatch
= tmp
;
5666 // If older encoder found a read_error, set read_error
5667 if (compat_read_error
&& !read_error
&& !ec_hash_mismatch
&& !ec_size_mismatch
)
5672 void ScrubMap::object::dump(Formatter
*f
) const
5674 f
->dump_int("size", size
);
5675 f
->dump_int("negative", negative
);
5676 f
->open_array_section("attrs");
5677 for (map
<string
,bufferptr
>::const_iterator p
= attrs
.begin(); p
!= attrs
.end(); ++p
) {
5678 f
->open_object_section("attr");
5679 f
->dump_string("name", p
->first
);
5680 f
->dump_int("length", p
->second
.length());
5686 void ScrubMap::object::generate_test_instances(list
<object
*>& o
)
5688 o
.push_back(new object
);
5689 o
.push_back(new object
);
5690 o
.back()->negative
= true;
5691 o
.push_back(new object
);
5692 o
.back()->size
= 123;
5693 o
.back()->attrs
["foo"] = buffer::copy("foo", 3);
5694 o
.back()->attrs
["bar"] = buffer::copy("barval", 6);
5699 ostream
& operator<<(ostream
& out
, const OSDOp
& op
)
5701 out
<< ceph_osd_op_name(op
.op
.op
);
5702 if (ceph_osd_op_type_data(op
.op
.op
)) {
5705 case CEPH_OSD_OP_ASSERT_VER
:
5706 out
<< " v" << op
.op
.assert_ver
.ver
;
5708 case CEPH_OSD_OP_TRUNCATE
:
5709 out
<< " " << op
.op
.extent
.offset
;
5711 case CEPH_OSD_OP_MASKTRUNC
:
5712 case CEPH_OSD_OP_TRIMTRUNC
:
5713 out
<< " " << op
.op
.extent
.truncate_seq
<< "@"
5714 << (int64_t)op
.op
.extent
.truncate_size
;
5716 case CEPH_OSD_OP_ROLLBACK
:
5717 out
<< " " << snapid_t(op
.op
.snap
.snapid
);
5719 case CEPH_OSD_OP_WATCH
:
5720 out
<< " " << ceph_osd_watch_op_name(op
.op
.watch
.op
)
5721 << " cookie " << op
.op
.watch
.cookie
;
5722 if (op
.op
.watch
.gen
)
5723 out
<< " gen " << op
.op
.watch
.gen
;
5725 case CEPH_OSD_OP_NOTIFY
:
5726 case CEPH_OSD_OP_NOTIFY_ACK
:
5727 out
<< " cookie " << op
.op
.notify
.cookie
;
5729 case CEPH_OSD_OP_COPY_GET
:
5730 out
<< " max " << op
.op
.copy_get
.max
;
5732 case CEPH_OSD_OP_COPY_FROM
:
5733 out
<< " ver " << op
.op
.copy_from
.src_version
;
5735 case CEPH_OSD_OP_SETALLOCHINT
:
5736 out
<< " object_size " << op
.op
.alloc_hint
.expected_object_size
5737 << " write_size " << op
.op
.alloc_hint
.expected_write_size
;
5739 case CEPH_OSD_OP_READ
:
5740 case CEPH_OSD_OP_SPARSE_READ
:
5741 case CEPH_OSD_OP_SYNC_READ
:
5742 case CEPH_OSD_OP_WRITE
:
5743 case CEPH_OSD_OP_WRITEFULL
:
5744 case CEPH_OSD_OP_ZERO
:
5745 case CEPH_OSD_OP_APPEND
:
5746 case CEPH_OSD_OP_MAPEXT
:
5747 out
<< " " << op
.op
.extent
.offset
<< "~" << op
.op
.extent
.length
;
5748 if (op
.op
.extent
.truncate_seq
)
5749 out
<< " [" << op
.op
.extent
.truncate_seq
<< "@"
5750 << (int64_t)op
.op
.extent
.truncate_size
<< "]";
5752 out
<< " [" << ceph_osd_op_flag_string(op
.op
.flags
) << "]";
5754 // don't show any arg info
5757 } else if (ceph_osd_op_type_attr(op
.op
.op
)) {
5759 if (op
.op
.xattr
.name_len
&& op
.indata
.length()) {
5761 op
.indata
.write(0, op
.op
.xattr
.name_len
, out
);
5763 if (op
.op
.xattr
.value_len
)
5764 out
<< " (" << op
.op
.xattr
.value_len
<< ")";
5765 if (op
.op
.op
== CEPH_OSD_OP_CMPXATTR
)
5766 out
<< " op " << (int)op
.op
.xattr
.cmp_op
5767 << " mode " << (int)op
.op
.xattr
.cmp_mode
;
5768 } else if (ceph_osd_op_type_exec(op
.op
.op
)) {
5770 if (op
.op
.cls
.class_len
&& op
.indata
.length()) {
5772 op
.indata
.write(0, op
.op
.cls
.class_len
, out
);
5774 op
.indata
.write(op
.op
.cls
.class_len
, op
.op
.cls
.method_len
, out
);
5776 } else if (ceph_osd_op_type_pg(op
.op
.op
)) {
5778 case CEPH_OSD_OP_PGLS
:
5779 case CEPH_OSD_OP_PGLS_FILTER
:
5780 case CEPH_OSD_OP_PGNLS
:
5781 case CEPH_OSD_OP_PGNLS_FILTER
:
5782 out
<< " start_epoch " << op
.op
.pgls
.start_epoch
;
5784 case CEPH_OSD_OP_PG_HITSET_LS
:
5786 case CEPH_OSD_OP_PG_HITSET_GET
:
5787 out
<< " " << utime_t(op
.op
.hit_set_get
.stamp
);
5789 case CEPH_OSD_OP_SCRUBLS
:
5797 void OSDOp::split_osd_op_vector_in_data(vector
<OSDOp
>& ops
, bufferlist
& in
)
5799 bufferlist::iterator datap
= in
.begin();
5800 for (unsigned i
= 0; i
< ops
.size(); i
++) {
5801 if (ops
[i
].op
.payload_len
) {
5802 datap
.copy(ops
[i
].op
.payload_len
, ops
[i
].indata
);
5807 void OSDOp::merge_osd_op_vector_in_data(vector
<OSDOp
>& ops
, bufferlist
& out
)
5809 for (unsigned i
= 0; i
< ops
.size(); i
++) {
5810 if (ops
[i
].indata
.length()) {
5811 ops
[i
].op
.payload_len
= ops
[i
].indata
.length();
5812 out
.append(ops
[i
].indata
);
5817 void OSDOp::split_osd_op_vector_out_data(vector
<OSDOp
>& ops
, bufferlist
& in
)
5819 bufferlist::iterator datap
= in
.begin();
5820 for (unsigned i
= 0; i
< ops
.size(); i
++) {
5821 if (ops
[i
].op
.payload_len
) {
5822 datap
.copy(ops
[i
].op
.payload_len
, ops
[i
].outdata
);
5827 void OSDOp::merge_osd_op_vector_out_data(vector
<OSDOp
>& ops
, bufferlist
& out
)
5829 for (unsigned i
= 0; i
< ops
.size(); i
++) {
5830 if (ops
[i
].outdata
.length()) {
5831 ops
[i
].op
.payload_len
= ops
[i
].outdata
.length();
5832 out
.append(ops
[i
].outdata
);
5837 bool store_statfs_t::operator==(const store_statfs_t
& other
) const
5839 return total
== other
.total
5840 && available
== other
.available
5841 && allocated
== other
.allocated
5842 && stored
== other
.stored
5843 && compressed
== other
.compressed
5844 && compressed_allocated
== other
.compressed_allocated
5845 && compressed_original
== other
.compressed_original
;
5848 void store_statfs_t::dump(Formatter
*f
) const
5850 f
->dump_int("total", total
);
5851 f
->dump_int("available", available
);
5852 f
->dump_int("allocated", allocated
);
5853 f
->dump_int("stored", stored
);
5854 f
->dump_int("compressed", compressed
);
5855 f
->dump_int("compressed_allocated", compressed_allocated
);
5856 f
->dump_int("compressed_original", compressed_original
);
5859 ostream
& operator<<(ostream
& out
, const store_statfs_t
&s
)
5862 << "store_statfs(0x" << s
.available
5864 << ", stored 0x" << s
.stored
5865 << "/0x" << s
.allocated
5866 << ", compress 0x" << s
.compressed
5867 << "/0x" << s
.compressed_allocated
5868 << "/0x" << s
.compressed_original