1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
9 * Author: Loic Dachary <loic@dachary.org>
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
18 #include <boost/assign/list_of.hpp>
20 #include "osd_types.h"
21 #include "include/ceph_features.h"
22 #include "include/stringify.h"
24 #include "crush/hash.h"
28 const char *ceph_osd_flag_name(unsigned flag
)
31 case CEPH_OSD_FLAG_ACK
: return "ack";
32 case CEPH_OSD_FLAG_ONNVRAM
: return "onnvram";
33 case CEPH_OSD_FLAG_ONDISK
: return "ondisk";
34 case CEPH_OSD_FLAG_RETRY
: return "retry";
35 case CEPH_OSD_FLAG_READ
: return "read";
36 case CEPH_OSD_FLAG_WRITE
: return "write";
37 case CEPH_OSD_FLAG_ORDERSNAP
: return "ordersnap";
38 case CEPH_OSD_FLAG_PEERSTAT_OLD
: return "peerstat_old";
39 case CEPH_OSD_FLAG_BALANCE_READS
: return "balance_reads";
40 case CEPH_OSD_FLAG_PARALLELEXEC
: return "parallelexec";
41 case CEPH_OSD_FLAG_PGOP
: return "pgop";
42 case CEPH_OSD_FLAG_EXEC
: return "exec";
43 case CEPH_OSD_FLAG_EXEC_PUBLIC
: return "exec_public";
44 case CEPH_OSD_FLAG_LOCALIZE_READS
: return "localize_reads";
45 case CEPH_OSD_FLAG_RWORDERED
: return "rwordered";
46 case CEPH_OSD_FLAG_IGNORE_CACHE
: return "ignore_cache";
47 case CEPH_OSD_FLAG_SKIPRWLOCKS
: return "skiprwlocks";
48 case CEPH_OSD_FLAG_IGNORE_OVERLAY
: return "ignore_overlay";
49 case CEPH_OSD_FLAG_FLUSH
: return "flush";
50 case CEPH_OSD_FLAG_MAP_SNAP_CLONE
: return "map_snap_clone";
51 case CEPH_OSD_FLAG_ENFORCE_SNAPC
: return "enforce_snapc";
52 case CEPH_OSD_FLAG_REDIRECTED
: return "redirected";
53 case CEPH_OSD_FLAG_KNOWN_REDIR
: return "known_if_redirected";
54 case CEPH_OSD_FLAG_FULL_TRY
: return "full_try";
55 case CEPH_OSD_FLAG_FULL_FORCE
: return "full_force";
56 case CEPH_OSD_FLAG_IGNORE_REDIRECT
: return "ignore_redirect";
57 default: return "???";
61 string
ceph_osd_flag_string(unsigned flags
)
64 for (unsigned i
=0; i
<32; ++i
) {
65 if (flags
& (1u<<i
)) {
68 s
+= ceph_osd_flag_name(1u << i
);
76 const char * ceph_osd_op_flag_name(unsigned flag
)
81 case CEPH_OSD_OP_FLAG_EXCL
:
84 case CEPH_OSD_OP_FLAG_FAILOK
:
87 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM
:
88 name
= "fadvise_random";
90 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL
:
91 name
= "fadvise_sequential";
93 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED
:
94 name
= "favise_willneed";
96 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED
:
97 name
= "fadvise_dontneed";
99 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE
:
100 name
= "fadvise_nocache";
102 case CEPH_OSD_OP_FLAG_WITH_REFERENCE
:
103 name
= "with_reference";
105 case CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE
:
106 name
= "bypass_clean_cache";
115 string
ceph_osd_op_flag_string(unsigned flags
)
118 for (unsigned i
=0; i
<32; ++i
) {
119 if (flags
& (1u<<i
)) {
122 s
+= ceph_osd_op_flag_name(1u << i
);
130 string
ceph_osd_alloc_hint_flag_string(unsigned flags
)
133 for (unsigned i
=0; i
<32; ++i
) {
134 if (flags
& (1u<<i
)) {
137 s
+= ceph_osd_alloc_hint_flag_name(1u << i
);
145 void pg_shard_t::encode(bufferlist
&bl
) const
147 ENCODE_START(1, 1, bl
);
152 void pg_shard_t::decode(bufferlist::const_iterator
&bl
)
160 ostream
&operator<<(ostream
&lhs
, const pg_shard_t
&rhs
)
162 if (rhs
.is_undefined())
164 if (rhs
.shard
== shard_id_t::NO_SHARD
)
165 return lhs
<< rhs
.get_osd();
166 return lhs
<< rhs
.get_osd() << '(' << (unsigned)(rhs
.shard
) << ')';
169 void dump(Formatter
* f
, const osd_alerts_t
& alerts
)
171 for (auto& a
: alerts
) {
172 string s0
= " osd: ";
173 s0
+= stringify(a
.first
);
175 for (auto& aa
: a
.second
) {
181 f
->dump_string("alert", s
);
187 void osd_reqid_t::dump(Formatter
*f
) const
189 f
->dump_stream("name") << name
;
190 f
->dump_int("inc", inc
);
191 f
->dump_unsigned("tid", tid
);
194 void osd_reqid_t::generate_test_instances(list
<osd_reqid_t
*>& o
)
196 o
.push_back(new osd_reqid_t
);
197 o
.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
200 // -- object_locator_t --
202 void object_locator_t::encode(bufferlist
& bl
) const
204 // verify that nobody's corrupted the locator
205 ceph_assert(hash
== -1 || key
.empty());
206 __u8 encode_compat
= 3;
207 ENCODE_START(6, encode_compat
, bl
);
209 int32_t preferred
= -1; // tell old code there is no preferred osd (-1).
210 encode(preferred
, bl
);
215 encode_compat
= std::max
<std::uint8_t>(encode_compat
, 6); // need to interpret the hash
216 ENCODE_FINISH_NEW_COMPAT(bl
, encode_compat
);
219 void object_locator_t::decode(bufferlist::const_iterator
& p
)
221 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p
);
231 decode(preferred
, p
);
241 // verify that nobody's corrupted the locator
242 ceph_assert(hash
== -1 || key
.empty());
245 void object_locator_t::dump(Formatter
*f
) const
247 f
->dump_int("pool", pool
);
248 f
->dump_string("key", key
);
249 f
->dump_string("namespace", nspace
);
250 f
->dump_int("hash", hash
);
253 void object_locator_t::generate_test_instances(list
<object_locator_t
*>& o
)
255 o
.push_back(new object_locator_t
);
256 o
.push_back(new object_locator_t(123));
257 o
.push_back(new object_locator_t(123, 876));
258 o
.push_back(new object_locator_t(1, "n2"));
259 o
.push_back(new object_locator_t(1234, "", "key"));
260 o
.push_back(new object_locator_t(12, "n1", "key2"));
263 // -- request_redirect_t --
264 void request_redirect_t::encode(bufferlist
& bl
) const
266 ENCODE_START(1, 1, bl
);
267 encode(redirect_locator
, bl
);
268 encode(redirect_object
, bl
);
269 // legacy of the removed osd_instructions member
270 encode((uint32_t)0, bl
);
274 void request_redirect_t::decode(bufferlist::const_iterator
& bl
)
277 uint32_t legacy_osd_instructions_len
;
278 decode(redirect_locator
, bl
);
279 decode(redirect_object
, bl
);
280 decode(legacy_osd_instructions_len
, bl
);
281 if (legacy_osd_instructions_len
) {
282 bl
.advance(legacy_osd_instructions_len
);
287 void request_redirect_t::dump(Formatter
*f
) const
289 f
->dump_string("object", redirect_object
);
290 f
->open_object_section("locator");
291 redirect_locator
.dump(f
);
292 f
->close_section(); // locator
295 void request_redirect_t::generate_test_instances(list
<request_redirect_t
*>& o
)
297 object_locator_t
loc(1, "redir_obj");
298 o
.push_back(new request_redirect_t());
299 o
.push_back(new request_redirect_t(loc
, 0));
300 o
.push_back(new request_redirect_t(loc
, "redir_obj"));
301 o
.push_back(new request_redirect_t(loc
));
304 void objectstore_perf_stat_t::dump(Formatter
*f
) const
306 // *_ms values just for compatibility.
307 f
->dump_float("commit_latency_ms", os_commit_latency_ns
/ 1000000.0);
308 f
->dump_float("apply_latency_ms", os_apply_latency_ns
/ 1000000.0);
309 f
->dump_unsigned("commit_latency_ns", os_commit_latency_ns
);
310 f
->dump_unsigned("apply_latency_ns", os_apply_latency_ns
);
313 void objectstore_perf_stat_t::encode(bufferlist
&bl
, uint64_t features
) const
315 uint8_t target_v
= 2;
316 if (!HAVE_FEATURE(features
, OS_PERF_STAT_NS
)) {
319 ENCODE_START(target_v
, target_v
, bl
);
321 encode(os_commit_latency_ns
, bl
);
322 encode(os_apply_latency_ns
, bl
);
324 constexpr auto NS_PER_MS
= std::chrono::nanoseconds(1ms
).count();
325 uint32_t commit_latency_ms
= os_commit_latency_ns
/ NS_PER_MS
;
326 uint32_t apply_latency_ms
= os_apply_latency_ns
/ NS_PER_MS
;
327 encode(commit_latency_ms
, bl
); // for compatibility with older monitor.
328 encode(apply_latency_ms
, bl
); // for compatibility with older monitor.
333 void objectstore_perf_stat_t::decode(bufferlist::const_iterator
&bl
)
337 decode(os_commit_latency_ns
, bl
);
338 decode(os_apply_latency_ns
, bl
);
340 uint32_t commit_latency_ms
;
341 uint32_t apply_latency_ms
;
342 decode(commit_latency_ms
, bl
);
343 decode(apply_latency_ms
, bl
);
344 constexpr auto NS_PER_MS
= std::chrono::nanoseconds(1ms
).count();
345 os_commit_latency_ns
= commit_latency_ms
* NS_PER_MS
;
346 os_apply_latency_ns
= apply_latency_ms
* NS_PER_MS
;
351 void objectstore_perf_stat_t::generate_test_instances(std::list
<objectstore_perf_stat_t
*>& o
)
353 o
.push_back(new objectstore_perf_stat_t());
354 o
.push_back(new objectstore_perf_stat_t());
355 o
.back()->os_commit_latency_ns
= 20000000;
356 o
.back()->os_apply_latency_ns
= 30000000;
360 void osd_stat_t::dump(Formatter
*f
) const
362 f
->dump_unsigned("up_from", up_from
);
363 f
->dump_unsigned("seq", seq
);
364 f
->dump_unsigned("num_pgs", num_pgs
);
365 f
->dump_unsigned("num_osds", num_osds
);
366 f
->dump_unsigned("num_per_pool_osds", num_per_pool_osds
);
368 /// dump legacy stats fields to ensure backward compatibility.
369 f
->dump_unsigned("kb", statfs
.kb());
370 f
->dump_unsigned("kb_used", statfs
.kb_used_raw());
371 f
->dump_unsigned("kb_used_data", statfs
.kb_used_data());
372 f
->dump_unsigned("kb_used_omap", statfs
.kb_used_omap());
373 f
->dump_unsigned("kb_used_meta", statfs
.kb_used_internal_metadata());
374 f
->dump_unsigned("kb_avail", statfs
.kb_avail());
377 f
->open_object_section("statfs");
380 f
->open_array_section("hb_peers");
381 for (auto p
: hb_peers
)
382 f
->dump_int("osd", p
);
384 f
->dump_int("snap_trim_queue_len", snap_trim_queue_len
);
385 f
->dump_int("num_snap_trimming", num_snap_trimming
);
386 f
->dump_int("num_shards_repaired", num_shards_repaired
);
387 f
->open_object_section("op_queue_age_hist");
388 op_queue_age_hist
.dump(f
);
390 f
->open_object_section("perf_stat");
391 os_perf_stat
.dump(f
);
393 f
->open_array_section("alerts");
394 ::dump(f
, os_alerts
);
396 f
->open_array_section("network_ping_times");
397 for (auto &i
: hb_pingtime
) {
398 f
->open_object_section("entry");
399 f
->dump_int("osd", i
.first
);
400 const time_t lu(i
.second
.last_update
);
402 string
lustr(ctime_r(&lu
, buffer
));
403 lustr
.pop_back(); // Remove trailing \n
404 f
->dump_string("last update", lustr
);
405 f
->open_array_section("interfaces");
406 f
->open_object_section("interface");
407 f
->dump_string("interface", "back");
408 f
->open_object_section("average");
409 f
->dump_format_unquoted("1min", "%s", fixed_u_to_string(i
.second
.back_pingtime
[0],3).c_str());
410 f
->dump_format_unquoted("5min", "%s", fixed_u_to_string(i
.second
.back_pingtime
[1],3).c_str());
411 f
->dump_format_unquoted("15min", "%s", fixed_u_to_string(i
.second
.back_pingtime
[2],3).c_str());
412 f
->close_section(); // average
413 f
->open_object_section("min");
414 f
->dump_format_unquoted("1min", "%s", fixed_u_to_string(i
.second
.back_min
[0],3).c_str());
415 f
->dump_format_unquoted("5min", "%s", fixed_u_to_string(i
.second
.back_min
[1],3).c_str());
416 f
->dump_format_unquoted("15min", "%s", fixed_u_to_string(i
.second
.back_min
[2],3).c_str());
417 f
->close_section(); // min
418 f
->open_object_section("max");
419 f
->dump_format_unquoted("1min", "%s", fixed_u_to_string(i
.second
.back_max
[0],3).c_str());
420 f
->dump_format_unquoted("5min", "%s", fixed_u_to_string(i
.second
.back_max
[1],3).c_str());
421 f
->dump_format_unquoted("15min", "%s", fixed_u_to_string(i
.second
.back_max
[2],3).c_str());
422 f
->close_section(); // max
423 f
->dump_format_unquoted("last", "%s", fixed_u_to_string(i
.second
.back_last
,3).c_str());
424 f
->close_section(); // interface
426 if (i
.second
.front_pingtime
[0] != 0) {
427 f
->open_object_section("interface");
428 f
->dump_string("interface", "front");
429 f
->open_object_section("average");
430 f
->dump_format_unquoted("1min", "%s", fixed_u_to_string(i
.second
.front_pingtime
[0],3).c_str());
431 f
->dump_format_unquoted("5min", "%s", fixed_u_to_string(i
.second
.front_pingtime
[1],3).c_str());
432 f
->dump_format_unquoted("15min", "%s", fixed_u_to_string(i
.second
.front_pingtime
[2],3).c_str());
433 f
->close_section(); // average
434 f
->open_object_section("min");
435 f
->dump_format_unquoted("1min", "%s", fixed_u_to_string(i
.second
.front_min
[0],3).c_str());
436 f
->dump_format_unquoted("5min", "%s", fixed_u_to_string(i
.second
.front_min
[1],3).c_str());
437 f
->dump_format_unquoted("15min", "%s", fixed_u_to_string(i
.second
.front_min
[2],3).c_str());
438 f
->close_section(); // min
439 f
->open_object_section("max");
440 f
->dump_format_unquoted("1min", "%s", fixed_u_to_string(i
.second
.front_max
[0],3).c_str());
441 f
->dump_format_unquoted("5min", "%s", fixed_u_to_string(i
.second
.front_max
[1],3).c_str());
442 f
->dump_format_unquoted("15min", "%s", fixed_u_to_string(i
.second
.front_max
[2],3).c_str());
443 f
->close_section(); // max
444 f
->dump_format_unquoted("last", "%s", fixed_u_to_string(i
.second
.front_last
,3).c_str());
445 f
->close_section(); // interface
447 f
->close_section(); // interfaces
448 f
->close_section(); // entry
450 f
->close_section(); // network_ping_time
453 void osd_stat_t::encode(bufferlist
&bl
, uint64_t features
) const
455 ENCODE_START(14, 2, bl
);
457 //////// for compatibility ////////
458 int64_t kb
= statfs
.kb();
459 int64_t kb_used
= statfs
.kb_used_raw();
460 int64_t kb_avail
= statfs
.kb_avail();
463 encode(kb_avail
, bl
);
464 ///////////////////////////////////
466 encode(snap_trim_queue_len
, bl
);
467 encode(num_snap_trimming
, bl
);
468 encode(hb_peers
, bl
);
469 encode((uint32_t)0, bl
);
470 encode(op_queue_age_hist
, bl
);
471 encode(os_perf_stat
, bl
, features
);
476 //////// for compatibility ////////
477 int64_t kb_used_data
= statfs
.kb_used_data();
478 int64_t kb_used_omap
= statfs
.kb_used_omap();
479 int64_t kb_used_meta
= statfs
.kb_used_internal_metadata();
480 encode(kb_used_data
, bl
);
481 encode(kb_used_omap
, bl
);
482 encode(kb_used_meta
, bl
);
484 ///////////////////////////////////
485 encode(os_alerts
, bl
);
486 encode(num_shards_repaired
, bl
);
487 encode(num_osds
, bl
);
488 encode(num_per_pool_osds
, bl
);
490 encode((uint32_t)0, bl
); // compatibility
493 encode((int)hb_pingtime
.size(), bl
);
494 for (auto i
: hb_pingtime
) {
495 encode(i
.first
, bl
); // osd
496 encode(i
.second
.last_update
, bl
);
497 encode(i
.second
.back_pingtime
[0], bl
);
498 encode(i
.second
.back_pingtime
[1], bl
);
499 encode(i
.second
.back_pingtime
[2], bl
);
500 encode(i
.second
.back_min
[0], bl
);
501 encode(i
.second
.back_min
[1], bl
);
502 encode(i
.second
.back_min
[2], bl
);
503 encode(i
.second
.back_max
[0], bl
);
504 encode(i
.second
.back_max
[1], bl
);
505 encode(i
.second
.back_max
[2], bl
);
506 encode(i
.second
.back_last
, bl
);
507 encode(i
.second
.front_pingtime
[0], bl
);
508 encode(i
.second
.front_pingtime
[1], bl
);
509 encode(i
.second
.front_pingtime
[2], bl
);
510 encode(i
.second
.front_min
[0], bl
);
511 encode(i
.second
.front_min
[1], bl
);
512 encode(i
.second
.front_min
[2], bl
);
513 encode(i
.second
.front_max
[0], bl
);
514 encode(i
.second
.front_max
[1], bl
);
515 encode(i
.second
.front_max
[2], bl
);
516 encode(i
.second
.front_last
, bl
);
521 void osd_stat_t::decode(bufferlist::const_iterator
&bl
)
523 int64_t kb
, kb_used
,kb_avail
;
524 int64_t kb_used_data
, kb_used_omap
, kb_used_meta
;
525 DECODE_START_LEGACY_COMPAT_LEN(14, 2, 2, bl
);
528 decode(kb_avail
, bl
);
529 decode(snap_trim_queue_len
, bl
);
530 decode(num_snap_trimming
, bl
);
531 decode(hb_peers
, bl
);
532 vector
<int> num_hb_out
;
533 decode(num_hb_out
, bl
);
535 decode(op_queue_age_hist
, bl
);
537 decode(os_perf_stat
, bl
);
546 decode(kb_used_data
, bl
);
547 decode(kb_used_omap
, bl
);
548 decode(kb_used_meta
, bl
);
550 kb_used_data
= kb_used
;
558 statfs
.total
= kb
<< 10;
559 statfs
.available
= kb_avail
<< 10;
560 // actually it's totally unexpected to have ststfs.total < statfs.available
561 // here but unfortunately legacy generate_test_instances produced such a
562 // case hence inserting some handling rather than assert
563 statfs
.internally_reserved
=
564 statfs
.total
> statfs
.available
? statfs
.total
- statfs
.available
: 0;
566 if ((int64_t)statfs
.internally_reserved
> kb_used
) {
567 statfs
.internally_reserved
-= kb_used
;
569 statfs
.internally_reserved
= 0;
571 statfs
.allocated
= kb_used_data
<< 10;
572 statfs
.omap_allocated
= kb_used_omap
<< 10;
573 statfs
.internal_metadata
= kb_used_meta
<< 10;
575 if (struct_v
>= 10) {
576 decode(os_alerts
, bl
);
580 if (struct_v
>= 11) {
581 decode(num_shards_repaired
, bl
);
583 num_shards_repaired
= 0;
585 if (struct_v
>= 12) {
586 decode(num_osds
, bl
);
587 decode(num_per_pool_osds
, bl
);
590 num_per_pool_osds
= 0;
592 // Compatibility num_per_pool_omap_osds
593 if (struct_v
>= 13) {
598 if (struct_v
>= 14) {
601 for (int i
= 0 ; i
< count
; i
++) {
604 struct Interfaces ifs
;
605 decode(ifs
.last_update
, bl
);
606 decode(ifs
.back_pingtime
[0],bl
);
607 decode(ifs
.back_pingtime
[1], bl
);
608 decode(ifs
.back_pingtime
[2], bl
);
609 decode(ifs
.back_min
[0],bl
);
610 decode(ifs
.back_min
[1], bl
);
611 decode(ifs
.back_min
[2], bl
);
612 decode(ifs
.back_max
[0],bl
);
613 decode(ifs
.back_max
[1], bl
);
614 decode(ifs
.back_max
[2], bl
);
615 decode(ifs
.back_last
, bl
);
616 decode(ifs
.front_pingtime
[0], bl
);
617 decode(ifs
.front_pingtime
[1], bl
);
618 decode(ifs
.front_pingtime
[2], bl
);
619 decode(ifs
.front_min
[0], bl
);
620 decode(ifs
.front_min
[1], bl
);
621 decode(ifs
.front_min
[2], bl
);
622 decode(ifs
.front_max
[0], bl
);
623 decode(ifs
.front_max
[1], bl
);
624 decode(ifs
.front_max
[2], bl
);
625 decode(ifs
.front_last
, bl
);
626 hb_pingtime
[osd
] = ifs
;
632 void osd_stat_t::generate_test_instances(std::list
<osd_stat_t
*>& o
)
634 o
.push_back(new osd_stat_t
);
636 o
.push_back(new osd_stat_t
);
637 list
<store_statfs_t
*> ll
;
638 store_statfs_t::generate_test_instances(ll
);
639 o
.back()->statfs
= *ll
.back();
640 o
.back()->hb_peers
.push_back(7);
641 o
.back()->snap_trim_queue_len
= 8;
642 o
.back()->num_snap_trimming
= 99;
643 o
.back()->num_shards_repaired
= 101;
644 o
.back()->os_alerts
[0].emplace(
645 "some alert", "some alert details");
646 o
.back()->os_alerts
[1].emplace(
647 "some alert2", "some alert2 details");
648 struct Interfaces gen_interfaces
= {
649 123456789, { 1000, 900, 800 }, { 990, 890, 790 }, { 1010, 910, 810 }, 1001,
650 { 1100, 1000, 900 }, { 1090, 990, 890 }, { 1110, 1010, 910 }, 1101 };
651 o
.back()->hb_pingtime
[20] = gen_interfaces
;
653 987654321, { 100, 200, 300 }, { 90, 190, 290 }, { 110, 210, 310 }, 101 };
654 o
.back()->hb_pingtime
[30] = gen_interfaces
;
659 int pg_t::print(char *o
, int maxlen
) const
661 return snprintf(o
, maxlen
, "%llu.%x", (unsigned long long)pool(), ps());
664 bool pg_t::parse(const char *s
)
668 int r
= sscanf(s
, "%llu.%x", (long long unsigned *)&ppool
, &pseed
);
676 bool spg_t::parse(const char *s
)
678 shard
= shard_id_t::NO_SHARD
;
682 int r
= sscanf(s
, "%llu.%x", (long long unsigned *)&ppool
, &pseed
);
685 pgid
.set_pool(ppool
);
688 const char *p
= strchr(s
, 's');
690 r
= sscanf(p
, "s%u", &pshard
);
692 shard
= shard_id_t(pshard
);
700 char *spg_t::calc_name(char *buf
, const char *suffix_backwords
) const
702 while (*suffix_backwords
)
703 *--buf
= *suffix_backwords
++;
705 if (!is_no_shard()) {
706 buf
= ritoa
<uint8_t, 10>((uint8_t)shard
.id
, buf
);
710 return pgid
.calc_name(buf
, "");
713 ostream
& operator<<(ostream
& out
, const spg_t
&pg
)
715 char buf
[spg_t::calc_name_buf_size
];
716 buf
[spg_t::calc_name_buf_size
- 1] = '\0';
717 out
<< pg
.calc_name(buf
+ spg_t::calc_name_buf_size
- 1, "");
721 pg_t
pg_t::get_ancestor(unsigned old_pg_num
) const
723 int old_bits
= cbits(old_pg_num
);
724 int old_mask
= (1 << old_bits
) - 1;
726 ret
.m_seed
= ceph_stable_mod(m_seed
, old_pg_num
, old_mask
);
730 bool pg_t::is_split(unsigned old_pg_num
, unsigned new_pg_num
, set
<pg_t
> *children
) const
732 //ceph_assert(m_seed < old_pg_num);
733 if (m_seed
>= old_pg_num
) {
737 if (new_pg_num
<= old_pg_num
)
742 unsigned old_bits
= cbits(old_pg_num
);
743 unsigned old_mask
= (1 << old_bits
) - 1;
744 for (unsigned n
= 1; ; n
++) {
745 unsigned next_bit
= (n
<< (old_bits
-1));
746 unsigned s
= next_bit
| m_seed
;
748 if (s
< old_pg_num
|| s
== m_seed
)
752 if ((unsigned)ceph_stable_mod(s
, old_pg_num
, old_mask
) == m_seed
) {
755 children
->insert(pg_t(s
, m_pool
));
761 int old_bits
= cbits(old_pg_num
);
762 int old_mask
= (1 << old_bits
) - 1;
763 for (unsigned x
= old_pg_num
; x
< new_pg_num
; ++x
) {
764 unsigned o
= ceph_stable_mod(x
, old_pg_num
, old_mask
);
767 children
->insert(pg_t(x
, m_pool
));
774 unsigned pg_t::get_split_bits(unsigned pg_num
) const {
777 ceph_assert(pg_num
> 1);
779 // Find unique p such that pg_num \in [2^(p-1), 2^p)
780 unsigned p
= cbits(pg_num
);
781 ceph_assert(p
); // silence coverity #751330
783 if ((m_seed
% (1<<(p
-1))) < (pg_num
% (1<<(p
-1))))
789 bool pg_t::is_merge_source(
794 if (m_seed
< old_pg_num
&&
795 m_seed
>= new_pg_num
) {
798 while (t
.m_seed
>= new_pg_num
) {
808 pg_t
pg_t::get_parent() const
810 unsigned bits
= cbits(m_seed
);
813 retval
.m_seed
&= ~((~0)<<(bits
- 1));
817 hobject_t
pg_t::get_hobj_start() const
819 return hobject_t(object_t(), string(), 0, m_seed
, m_pool
,
823 hobject_t
pg_t::get_hobj_end(unsigned pg_num
) const
825 // note: this assumes a bitwise sort; with the legacy nibblewise
826 // sort a PG did not always cover a single contiguous range of the
827 // (bit-reversed) hash range.
828 unsigned bits
= get_split_bits(pg_num
);
829 uint64_t rev_start
= hobject_t::_reverse_bits(m_seed
);
830 uint64_t rev_end
= (rev_start
| (0xffffffff >> bits
)) + 1;
831 if (rev_end
>= 0x100000000) {
832 ceph_assert(rev_end
== 0x100000000);
833 return hobject_t::get_max();
835 return hobject_t(object_t(), string(), CEPH_NOSNAP
,
836 hobject_t::_reverse_bits(rev_end
), m_pool
,
841 void pg_t::dump(Formatter
*f
) const
843 f
->dump_unsigned("pool", m_pool
);
844 f
->dump_unsigned("seed", m_seed
);
847 void pg_t::generate_test_instances(list
<pg_t
*>& o
)
849 o
.push_back(new pg_t
);
850 o
.push_back(new pg_t(1, 2));
851 o
.push_back(new pg_t(13123, 3));
852 o
.push_back(new pg_t(131223, 4));
855 char *pg_t::calc_name(char *buf
, const char *suffix_backwords
) const
857 while (*suffix_backwords
)
858 *--buf
= *suffix_backwords
++;
860 buf
= ritoa
<uint32_t, 16>(m_seed
, buf
);
864 return ritoa
<uint64_t, 10>(m_pool
, buf
);
867 ostream
& operator<<(ostream
& out
, const pg_t
&pg
)
869 char buf
[pg_t::calc_name_buf_size
];
870 buf
[pg_t::calc_name_buf_size
- 1] = '\0';
871 out
<< pg
.calc_name(buf
+ pg_t::calc_name_buf_size
- 1, "");
878 void coll_t::calc_str()
882 strcpy(_str_buff
, "meta");
886 _str_buff
[spg_t::calc_name_buf_size
- 1] = '\0';
887 _str
= pgid
.calc_name(_str_buff
+ spg_t::calc_name_buf_size
- 1, "daeh_");
890 _str_buff
[spg_t::calc_name_buf_size
- 1] = '\0';
891 _str
= pgid
.calc_name(_str_buff
+ spg_t::calc_name_buf_size
- 1, "PMET_");
894 ceph_abort_msg("unknown collection type");
898 bool coll_t::parse(const std::string
& s
)
905 ceph_assert(s
== _str
);
908 if (s
.find("_head") == s
.length() - 5 &&
909 pgid
.parse(s
.substr(0, s
.length() - 5))) {
913 ceph_assert(s
== _str
);
916 if (s
.find("_TEMP") == s
.length() - 5 &&
917 pgid
.parse(s
.substr(0, s
.length() - 5))) {
921 ceph_assert(s
== _str
);
927 void coll_t::encode(bufferlist
& bl
) const
930 // when changing this, remember to update encoded_size() too.
932 // can't express this as v2...
934 encode(struct_v
, bl
);
935 encode(to_str(), bl
);
938 encode(struct_v
, bl
);
939 encode((__u8
)type
, bl
);
941 snapid_t snap
= CEPH_NOSNAP
;
946 size_t coll_t::encoded_size() const
948 size_t r
= sizeof(__u8
);
961 r
+= sizeof(ceph_le32
) + 2 * sizeof(__u8
);
963 r
+= sizeof(__u8
) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
967 r
+= sizeof(uint64_t);
973 void coll_t::decode(bufferlist::const_iterator
& bl
)
977 decode(struct_v
, bl
);
986 if (pgid
== spg_t() && snap
== 0) {
1002 type
= (type_t
)_type
;
1011 bool ok
= parse(str
);
1013 throw std::domain_error(std::string("unable to parse pg ") + str
);
1020 oss
<< "coll_t::decode(): don't know how to decode version "
1022 throw std::domain_error(oss
.str());
1027 void coll_t::dump(Formatter
*f
) const
1029 f
->dump_unsigned("type_id", (unsigned)type
);
1030 if (type
!= TYPE_META
)
1031 f
->dump_stream("pgid") << pgid
;
1032 f
->dump_string("name", to_str());
1035 void coll_t::generate_test_instances(list
<coll_t
*>& o
)
1037 o
.push_back(new coll_t());
1038 o
.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD
)));
1039 o
.push_back(new coll_t(o
.back()->get_temp()));
1040 o
.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
1041 o
.push_back(new coll_t(o
.back()->get_temp()));
1042 o
.push_back(new coll_t());
1047 std::string
pg_vector_string(const vector
<int32_t> &a
)
1051 for (vector
<int32_t>::const_iterator i
= a
.begin(); i
!= a
.end(); ++i
) {
1054 if (*i
!= CRUSH_ITEM_NONE
)
1063 std::string
pg_state_string(uint64_t state
)
1066 if (state
& PG_STATE_STALE
)
1068 if (state
& PG_STATE_CREATING
)
1070 if (state
& PG_STATE_ACTIVE
)
1072 if (state
& PG_STATE_ACTIVATING
)
1073 oss
<< "activating+";
1074 if (state
& PG_STATE_CLEAN
)
1076 if (state
& PG_STATE_RECOVERY_WAIT
)
1077 oss
<< "recovery_wait+";
1078 if (state
& PG_STATE_RECOVERY_TOOFULL
)
1079 oss
<< "recovery_toofull+";
1080 if (state
& PG_STATE_RECOVERING
)
1081 oss
<< "recovering+";
1082 if (state
& PG_STATE_FORCED_RECOVERY
)
1083 oss
<< "forced_recovery+";
1084 if (state
& PG_STATE_DOWN
)
1086 if (state
& PG_STATE_RECOVERY_UNFOUND
)
1087 oss
<< "recovery_unfound+";
1088 if (state
& PG_STATE_BACKFILL_UNFOUND
)
1089 oss
<< "backfill_unfound+";
1090 if (state
& PG_STATE_UNDERSIZED
)
1091 oss
<< "undersized+";
1092 if (state
& PG_STATE_DEGRADED
)
1094 if (state
& PG_STATE_REMAPPED
)
1096 if (state
& PG_STATE_PREMERGE
)
1098 if (state
& PG_STATE_SCRUBBING
)
1099 oss
<< "scrubbing+";
1100 if (state
& PG_STATE_DEEP_SCRUB
)
1102 if (state
& PG_STATE_INCONSISTENT
)
1103 oss
<< "inconsistent+";
1104 if (state
& PG_STATE_PEERING
)
1106 if (state
& PG_STATE_REPAIR
)
1108 if (state
& PG_STATE_BACKFILL_WAIT
)
1109 oss
<< "backfill_wait+";
1110 if (state
& PG_STATE_BACKFILLING
)
1111 oss
<< "backfilling+";
1112 if (state
& PG_STATE_FORCED_BACKFILL
)
1113 oss
<< "forced_backfill+";
1114 if (state
& PG_STATE_BACKFILL_TOOFULL
)
1115 oss
<< "backfill_toofull+";
1116 if (state
& PG_STATE_INCOMPLETE
)
1117 oss
<< "incomplete+";
1118 if (state
& PG_STATE_PEERED
)
1120 if (state
& PG_STATE_SNAPTRIM
)
1122 if (state
& PG_STATE_SNAPTRIM_WAIT
)
1123 oss
<< "snaptrim_wait+";
1124 if (state
& PG_STATE_SNAPTRIM_ERROR
)
1125 oss
<< "snaptrim_error+";
1126 if (state
& PG_STATE_FAILED_REPAIR
)
1127 oss
<< "failed_repair+";
1128 string
ret(oss
.str());
1129 if (ret
.length() > 0)
1130 ret
.resize(ret
.length() - 1);
1136 boost::optional
<uint64_t> pg_string_state(const std::string
& state
)
1138 boost::optional
<uint64_t> type
;
1139 if (state
== "active")
1140 type
= PG_STATE_ACTIVE
;
1141 else if (state
== "clean")
1142 type
= PG_STATE_CLEAN
;
1143 else if (state
== "down")
1144 type
= PG_STATE_DOWN
;
1145 else if (state
== "recovery_unfound")
1146 type
= PG_STATE_RECOVERY_UNFOUND
;
1147 else if (state
== "backfill_unfound")
1148 type
= PG_STATE_BACKFILL_UNFOUND
;
1149 else if (state
== "premerge")
1150 type
= PG_STATE_PREMERGE
;
1151 else if (state
== "scrubbing")
1152 type
= PG_STATE_SCRUBBING
;
1153 else if (state
== "degraded")
1154 type
= PG_STATE_DEGRADED
;
1155 else if (state
== "inconsistent")
1156 type
= PG_STATE_INCONSISTENT
;
1157 else if (state
== "peering")
1158 type
= PG_STATE_PEERING
;
1159 else if (state
== "repair")
1160 type
= PG_STATE_REPAIR
;
1161 else if (state
== "recovering")
1162 type
= PG_STATE_RECOVERING
;
1163 else if (state
== "forced_recovery")
1164 type
= PG_STATE_FORCED_RECOVERY
;
1165 else if (state
== "backfill_wait")
1166 type
= PG_STATE_BACKFILL_WAIT
;
1167 else if (state
== "incomplete")
1168 type
= PG_STATE_INCOMPLETE
;
1169 else if (state
== "stale")
1170 type
= PG_STATE_STALE
;
1171 else if (state
== "remapped")
1172 type
= PG_STATE_REMAPPED
;
1173 else if (state
== "deep")
1174 type
= PG_STATE_DEEP_SCRUB
;
1175 else if (state
== "backfilling")
1176 type
= PG_STATE_BACKFILLING
;
1177 else if (state
== "forced_backfill")
1178 type
= PG_STATE_FORCED_BACKFILL
;
1179 else if (state
== "backfill_toofull")
1180 type
= PG_STATE_BACKFILL_TOOFULL
;
1181 else if (state
== "recovery_wait")
1182 type
= PG_STATE_RECOVERY_WAIT
;
1183 else if (state
== "recovery_toofull")
1184 type
= PG_STATE_RECOVERY_TOOFULL
;
1185 else if (state
== "undersized")
1186 type
= PG_STATE_UNDERSIZED
;
1187 else if (state
== "activating")
1188 type
= PG_STATE_ACTIVATING
;
1189 else if (state
== "peered")
1190 type
= PG_STATE_PEERED
;
1191 else if (state
== "snaptrim")
1192 type
= PG_STATE_SNAPTRIM
;
1193 else if (state
== "snaptrim_wait")
1194 type
= PG_STATE_SNAPTRIM_WAIT
;
1195 else if (state
== "snaptrim_error")
1196 type
= PG_STATE_SNAPTRIM_ERROR
;
1197 else if (state
== "creating")
1198 type
= PG_STATE_CREATING
;
1199 else if (state
== "failed_repair")
1200 type
= PG_STATE_FAILED_REPAIR
;
1201 else if (state
== "unknown")
1209 string
eversion_t::get_key_name() const
1211 std::string
key(32, ' ');
1212 get_key_name(&key
[0]);
1213 key
.resize(31); // remove the null terminator
1217 // -- pool_snap_info_t --
1218 void pool_snap_info_t::dump(Formatter
*f
) const
1220 f
->dump_unsigned("snapid", snapid
);
1221 f
->dump_stream("stamp") << stamp
;
1222 f
->dump_string("name", name
);
1225 void pool_snap_info_t::encode(bufferlist
& bl
, uint64_t features
) const
1228 if ((features
& CEPH_FEATURE_PGPOOL3
) == 0) {
1230 encode(struct_v
, bl
);
1236 ENCODE_START(2, 2, bl
);
1243 void pool_snap_info_t::decode(bufferlist::const_iterator
& bl
)
1245 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
1252 void pool_snap_info_t::generate_test_instances(list
<pool_snap_info_t
*>& o
)
1254 o
.push_back(new pool_snap_info_t
);
1255 o
.push_back(new pool_snap_info_t
);
1256 o
.back()->snapid
= 1;
1257 o
.back()->stamp
= utime_t(1, 2);
1258 o
.back()->name
= "foo";
1261 // -- pool_opts_t --
// Registry mapping each textual pool option name (as accepted by the
// pool-option commands) to its opt_desc_t: the pool_opts_t key enum
// plus the expected value type (STR / INT / DOUBLE).  Used by
// is_opt_name() / get_opt_desc() below for validation and lookup.
typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
static opt_mapping_t opt_mapping = boost::assign::map_list_of
	   ("scrub_min_interval", pool_opts_t::opt_desc_t(
	     pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
	   ("scrub_max_interval", pool_opts_t::opt_desc_t(
	     pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
	   ("deep_scrub_interval", pool_opts_t::opt_desc_t(
	     pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
	   ("recovery_priority", pool_opts_t::opt_desc_t(
	     pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
	   ("recovery_op_priority", pool_opts_t::opt_desc_t(
	     pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
	   ("scrub_priority", pool_opts_t::opt_desc_t(
	     pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
	   ("compression_mode", pool_opts_t::opt_desc_t(
	     pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
	   ("compression_algorithm", pool_opts_t::opt_desc_t(
	     pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
	   ("compression_required_ratio", pool_opts_t::opt_desc_t(
	     pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
	   ("compression_max_blob_size", pool_opts_t::opt_desc_t(
	     pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
	   ("compression_min_blob_size", pool_opts_t::opt_desc_t(
	     pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
	   ("csum_type", pool_opts_t::opt_desc_t(
	     pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
	   ("csum_max_block", pool_opts_t::opt_desc_t(
	     pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
	   ("csum_min_block", pool_opts_t::opt_desc_t(
	     pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT))
	   ("fingerprint_algorithm", pool_opts_t::opt_desc_t(
	     pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
	   ("pg_num_min", pool_opts_t::opt_desc_t(
	     pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
	   ("target_size_bytes", pool_opts_t::opt_desc_t(
	     pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
	   ("target_size_ratio", pool_opts_t::opt_desc_t(
	     pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE))
	   ("pg_autoscale_bias", pool_opts_t::opt_desc_t(
	     pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE));
1304 bool pool_opts_t::is_opt_name(const std::string
& name
)
1306 return opt_mapping
.count(name
);
1309 pool_opts_t::opt_desc_t
pool_opts_t::get_opt_desc(const std::string
& name
)
1311 opt_mapping_t::iterator i
= opt_mapping
.find(name
);
1312 ceph_assert(i
!= opt_mapping
.end());
1316 bool pool_opts_t::is_set(pool_opts_t::key_t key
) const
1318 return opts
.count(key
);
1321 const pool_opts_t::value_t
& pool_opts_t::get(pool_opts_t::key_t key
) const
1323 opts_t::const_iterator i
= opts
.find(key
);
1324 ceph_assert(i
!= opts
.end());
1328 bool pool_opts_t::unset(pool_opts_t::key_t key
) {
1329 return opts
.erase(key
) > 0;
1332 class pool_opts_dumper_t
: public boost::static_visitor
<> {
1334 pool_opts_dumper_t(const std::string
& name_
, Formatter
* f_
) :
1335 name(name_
.c_str()), f(f_
) {}
1337 void operator()(std::string s
) const {
1338 f
->dump_string(name
, s
);
1340 void operator()(int64_t i
) const {
1341 f
->dump_int(name
, i
);
1343 void operator()(double d
) const {
1344 f
->dump_float(name
, d
);
1352 void pool_opts_t::dump(const std::string
& name
, Formatter
* f
) const
1354 const opt_desc_t
& desc
= get_opt_desc(name
);
1355 opts_t::const_iterator i
= opts
.find(desc
.key
);
1356 if (i
== opts
.end()) {
1359 boost::apply_visitor(pool_opts_dumper_t(name
, f
), i
->second
);
1362 void pool_opts_t::dump(Formatter
* f
) const
1364 for (opt_mapping_t::iterator i
= opt_mapping
.begin(); i
!= opt_mapping
.end();
1366 const std::string
& name
= i
->first
;
1367 const opt_desc_t
& desc
= i
->second
;
1368 opts_t::const_iterator j
= opts
.find(desc
.key
);
1369 if (j
== opts
.end()) {
1372 boost::apply_visitor(pool_opts_dumper_t(name
, f
), j
->second
);
1376 class pool_opts_encoder_t
: public boost::static_visitor
<> {
1378 explicit pool_opts_encoder_t(bufferlist
& bl_
, uint64_t features
)
1380 features(features
) {}
1382 void operator()(const std::string
&s
) const {
1383 encode(static_cast<int32_t>(pool_opts_t::STR
), bl
);
1386 void operator()(int64_t i
) const {
1387 encode(static_cast<int32_t>(pool_opts_t::INT
), bl
);
1388 if (HAVE_FEATURE(features
, SERVER_NAUTILUS
)) {
1391 encode(static_cast<int32_t>(i
), bl
);
1394 void operator()(double d
) const {
1395 encode(static_cast<int32_t>(pool_opts_t::DOUBLE
), bl
);
1404 void pool_opts_t::encode(bufferlist
& bl
, uint64_t features
) const
1407 if (!HAVE_FEATURE(features
, SERVER_NAUTILUS
)) {
1410 ENCODE_START(v
, 1, bl
);
1411 uint32_t n
= static_cast<uint32_t>(opts
.size());
1413 for (opts_t::const_iterator i
= opts
.begin(); i
!= opts
.end(); ++i
) {
1414 encode(static_cast<int32_t>(i
->first
), bl
);
1415 boost::apply_visitor(pool_opts_encoder_t(bl
, features
), i
->second
);
1420 void pool_opts_t::decode(bufferlist::const_iterator
& bl
)
1422 DECODE_START(1, bl
);
1433 opts
[static_cast<key_t
>(k
)] = s
;
1434 } else if (t
== INT
) {
1436 if (struct_v
>= 2) {
1443 opts
[static_cast<key_t
>(k
)] = i
;
1444 } else if (t
== DOUBLE
) {
1447 opts
[static_cast<key_t
>(k
)] = d
;
1449 ceph_assert(!"invalid type");
1455 ostream
& operator<<(ostream
& out
, const pool_opts_t
& opts
)
1457 for (opt_mapping_t::iterator i
= opt_mapping
.begin(); i
!= opt_mapping
.end();
1459 const std::string
& name
= i
->first
;
1460 const pool_opts_t::opt_desc_t
& desc
= i
->second
;
1461 pool_opts_t::opts_t::const_iterator j
= opts
.opts
.find(desc
.key
);
1462 if (j
== opts
.opts
.end()) {
1465 out
<< " " << name
<< " " << j
->second
;
1472 const char *pg_pool_t::APPLICATION_NAME_CEPHFS("cephfs");
1473 const char *pg_pool_t::APPLICATION_NAME_RBD("rbd");
1474 const char *pg_pool_t::APPLICATION_NAME_RGW("rgw");
1476 void pg_pool_t::dump(Formatter
*f
) const
1478 f
->dump_stream("create_time") << get_create_time();
1479 f
->dump_unsigned("flags", get_flags());
1480 f
->dump_string("flags_names", get_flags_string());
1481 f
->dump_int("type", get_type());
1482 f
->dump_int("size", get_size());
1483 f
->dump_int("min_size", get_min_size());
1484 f
->dump_int("crush_rule", get_crush_rule());
1485 f
->dump_int("object_hash", get_object_hash());
1486 f
->dump_string("pg_autoscale_mode",
1487 get_pg_autoscale_mode_name(pg_autoscale_mode
));
1488 f
->dump_unsigned("pg_num", get_pg_num());
1489 f
->dump_unsigned("pg_placement_num", get_pgp_num());
1490 f
->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
1491 f
->dump_unsigned("pg_num_target", get_pg_num_target());
1492 f
->dump_unsigned("pg_num_pending", get_pg_num_pending());
1493 f
->dump_object("last_pg_merge_meta", last_pg_merge_meta
);
1494 f
->dump_stream("last_change") << get_last_change();
1495 f
->dump_stream("last_force_op_resend") << get_last_force_op_resend();
1496 f
->dump_stream("last_force_op_resend_prenautilus")
1497 << get_last_force_op_resend_prenautilus();
1498 f
->dump_stream("last_force_op_resend_preluminous")
1499 << get_last_force_op_resend_preluminous();
1500 f
->dump_unsigned("auid", get_auid());
1501 f
->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1502 f
->dump_unsigned("snap_seq", get_snap_seq());
1503 f
->dump_unsigned("snap_epoch", get_snap_epoch());
1504 f
->open_array_section("pool_snaps");
1505 for (map
<snapid_t
, pool_snap_info_t
>::const_iterator p
= snaps
.begin(); p
!= snaps
.end(); ++p
) {
1506 f
->open_object_section("pool_snap_info");
1511 f
->dump_stream("removed_snaps") << removed_snaps
;
1512 f
->dump_unsigned("quota_max_bytes", quota_max_bytes
);
1513 f
->dump_unsigned("quota_max_objects", quota_max_objects
);
1514 f
->open_array_section("tiers");
1515 for (set
<uint64_t>::const_iterator p
= tiers
.begin(); p
!= tiers
.end(); ++p
)
1516 f
->dump_unsigned("pool_id", *p
);
1518 f
->dump_int("tier_of", tier_of
);
1519 f
->dump_int("read_tier", read_tier
);
1520 f
->dump_int("write_tier", write_tier
);
1521 f
->dump_string("cache_mode", get_cache_mode_name());
1522 f
->dump_unsigned("target_max_bytes", target_max_bytes
);
1523 f
->dump_unsigned("target_max_objects", target_max_objects
);
1524 f
->dump_unsigned("cache_target_dirty_ratio_micro",
1525 cache_target_dirty_ratio_micro
);
1526 f
->dump_unsigned("cache_target_dirty_high_ratio_micro",
1527 cache_target_dirty_high_ratio_micro
);
1528 f
->dump_unsigned("cache_target_full_ratio_micro",
1529 cache_target_full_ratio_micro
);
1530 f
->dump_unsigned("cache_min_flush_age", cache_min_flush_age
);
1531 f
->dump_unsigned("cache_min_evict_age", cache_min_evict_age
);
1532 f
->dump_string("erasure_code_profile", erasure_code_profile
);
1533 f
->open_object_section("hit_set_params");
1534 hit_set_params
.dump(f
);
1535 f
->close_section(); // hit_set_params
1536 f
->dump_unsigned("hit_set_period", hit_set_period
);
1537 f
->dump_unsigned("hit_set_count", hit_set_count
);
1538 f
->dump_bool("use_gmt_hitset", use_gmt_hitset
);
1539 f
->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote
);
1540 f
->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote
);
1541 f
->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate
);
1542 f
->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n
);
1543 f
->open_array_section("grade_table");
1544 for (unsigned i
= 0; i
< hit_set_count
; ++i
)
1545 f
->dump_unsigned("value", get_grade(i
));
1547 f
->dump_unsigned("stripe_width", get_stripe_width());
1548 f
->dump_unsigned("expected_num_objects", expected_num_objects
);
1549 f
->dump_bool("fast_read", fast_read
);
1550 f
->open_object_section("options");
1552 f
->close_section(); // options
1553 f
->open_object_section("application_metadata");
1554 for (auto &app_pair
: application_metadata
) {
1555 f
->open_object_section(app_pair
.first
.c_str());
1556 for (auto &kv_pair
: app_pair
.second
) {
1557 f
->dump_string(kv_pair
.first
.c_str(), kv_pair
.second
);
1559 f
->close_section(); // application
1561 f
->close_section(); // application_metadata
1564 void pg_pool_t::convert_to_pg_shards(const vector
<int> &from
, set
<pg_shard_t
>* to
) const {
1565 for (size_t i
= 0; i
< from
.size(); ++i
) {
1566 if (from
[i
] != CRUSH_ITEM_NONE
) {
1570 is_erasure() ? shard_id_t(i
) : shard_id_t::NO_SHARD
));
1575 void pg_pool_t::calc_pg_masks()
1577 pg_num_mask
= (1 << cbits(pg_num
-1)) - 1;
1578 pgp_num_mask
= (1 << cbits(pgp_num
-1)) - 1;
1581 unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid
) const
1583 if (pg_num
== pg_num_mask
+ 1)
1584 return pg_num
; // power-of-2 split
1585 unsigned mask
= pg_num_mask
>> 1;
1586 if ((pgid
.ps() & mask
) < (pg_num
& mask
))
1587 return pg_num_mask
+ 1; // smaller bin size (already split)
1589 return (pg_num_mask
+ 1) >> 1; // bigger bin (not yet split)
1592 bool pg_pool_t::is_pending_merge(pg_t pgid
, bool *target
) const
1594 if (pg_num_pending
>= pg_num
) {
1597 if (pgid
.ps() >= pg_num_pending
&& pgid
.ps() < pg_num
) {
1603 for (unsigned ps
= pg_num_pending
; ps
< pg_num
; ++ps
) {
1604 if (pg_t(ps
, pgid
.pool()).get_parent() == pgid
) {
1615 * we have two snap modes:
1617 * - snap existence/non-existence defined by snaps[] and snap_seq
1618 * - user managed snaps
1619 * - existence tracked by librados user
1621 bool pg_pool_t::is_pool_snaps_mode() const
1623 return has_flag(FLAG_POOL_SNAPS
);
1626 bool pg_pool_t::is_unmanaged_snaps_mode() const
1628 return has_flag(FLAG_SELFMANAGED_SNAPS
);
1631 bool pg_pool_t::is_removed_snap(snapid_t s
) const
1633 if (is_pool_snaps_mode())
1634 return s
<= get_snap_seq() && snaps
.count(s
) == 0;
1636 return removed_snaps
.contains(s
);
1640 * build set of known-removed sets from either pool snaps or
1641 * explicit removed_snaps set.
1643 void pg_pool_t::build_removed_snaps(interval_set
<snapid_t
>& rs
) const
1645 if (is_pool_snaps_mode()) {
1647 for (snapid_t s
= 1; s
<= get_snap_seq(); s
= s
+ 1)
1648 if (snaps
.count(s
) == 0)
1655 bool pg_pool_t::maybe_updated_removed_snaps(const interval_set
<snapid_t
>& cached
) const
1657 if (is_unmanaged_snaps_mode()) { // remove_unmanaged_snap increments range_end
1658 if (removed_snaps
.empty() || cached
.empty()) // range_end is undefined
1659 return removed_snaps
.empty() != cached
.empty();
1660 return removed_snaps
.range_end() != cached
.range_end();
1665 snapid_t
pg_pool_t::snap_exists(const char *s
) const
1667 for (map
<snapid_t
,pool_snap_info_t
>::const_iterator p
= snaps
.begin();
1670 if (p
->second
.name
== s
)
1671 return p
->second
.snapid
;
1675 void pg_pool_t::add_snap(const char *n
, utime_t stamp
)
1677 ceph_assert(!is_unmanaged_snaps_mode());
1678 flags
|= FLAG_POOL_SNAPS
;
1679 snapid_t s
= get_snap_seq() + 1;
1681 snaps
[s
].snapid
= s
;
1683 snaps
[s
].stamp
= stamp
;
1686 void pg_pool_t::add_unmanaged_snap(uint64_t& snapid
)
1688 ceph_assert(!is_pool_snaps_mode());
1689 if (snap_seq
== 0) {
1690 // kludge for pre-mimic tracking of pool vs selfmanaged snaps. after
1691 // mimic this field is not decoded but our flag is set; pre-mimic, we
1692 // have a non-empty removed_snaps to signifiy a non-pool-snaps pool.
1693 removed_snaps
.insert(snapid_t(1));
1696 flags
|= FLAG_SELFMANAGED_SNAPS
;
1697 snapid
= snap_seq
= snap_seq
+ 1;
1700 void pg_pool_t::remove_snap(snapid_t s
)
1702 ceph_assert(snaps
.count(s
));
1704 snap_seq
= snap_seq
+ 1;
1707 void pg_pool_t::remove_unmanaged_snap(snapid_t s
)
1709 ceph_assert(is_unmanaged_snaps_mode());
1710 removed_snaps
.insert(s
);
1711 snap_seq
= snap_seq
+ 1;
1712 // try to add in the new seq, just to try to keep the interval_set contiguous
1713 if (!removed_snaps
.contains(get_snap_seq())) {
1714 removed_snaps
.insert(get_snap_seq());
1718 SnapContext
pg_pool_t::get_snap_context() const
1720 vector
<snapid_t
> s(snaps
.size());
1722 for (map
<snapid_t
, pool_snap_info_t
>::const_reverse_iterator p
= snaps
.rbegin();
1726 return SnapContext(get_snap_seq(), s
);
1729 uint32_t pg_pool_t::hash_key(const string
& key
, const string
& ns
) const
1732 return ceph_str_hash(object_hash
, key
.data(), key
.length());
1733 int nsl
= ns
.length();
1734 int len
= key
.length() + nsl
+ 1;
1736 memcpy(&buf
[0], ns
.data(), nsl
);
1738 memcpy(&buf
[nsl
+1], key
.data(), key
.length());
1739 return ceph_str_hash(object_hash
, &buf
[0], len
);
1742 uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v
) const
1744 return ceph_stable_mod(v
, pg_num
, pg_num_mask
);
1748 * map a raw pg (with full precision ps) into an actual pg, for storage
1750 pg_t
pg_pool_t::raw_pg_to_pg(pg_t pg
) const
1752 pg
.set_ps(ceph_stable_mod(pg
.ps(), pg_num
, pg_num_mask
));
1757 * map raw pg (full precision ps) into a placement seed. include
1758 * pool id in that value so that different pools don't use the same
1761 ps_t
pg_pool_t::raw_pg_to_pps(pg_t pg
) const
1763 if (flags
& FLAG_HASHPSPOOL
) {
1764 // Hash the pool id so that pool PGs do not overlap.
1766 crush_hash32_2(CRUSH_HASH_RJENKINS1
,
1767 ceph_stable_mod(pg
.ps(), pgp_num
, pgp_num_mask
),
1770 // Legacy behavior; add ps and pool together. This is not a great
1771 // idea because the PGs from each pool will essentially overlap on
1772 // top of each other: 0.5 == 1.4 == 2.3 == ...
1774 ceph_stable_mod(pg
.ps(), pgp_num
, pgp_num_mask
) +
1779 uint32_t pg_pool_t::get_random_pg_position(pg_t pg
, uint32_t seed
) const
1781 uint32_t r
= crush_hash32_2(CRUSH_HASH_RJENKINS1
, seed
, 123);
1782 if (pg_num
== pg_num_mask
+ 1) {
1785 unsigned smaller_mask
= pg_num_mask
>> 1;
1786 if ((pg
.ps() & smaller_mask
) < (pg_num
& smaller_mask
)) {
1796 void pg_pool_t::encode(bufferlist
& bl
, uint64_t features
) const
1799 if ((features
& CEPH_FEATURE_PGPOOL3
) == 0) {
1800 // this encoding matches the old struct ceph_pg_pool
1802 encode(struct_v
, bl
);
1805 encode(crush_rule
, bl
);
1806 encode(object_hash
, bl
);
1808 encode(pgp_num
, bl
);
1809 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1810 encode(lpg_num
, bl
);
1811 encode(lpgp_num
, bl
);
1812 encode(last_change
, bl
);
1813 encode(snap_seq
, bl
);
1814 encode(snap_epoch
, bl
);
1816 __u32 n
= snaps
.size();
1818 n
= removed_snaps
.num_intervals();
1823 encode_nohead(snaps
, bl
, features
);
1824 encode_nohead(removed_snaps
, bl
);
1828 if ((features
& CEPH_FEATURE_OSDENC
) == 0) {
1830 encode(struct_v
, bl
);
1833 encode(crush_rule
, bl
);
1834 encode(object_hash
, bl
);
1836 encode(pgp_num
, bl
);
1837 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1838 encode(lpg_num
, bl
);
1839 encode(lpgp_num
, bl
);
1840 encode(last_change
, bl
);
1841 encode(snap_seq
, bl
);
1842 encode(snap_epoch
, bl
);
1843 encode(snaps
, bl
, features
);
1844 encode(removed_snaps
, bl
);
1847 encode((uint32_t)0, bl
); // crash_replay_interval
1851 if ((features
& CEPH_FEATURE_OSD_POOLRESEND
) == 0) {
1852 // we simply added last_force_op_resend here, which is a fully
1853 // backward compatible change. however, encoding the same map
1854 // differently between monitors triggers scrub noise (even though
1855 // they are decodable without the feature), so let's be pendantic
1857 ENCODE_START(14, 5, bl
);
1860 encode(crush_rule
, bl
);
1861 encode(object_hash
, bl
);
1863 encode(pgp_num
, bl
);
1864 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1865 encode(lpg_num
, bl
);
1866 encode(lpgp_num
, bl
);
1867 encode(last_change
, bl
);
1868 encode(snap_seq
, bl
);
1869 encode(snap_epoch
, bl
);
1870 encode(snaps
, bl
, features
);
1871 encode(removed_snaps
, bl
);
1874 encode((uint32_t)0, bl
); // crash_replay_interval
1875 encode(min_size
, bl
);
1876 encode(quota_max_bytes
, bl
);
1877 encode(quota_max_objects
, bl
);
1879 encode(tier_of
, bl
);
1880 __u8 c
= cache_mode
;
1882 encode(read_tier
, bl
);
1883 encode(write_tier
, bl
);
1884 encode(properties
, bl
);
1885 encode(hit_set_params
, bl
);
1886 encode(hit_set_period
, bl
);
1887 encode(hit_set_count
, bl
);
1888 encode(stripe_width
, bl
);
1889 encode(target_max_bytes
, bl
);
1890 encode(target_max_objects
, bl
);
1891 encode(cache_target_dirty_ratio_micro
, bl
);
1892 encode(cache_target_full_ratio_micro
, bl
);
1893 encode(cache_min_flush_age
, bl
);
1894 encode(cache_min_evict_age
, bl
);
1895 encode(erasure_code_profile
, bl
);
1901 // NOTE: any new encoding dependencies must be reflected by
1902 // SIGNIFICANT_FEATURES
1903 if (!(features
& CEPH_FEATURE_NEW_OSDOP_ENCODING
)) {
1904 // this was the first post-hammer thing we added; if it's missing, encode
1907 } else if (!HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
1909 } else if (!HAVE_FEATURE(features
, SERVER_MIMIC
)) {
1911 } else if (!HAVE_FEATURE(features
, SERVER_NAUTILUS
)) {
1915 ENCODE_START(v
, 5, bl
);
1918 encode(crush_rule
, bl
);
1919 encode(object_hash
, bl
);
1921 encode(pgp_num
, bl
);
1922 __u32 lpg_num
= 0, lpgp_num
= 0; // tell old code that there are no localized pgs.
1923 encode(lpg_num
, bl
);
1924 encode(lpgp_num
, bl
);
1925 encode(last_change
, bl
);
1926 encode(snap_seq
, bl
);
1927 encode(snap_epoch
, bl
);
1928 encode(snaps
, bl
, features
);
1929 encode(removed_snaps
, bl
);
1935 tmp
&= ~(FLAG_SELFMANAGED_SNAPS
| FLAG_POOL_SNAPS
| FLAG_CREATING
);
1938 encode((uint32_t)0, bl
); // crash_replay_interval
1939 encode(min_size
, bl
);
1940 encode(quota_max_bytes
, bl
);
1941 encode(quota_max_objects
, bl
);
1943 encode(tier_of
, bl
);
1944 __u8 c
= cache_mode
;
1946 encode(read_tier
, bl
);
1947 encode(write_tier
, bl
);
1948 encode(properties
, bl
);
1949 encode(hit_set_params
, bl
);
1950 encode(hit_set_period
, bl
);
1951 encode(hit_set_count
, bl
);
1952 encode(stripe_width
, bl
);
1953 encode(target_max_bytes
, bl
);
1954 encode(target_max_objects
, bl
);
1955 encode(cache_target_dirty_ratio_micro
, bl
);
1956 encode(cache_target_full_ratio_micro
, bl
);
1957 encode(cache_min_flush_age
, bl
);
1958 encode(cache_min_evict_age
, bl
);
1959 encode(erasure_code_profile
, bl
);
1960 encode(last_force_op_resend_preluminous
, bl
);
1961 encode(min_read_recency_for_promote
, bl
);
1962 encode(expected_num_objects
, bl
);
1964 encode(cache_target_dirty_high_ratio_micro
, bl
);
1967 encode(min_write_recency_for_promote
, bl
);
1970 encode(use_gmt_hitset
, bl
);
1973 encode(fast_read
, bl
);
1976 encode(hit_set_grade_decay_rate
, bl
);
1977 encode(hit_set_search_last_n
, bl
);
1980 encode(opts
, bl
, features
);
1983 encode(last_force_op_resend_prenautilus
, bl
);
1986 encode(application_metadata
, bl
);
1989 encode(create_time
, bl
);
1992 encode(pg_num_target
, bl
);
1993 encode(pgp_num_target
, bl
);
1994 encode(pg_num_pending
, bl
);
1995 encode((epoch_t
)0, bl
); // pg_num_dec_last_epoch_started from 14.1.[01]
1996 encode((epoch_t
)0, bl
); // pg_num_dec_last_epoch_clean from 14.1.[01]
1997 encode(last_force_op_resend
, bl
);
1998 encode(pg_autoscale_mode
, bl
);
2001 encode(last_pg_merge_meta
, bl
);
2006 void pg_pool_t::decode(bufferlist::const_iterator
& bl
)
2008 DECODE_START_LEGACY_COMPAT_LEN(29, 5, 5, bl
);
2011 decode(crush_rule
, bl
);
2012 decode(object_hash
, bl
);
2014 decode(pgp_num
, bl
);
2016 __u32 lpg_num
, lpgp_num
;
2017 decode(lpg_num
, bl
);
2018 decode(lpgp_num
, bl
);
2020 decode(last_change
, bl
);
2021 decode(snap_seq
, bl
);
2022 decode(snap_epoch
, bl
);
2024 if (struct_v
>= 3) {
2026 decode(removed_snaps
, bl
);
2033 decode_nohead(n
, snaps
, bl
);
2034 decode_nohead(m
, removed_snaps
, bl
);
2037 if (struct_v
>= 4) {
2039 uint32_t crash_replay_interval
;
2040 decode(crash_replay_interval
, bl
);
2044 // upgrade path for selfmanaged vs pool snaps
2045 if (snap_seq
> 0 && (flags
& (FLAG_SELFMANAGED_SNAPS
|FLAG_POOL_SNAPS
)) == 0) {
2046 if (!removed_snaps
.empty()) {
2047 flags
|= FLAG_SELFMANAGED_SNAPS
;
2049 flags
|= FLAG_POOL_SNAPS
;
2052 if (struct_v
>= 7) {
2053 decode(min_size
, bl
);
2055 min_size
= size
- size
/2;
2057 if (struct_v
>= 8) {
2058 decode(quota_max_bytes
, bl
);
2059 decode(quota_max_objects
, bl
);
2061 if (struct_v
>= 9) {
2063 decode(tier_of
, bl
);
2066 cache_mode
= (cache_mode_t
)v
;
2067 decode(read_tier
, bl
);
2068 decode(write_tier
, bl
);
2070 if (struct_v
>= 10) {
2071 decode(properties
, bl
);
2073 if (struct_v
>= 11) {
2074 decode(hit_set_params
, bl
);
2075 decode(hit_set_period
, bl
);
2076 decode(hit_set_count
, bl
);
2079 hit_set_period
= def
.hit_set_period
;
2080 hit_set_count
= def
.hit_set_count
;
2082 if (struct_v
>= 12) {
2083 decode(stripe_width
, bl
);
2085 set_stripe_width(0);
2087 if (struct_v
>= 13) {
2088 decode(target_max_bytes
, bl
);
2089 decode(target_max_objects
, bl
);
2090 decode(cache_target_dirty_ratio_micro
, bl
);
2091 decode(cache_target_full_ratio_micro
, bl
);
2092 decode(cache_min_flush_age
, bl
);
2093 decode(cache_min_evict_age
, bl
);
2095 target_max_bytes
= 0;
2096 target_max_objects
= 0;
2097 cache_target_dirty_ratio_micro
= 0;
2098 cache_target_full_ratio_micro
= 0;
2099 cache_min_flush_age
= 0;
2100 cache_min_evict_age
= 0;
2102 if (struct_v
>= 14) {
2103 decode(erasure_code_profile
, bl
);
2105 if (struct_v
>= 15) {
2106 decode(last_force_op_resend_preluminous
, bl
);
2108 last_force_op_resend_preluminous
= 0;
2110 if (struct_v
>= 16) {
2111 decode(min_read_recency_for_promote
, bl
);
2113 min_read_recency_for_promote
= 1;
2115 if (struct_v
>= 17) {
2116 decode(expected_num_objects
, bl
);
2118 expected_num_objects
= 0;
2120 if (struct_v
>= 19) {
2121 decode(cache_target_dirty_high_ratio_micro
, bl
);
2123 cache_target_dirty_high_ratio_micro
= cache_target_dirty_ratio_micro
;
2125 if (struct_v
>= 20) {
2126 decode(min_write_recency_for_promote
, bl
);
2128 min_write_recency_for_promote
= 1;
2130 if (struct_v
>= 21) {
2131 decode(use_gmt_hitset
, bl
);
2133 use_gmt_hitset
= false;
2135 if (struct_v
>= 22) {
2136 decode(fast_read
, bl
);
2140 if (struct_v
>= 23) {
2141 decode(hit_set_grade_decay_rate
, bl
);
2142 decode(hit_set_search_last_n
, bl
);
2144 hit_set_grade_decay_rate
= 0;
2145 hit_set_search_last_n
= 1;
2147 if (struct_v
>= 24) {
2150 if (struct_v
>= 25) {
2151 decode(last_force_op_resend_prenautilus
, bl
);
2153 last_force_op_resend_prenautilus
= last_force_op_resend_preluminous
;
2155 if (struct_v
>= 26) {
2156 decode(application_metadata
, bl
);
2158 if (struct_v
>= 27) {
2159 decode(create_time
, bl
);
2161 if (struct_v
>= 28) {
2162 decode(pg_num_target
, bl
);
2163 decode(pgp_num_target
, bl
);
2164 decode(pg_num_pending
, bl
);
2165 epoch_t old_merge_last_epoch_clean
, old_merge_last_epoch_started
;
2166 decode(old_merge_last_epoch_started
, bl
);
2167 decode(old_merge_last_epoch_clean
, bl
);
2168 decode(last_force_op_resend
, bl
);
2169 decode(pg_autoscale_mode
, bl
);
2170 if (struct_v
>= 29) {
2171 decode(last_pg_merge_meta
, bl
);
2173 last_pg_merge_meta
.last_epoch_clean
= old_merge_last_epoch_clean
;
2174 last_pg_merge_meta
.last_epoch_started
= old_merge_last_epoch_started
;
2177 pg_num_target
= pg_num
;
2178 pgp_num_target
= pgp_num
;
2179 pg_num_pending
= pg_num
;
2180 last_force_op_resend
= last_force_op_resend_prenautilus
;
2181 pg_autoscale_mode
= PG_AUTOSCALE_MODE_WARN
; // default to warn on upgrade
2188 void pg_pool_t::generate_test_instances(list
<pg_pool_t
*>& o
)
2191 o
.push_back(new pg_pool_t(a
));
2193 a
.create_time
= utime_t(4,5);
2194 a
.type
= TYPE_REPLICATED
;
2200 a
.pgp_num_target
= 4;
2201 a
.pg_num_target
= 5;
2202 a
.pg_num_pending
= 5;
2203 a
.last_pg_merge_meta
.last_epoch_started
= 2;
2204 a
.last_pg_merge_meta
.last_epoch_clean
= 2;
2206 a
.last_force_op_resend
= 123823;
2207 a
.last_force_op_resend_preluminous
= 123824;
2210 a
.flags
= FLAG_POOL_SNAPS
;
2212 a
.quota_max_bytes
= 473;
2213 a
.quota_max_objects
= 474;
2214 o
.push_back(new pg_pool_t(a
));
2216 a
.snaps
[3].name
= "asdf";
2217 a
.snaps
[3].snapid
= 3;
2218 a
.snaps
[3].stamp
= utime_t(123, 4);
2219 a
.snaps
[6].name
= "qwer";
2220 a
.snaps
[6].snapid
= 6;
2221 a
.snaps
[6].stamp
= utime_t(23423, 4);
2222 o
.push_back(new pg_pool_t(a
));
2224 a
.flags
= FLAG_SELFMANAGED_SNAPS
;
2226 a
.removed_snaps
.insert(2);
2227 a
.quota_max_bytes
= 2473;
2228 a
.quota_max_objects
= 4374;
2232 a
.cache_mode
= CACHEMODE_WRITEBACK
;
2235 a
.hit_set_params
= HitSet::Params(new BloomHitSet::Params
);
2236 a
.hit_set_period
= 3600;
2237 a
.hit_set_count
= 8;
2238 a
.min_read_recency_for_promote
= 1;
2239 a
.min_write_recency_for_promote
= 1;
2240 a
.hit_set_grade_decay_rate
= 50;
2241 a
.hit_set_search_last_n
= 1;
2242 a
.calc_grade_table();
2243 a
.set_stripe_width(12345);
2244 a
.target_max_bytes
= 1238132132;
2245 a
.target_max_objects
= 1232132;
2246 a
.cache_target_dirty_ratio_micro
= 187232;
2247 a
.cache_target_dirty_high_ratio_micro
= 309856;
2248 a
.cache_target_full_ratio_micro
= 987222;
2249 a
.cache_min_flush_age
= 231;
2250 a
.cache_min_evict_age
= 2321;
2251 a
.erasure_code_profile
= "profile in osdmap";
2252 a
.expected_num_objects
= 123456;
2253 a
.fast_read
= false;
2254 a
.application_metadata
= {{"rbd", {{"key", "value"}}}};
2255 o
.push_back(new pg_pool_t(a
));
2258 ostream
& operator<<(ostream
& out
, const pg_pool_t
& p
)
2260 out
<< p
.get_type_name()
2261 << " size " << p
.get_size()
2262 << " min_size " << p
.get_min_size()
2263 << " crush_rule " << p
.get_crush_rule()
2264 << " object_hash " << p
.get_object_hash_name()
2265 << " pg_num " << p
.get_pg_num()
2266 << " pgp_num " << p
.get_pgp_num();
2267 if (p
.get_pg_num_target() != p
.get_pg_num()) {
2268 out
<< " pg_num_target " << p
.get_pg_num_target();
2270 if (p
.get_pgp_num_target() != p
.get_pgp_num()) {
2271 out
<< " pgp_num_target " << p
.get_pgp_num_target();
2273 if (p
.get_pg_num_pending() != p
.get_pg_num()) {
2274 out
<< " pg_num_pending " << p
.get_pg_num_pending();
2276 if (p
.pg_autoscale_mode
) {
2277 out
<< " autoscale_mode " << p
.get_pg_autoscale_mode_name(p
.pg_autoscale_mode
);
2279 out
<< " last_change " << p
.get_last_change();
2280 if (p
.get_last_force_op_resend() ||
2281 p
.get_last_force_op_resend_prenautilus() ||
2282 p
.get_last_force_op_resend_preluminous())
2283 out
<< " lfor " << p
.get_last_force_op_resend() << "/"
2284 << p
.get_last_force_op_resend_prenautilus() << "/"
2285 << p
.get_last_force_op_resend_preluminous();
2287 out
<< " owner " << p
.get_auid();
2289 out
<< " flags " << p
.get_flags_string();
2290 if (p
.quota_max_bytes
)
2291 out
<< " max_bytes " << p
.quota_max_bytes
;
2292 if (p
.quota_max_objects
)
2293 out
<< " max_objects " << p
.quota_max_objects
;
2294 if (!p
.tiers
.empty())
2295 out
<< " tiers " << p
.tiers
;
2297 out
<< " tier_of " << p
.tier_of
;
2298 if (p
.has_read_tier())
2299 out
<< " read_tier " << p
.read_tier
;
2300 if (p
.has_write_tier())
2301 out
<< " write_tier " << p
.write_tier
;
2303 out
<< " cache_mode " << p
.get_cache_mode_name();
2304 if (p
.target_max_bytes
)
2305 out
<< " target_bytes " << p
.target_max_bytes
;
2306 if (p
.target_max_objects
)
2307 out
<< " target_objects " << p
.target_max_objects
;
2308 if (p
.hit_set_params
.get_type() != HitSet::TYPE_NONE
) {
2309 out
<< " hit_set " << p
.hit_set_params
2310 << " " << p
.hit_set_period
<< "s"
2311 << " x" << p
.hit_set_count
<< " decay_rate "
2312 << p
.hit_set_grade_decay_rate
2313 << " search_last_n " << p
.hit_set_search_last_n
;
2315 if (p
.min_read_recency_for_promote
)
2316 out
<< " min_read_recency_for_promote " << p
.min_read_recency_for_promote
;
2317 if (p
.min_write_recency_for_promote
)
2318 out
<< " min_write_recency_for_promote " << p
.min_write_recency_for_promote
;
2319 out
<< " stripe_width " << p
.get_stripe_width();
2320 if (p
.expected_num_objects
)
2321 out
<< " expected_num_objects " << p
.expected_num_objects
;
2323 out
<< " fast_read " << p
.fast_read
;
2325 if (!p
.application_metadata
.empty()) {
2326 out
<< " application ";
2327 for (auto it
= p
.application_metadata
.begin();
2328 it
!= p
.application_metadata
.end(); ++it
) {
2329 if (it
!= p
.application_metadata
.begin())
2338 // -- object_stat_sum_t --
2340 void object_stat_sum_t::dump(Formatter
*f
) const
2342 f
->dump_int("num_bytes", num_bytes
);
2343 f
->dump_int("num_objects", num_objects
);
2344 f
->dump_int("num_object_clones", num_object_clones
);
2345 f
->dump_int("num_object_copies", num_object_copies
);
2346 f
->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary
);
2347 f
->dump_int("num_objects_missing", num_objects_missing
);
2348 f
->dump_int("num_objects_degraded", num_objects_degraded
);
2349 f
->dump_int("num_objects_misplaced", num_objects_misplaced
);
2350 f
->dump_int("num_objects_unfound", num_objects_unfound
);
2351 f
->dump_int("num_objects_dirty", num_objects_dirty
);
2352 f
->dump_int("num_whiteouts", num_whiteouts
);
2353 f
->dump_int("num_read", num_rd
);
2354 f
->dump_int("num_read_kb", num_rd_kb
);
2355 f
->dump_int("num_write", num_wr
);
2356 f
->dump_int("num_write_kb", num_wr_kb
);
2357 f
->dump_int("num_scrub_errors", num_scrub_errors
);
2358 f
->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors
);
2359 f
->dump_int("num_deep_scrub_errors", num_deep_scrub_errors
);
2360 f
->dump_int("num_objects_recovered", num_objects_recovered
);
2361 f
->dump_int("num_bytes_recovered", num_bytes_recovered
);
2362 f
->dump_int("num_keys_recovered", num_keys_recovered
);
2363 f
->dump_int("num_objects_omap", num_objects_omap
);
2364 f
->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive
);
2365 f
->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive
);
2366 f
->dump_int("num_flush", num_flush
);
2367 f
->dump_int("num_flush_kb", num_flush_kb
);
2368 f
->dump_int("num_evict", num_evict
);
2369 f
->dump_int("num_evict_kb", num_evict_kb
);
2370 f
->dump_int("num_promote", num_promote
);
2371 f
->dump_int("num_flush_mode_high", num_flush_mode_high
);
2372 f
->dump_int("num_flush_mode_low", num_flush_mode_low
);
2373 f
->dump_int("num_evict_mode_some", num_evict_mode_some
);
2374 f
->dump_int("num_evict_mode_full", num_evict_mode_full
);
2375 f
->dump_int("num_objects_pinned", num_objects_pinned
);
2376 f
->dump_int("num_legacy_snapsets", num_legacy_snapsets
);
2377 f
->dump_int("num_large_omap_objects", num_large_omap_objects
);
2378 f
->dump_int("num_objects_manifest", num_objects_manifest
);
2379 f
->dump_int("num_omap_bytes", num_omap_bytes
);
2380 f
->dump_int("num_omap_keys", num_omap_keys
);
2381 f
->dump_int("num_objects_repaired", num_objects_repaired
);
2384 void object_stat_sum_t::encode(bufferlist
& bl
) const
2386 ENCODE_START(20, 14, bl
);
2387 #if defined(CEPH_LITTLE_ENDIAN)
2388 bl
.append((char *)(&num_bytes
), sizeof(object_stat_sum_t
));
2390 encode(num_bytes
, bl
);
2391 encode(num_objects
, bl
);
2392 encode(num_object_clones
, bl
);
2393 encode(num_object_copies
, bl
);
2394 encode(num_objects_missing_on_primary
, bl
);
2395 encode(num_objects_degraded
, bl
);
2396 encode(num_objects_unfound
, bl
);
2398 encode(num_rd_kb
, bl
);
2400 encode(num_wr_kb
, bl
);
2401 encode(num_scrub_errors
, bl
);
2402 encode(num_objects_recovered
, bl
);
2403 encode(num_bytes_recovered
, bl
);
2404 encode(num_keys_recovered
, bl
);
2405 encode(num_shallow_scrub_errors
, bl
);
2406 encode(num_deep_scrub_errors
, bl
);
2407 encode(num_objects_dirty
, bl
);
2408 encode(num_whiteouts
, bl
);
2409 encode(num_objects_omap
, bl
);
2410 encode(num_objects_hit_set_archive
, bl
);
2411 encode(num_objects_misplaced
, bl
);
2412 encode(num_bytes_hit_set_archive
, bl
);
2413 encode(num_flush
, bl
);
2414 encode(num_flush_kb
, bl
);
2415 encode(num_evict
, bl
);
2416 encode(num_evict_kb
, bl
);
2417 encode(num_promote
, bl
);
2418 encode(num_flush_mode_high
, bl
);
2419 encode(num_flush_mode_low
, bl
);
2420 encode(num_evict_mode_some
, bl
);
2421 encode(num_evict_mode_full
, bl
);
2422 encode(num_objects_pinned
, bl
);
2423 encode(num_objects_missing
, bl
);
2424 encode(num_legacy_snapsets
, bl
);
2425 encode(num_large_omap_objects
, bl
);
2426 encode(num_objects_manifest
, bl
);
2427 encode(num_omap_bytes
, bl
);
2428 encode(num_omap_keys
, bl
);
2429 encode(num_objects_repaired
, bl
);
2434 void object_stat_sum_t::decode(bufferlist::const_iterator
& bl
)
2436 bool decode_finish
= false;
2437 static const int STAT_SUM_DECODE_VERSION
= 20;
2438 DECODE_START(STAT_SUM_DECODE_VERSION
, bl
);
2439 #if defined(CEPH_LITTLE_ENDIAN)
2440 if (struct_v
== STAT_SUM_DECODE_VERSION
) {
2441 bl
.copy(sizeof(object_stat_sum_t
), (char*)(&num_bytes
));
2442 decode_finish
= true;
2445 if (!decode_finish
) {
2446 decode(num_bytes
, bl
);
2447 decode(num_objects
, bl
);
2448 decode(num_object_clones
, bl
);
2449 decode(num_object_copies
, bl
);
2450 decode(num_objects_missing_on_primary
, bl
);
2451 decode(num_objects_degraded
, bl
);
2452 decode(num_objects_unfound
, bl
);
2454 decode(num_rd_kb
, bl
);
2456 decode(num_wr_kb
, bl
);
2457 decode(num_scrub_errors
, bl
);
2458 decode(num_objects_recovered
, bl
);
2459 decode(num_bytes_recovered
, bl
);
2460 decode(num_keys_recovered
, bl
);
2461 decode(num_shallow_scrub_errors
, bl
);
2462 decode(num_deep_scrub_errors
, bl
);
2463 decode(num_objects_dirty
, bl
);
2464 decode(num_whiteouts
, bl
);
2465 decode(num_objects_omap
, bl
);
2466 decode(num_objects_hit_set_archive
, bl
);
2467 decode(num_objects_misplaced
, bl
);
2468 decode(num_bytes_hit_set_archive
, bl
);
2469 decode(num_flush
, bl
);
2470 decode(num_flush_kb
, bl
);
2471 decode(num_evict
, bl
);
2472 decode(num_evict_kb
, bl
);
2473 decode(num_promote
, bl
);
2474 decode(num_flush_mode_high
, bl
);
2475 decode(num_flush_mode_low
, bl
);
2476 decode(num_evict_mode_some
, bl
);
2477 decode(num_evict_mode_full
, bl
);
2478 decode(num_objects_pinned
, bl
);
2479 decode(num_objects_missing
, bl
);
2480 if (struct_v
>= 16) {
2481 decode(num_legacy_snapsets
, bl
);
2483 num_legacy_snapsets
= num_object_clones
; // upper bound
2485 if (struct_v
>= 17) {
2486 decode(num_large_omap_objects
, bl
);
2488 if (struct_v
>= 18) {
2489 decode(num_objects_manifest
, bl
);
2491 if (struct_v
>= 19) {
2492 decode(num_omap_bytes
, bl
);
2493 decode(num_omap_keys
, bl
);
2495 if (struct_v
>= 20) {
2496 decode(num_objects_repaired
, bl
);
2502 void object_stat_sum_t::generate_test_instances(list
<object_stat_sum_t
*>& o
)
2504 object_stat_sum_t a
;
2508 a
.num_object_clones
= 4;
2509 a
.num_object_copies
= 5;
2510 a
.num_objects_missing_on_primary
= 6;
2511 a
.num_objects_missing
= 123;
2512 a
.num_objects_degraded
= 7;
2513 a
.num_objects_unfound
= 8;
2514 a
.num_rd
= 9; a
.num_rd_kb
= 10;
2515 a
.num_wr
= 11; a
.num_wr_kb
= 12;
2516 a
.num_objects_recovered
= 14;
2517 a
.num_bytes_recovered
= 15;
2518 a
.num_keys_recovered
= 16;
2519 a
.num_deep_scrub_errors
= 17;
2520 a
.num_shallow_scrub_errors
= 18;
2521 a
.num_scrub_errors
= a
.num_deep_scrub_errors
+ a
.num_shallow_scrub_errors
;
2522 a
.num_objects_dirty
= 21;
2523 a
.num_whiteouts
= 22;
2524 a
.num_objects_misplaced
= 1232;
2525 a
.num_objects_hit_set_archive
= 2;
2526 a
.num_bytes_hit_set_archive
= 27;
2532 a
.num_flush_mode_high
= 0;
2533 a
.num_flush_mode_low
= 1;
2534 a
.num_evict_mode_some
= 1;
2535 a
.num_evict_mode_full
= 0;
2536 a
.num_objects_pinned
= 20;
2537 a
.num_large_omap_objects
= 5;
2538 a
.num_objects_manifest
= 2;
2539 a
.num_omap_bytes
= 20000;
2540 a
.num_omap_keys
= 200;
2541 a
.num_objects_repaired
= 300;
2542 o
.push_back(new object_stat_sum_t(a
));
2545 void object_stat_sum_t::add(const object_stat_sum_t
& o
)
2547 num_bytes
+= o
.num_bytes
;
2548 num_objects
+= o
.num_objects
;
2549 num_object_clones
+= o
.num_object_clones
;
2550 num_object_copies
+= o
.num_object_copies
;
2551 num_objects_missing_on_primary
+= o
.num_objects_missing_on_primary
;
2552 num_objects_missing
+= o
.num_objects_missing
;
2553 num_objects_degraded
+= o
.num_objects_degraded
;
2554 num_objects_misplaced
+= o
.num_objects_misplaced
;
2556 num_rd_kb
+= o
.num_rd_kb
;
2558 num_wr_kb
+= o
.num_wr_kb
;
2559 num_objects_unfound
+= o
.num_objects_unfound
;
2560 num_scrub_errors
+= o
.num_scrub_errors
;
2561 num_shallow_scrub_errors
+= o
.num_shallow_scrub_errors
;
2562 num_deep_scrub_errors
+= o
.num_deep_scrub_errors
;
2563 num_objects_recovered
+= o
.num_objects_recovered
;
2564 num_bytes_recovered
+= o
.num_bytes_recovered
;
2565 num_keys_recovered
+= o
.num_keys_recovered
;
2566 num_objects_dirty
+= o
.num_objects_dirty
;
2567 num_whiteouts
+= o
.num_whiteouts
;
2568 num_objects_omap
+= o
.num_objects_omap
;
2569 num_objects_hit_set_archive
+= o
.num_objects_hit_set_archive
;
2570 num_bytes_hit_set_archive
+= o
.num_bytes_hit_set_archive
;
2571 num_flush
+= o
.num_flush
;
2572 num_flush_kb
+= o
.num_flush_kb
;
2573 num_evict
+= o
.num_evict
;
2574 num_evict_kb
+= o
.num_evict_kb
;
2575 num_promote
+= o
.num_promote
;
2576 num_flush_mode_high
+= o
.num_flush_mode_high
;
2577 num_flush_mode_low
+= o
.num_flush_mode_low
;
2578 num_evict_mode_some
+= o
.num_evict_mode_some
;
2579 num_evict_mode_full
+= o
.num_evict_mode_full
;
2580 num_objects_pinned
+= o
.num_objects_pinned
;
2581 num_legacy_snapsets
+= o
.num_legacy_snapsets
;
2582 num_large_omap_objects
+= o
.num_large_omap_objects
;
2583 num_objects_manifest
+= o
.num_objects_manifest
;
2584 num_omap_bytes
+= o
.num_omap_bytes
;
2585 num_omap_keys
+= o
.num_omap_keys
;
2586 num_objects_repaired
+= o
.num_objects_repaired
;
2589 void object_stat_sum_t::sub(const object_stat_sum_t
& o
)
2591 num_bytes
-= o
.num_bytes
;
2592 num_objects
-= o
.num_objects
;
2593 num_object_clones
-= o
.num_object_clones
;
2594 num_object_copies
-= o
.num_object_copies
;
2595 num_objects_missing_on_primary
-= o
.num_objects_missing_on_primary
;
2596 num_objects_missing
-= o
.num_objects_missing
;
2597 num_objects_degraded
-= o
.num_objects_degraded
;
2598 num_objects_misplaced
-= o
.num_objects_misplaced
;
2600 num_rd_kb
-= o
.num_rd_kb
;
2602 num_wr_kb
-= o
.num_wr_kb
;
2603 num_objects_unfound
-= o
.num_objects_unfound
;
2604 num_scrub_errors
-= o
.num_scrub_errors
;
2605 num_shallow_scrub_errors
-= o
.num_shallow_scrub_errors
;
2606 num_deep_scrub_errors
-= o
.num_deep_scrub_errors
;
2607 num_objects_recovered
-= o
.num_objects_recovered
;
2608 num_bytes_recovered
-= o
.num_bytes_recovered
;
2609 num_keys_recovered
-= o
.num_keys_recovered
;
2610 num_objects_dirty
-= o
.num_objects_dirty
;
2611 num_whiteouts
-= o
.num_whiteouts
;
2612 num_objects_omap
-= o
.num_objects_omap
;
2613 num_objects_hit_set_archive
-= o
.num_objects_hit_set_archive
;
2614 num_bytes_hit_set_archive
-= o
.num_bytes_hit_set_archive
;
2615 num_flush
-= o
.num_flush
;
2616 num_flush_kb
-= o
.num_flush_kb
;
2617 num_evict
-= o
.num_evict
;
2618 num_evict_kb
-= o
.num_evict_kb
;
2619 num_promote
-= o
.num_promote
;
2620 num_flush_mode_high
-= o
.num_flush_mode_high
;
2621 num_flush_mode_low
-= o
.num_flush_mode_low
;
2622 num_evict_mode_some
-= o
.num_evict_mode_some
;
2623 num_evict_mode_full
-= o
.num_evict_mode_full
;
2624 num_objects_pinned
-= o
.num_objects_pinned
;
2625 num_legacy_snapsets
-= o
.num_legacy_snapsets
;
2626 num_large_omap_objects
-= o
.num_large_omap_objects
;
2627 num_objects_manifest
-= o
.num_objects_manifest
;
2628 num_omap_bytes
-= o
.num_omap_bytes
;
2629 num_omap_keys
-= o
.num_omap_keys
;
2630 num_objects_repaired
-= o
.num_objects_repaired
;
2633 bool operator==(const object_stat_sum_t
& l
, const object_stat_sum_t
& r
)
2636 l
.num_bytes
== r
.num_bytes
&&
2637 l
.num_objects
== r
.num_objects
&&
2638 l
.num_object_clones
== r
.num_object_clones
&&
2639 l
.num_object_copies
== r
.num_object_copies
&&
2640 l
.num_objects_missing_on_primary
== r
.num_objects_missing_on_primary
&&
2641 l
.num_objects_missing
== r
.num_objects_missing
&&
2642 l
.num_objects_degraded
== r
.num_objects_degraded
&&
2643 l
.num_objects_misplaced
== r
.num_objects_misplaced
&&
2644 l
.num_objects_unfound
== r
.num_objects_unfound
&&
2645 l
.num_rd
== r
.num_rd
&&
2646 l
.num_rd_kb
== r
.num_rd_kb
&&
2647 l
.num_wr
== r
.num_wr
&&
2648 l
.num_wr_kb
== r
.num_wr_kb
&&
2649 l
.num_scrub_errors
== r
.num_scrub_errors
&&
2650 l
.num_shallow_scrub_errors
== r
.num_shallow_scrub_errors
&&
2651 l
.num_deep_scrub_errors
== r
.num_deep_scrub_errors
&&
2652 l
.num_objects_recovered
== r
.num_objects_recovered
&&
2653 l
.num_bytes_recovered
== r
.num_bytes_recovered
&&
2654 l
.num_keys_recovered
== r
.num_keys_recovered
&&
2655 l
.num_objects_dirty
== r
.num_objects_dirty
&&
2656 l
.num_whiteouts
== r
.num_whiteouts
&&
2657 l
.num_objects_omap
== r
.num_objects_omap
&&
2658 l
.num_objects_hit_set_archive
== r
.num_objects_hit_set_archive
&&
2659 l
.num_bytes_hit_set_archive
== r
.num_bytes_hit_set_archive
&&
2660 l
.num_flush
== r
.num_flush
&&
2661 l
.num_flush_kb
== r
.num_flush_kb
&&
2662 l
.num_evict
== r
.num_evict
&&
2663 l
.num_evict_kb
== r
.num_evict_kb
&&
2664 l
.num_promote
== r
.num_promote
&&
2665 l
.num_flush_mode_high
== r
.num_flush_mode_high
&&
2666 l
.num_flush_mode_low
== r
.num_flush_mode_low
&&
2667 l
.num_evict_mode_some
== r
.num_evict_mode_some
&&
2668 l
.num_evict_mode_full
== r
.num_evict_mode_full
&&
2669 l
.num_objects_pinned
== r
.num_objects_pinned
&&
2670 l
.num_legacy_snapsets
== r
.num_legacy_snapsets
&&
2671 l
.num_large_omap_objects
== r
.num_large_omap_objects
&&
2672 l
.num_objects_manifest
== r
.num_objects_manifest
&&
2673 l
.num_omap_bytes
== r
.num_omap_bytes
&&
2674 l
.num_omap_keys
== r
.num_omap_keys
&&
2675 l
.num_objects_repaired
== r
.num_objects_repaired
;
2678 // -- object_stat_collection_t --
2680 void object_stat_collection_t::dump(Formatter
*f
) const
2682 f
->open_object_section("stat_sum");
2687 void object_stat_collection_t::encode(bufferlist
& bl
) const
2689 ENCODE_START(2, 2, bl
);
2691 encode((__u32
)0, bl
);
2695 void object_stat_collection_t::decode(bufferlist::const_iterator
& bl
)
2697 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
2700 map
<string
,object_stat_sum_t
> cat_sum
;
2701 decode(cat_sum
, bl
);
2706 void object_stat_collection_t::generate_test_instances(list
<object_stat_collection_t
*>& o
)
2708 object_stat_collection_t a
;
2709 o
.push_back(new object_stat_collection_t(a
));
2710 list
<object_stat_sum_t
*> l
;
2711 object_stat_sum_t::generate_test_instances(l
);
2712 for (list
<object_stat_sum_t
*>::iterator p
= l
.begin(); p
!= l
.end(); ++p
) {
2714 o
.push_back(new object_stat_collection_t(a
));
2721 bool pg_stat_t::is_acting_osd(int32_t osd
, bool primary
) const
2723 if (primary
&& osd
== acting_primary
) {
2725 } else if (!primary
) {
2726 for(vector
<int32_t>::const_iterator it
= acting
.begin();
2727 it
!= acting
.end(); ++it
)
2736 void pg_stat_t::dump(Formatter
*f
) const
2738 f
->dump_stream("version") << version
;
2739 f
->dump_stream("reported_seq") << reported_seq
;
2740 f
->dump_stream("reported_epoch") << reported_epoch
;
2741 f
->dump_string("state", pg_state_string(state
));
2742 f
->dump_stream("last_fresh") << last_fresh
;
2743 f
->dump_stream("last_change") << last_change
;
2744 f
->dump_stream("last_active") << last_active
;
2745 f
->dump_stream("last_peered") << last_peered
;
2746 f
->dump_stream("last_clean") << last_clean
;
2747 f
->dump_stream("last_became_active") << last_became_active
;
2748 f
->dump_stream("last_became_peered") << last_became_peered
;
2749 f
->dump_stream("last_unstale") << last_unstale
;
2750 f
->dump_stream("last_undegraded") << last_undegraded
;
2751 f
->dump_stream("last_fullsized") << last_fullsized
;
2752 f
->dump_unsigned("mapping_epoch", mapping_epoch
);
2753 f
->dump_stream("log_start") << log_start
;
2754 f
->dump_stream("ondisk_log_start") << ondisk_log_start
;
2755 f
->dump_unsigned("created", created
);
2756 f
->dump_unsigned("last_epoch_clean", last_epoch_clean
);
2757 f
->dump_stream("parent") << parent
;
2758 f
->dump_unsigned("parent_split_bits", parent_split_bits
);
2759 f
->dump_stream("last_scrub") << last_scrub
;
2760 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
2761 f
->dump_stream("last_deep_scrub") << last_deep_scrub
;
2762 f
->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp
;
2763 f
->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp
;
2764 f
->dump_int("log_size", log_size
);
2765 f
->dump_int("ondisk_log_size", ondisk_log_size
);
2766 f
->dump_bool("stats_invalid", stats_invalid
);
2767 f
->dump_bool("dirty_stats_invalid", dirty_stats_invalid
);
2768 f
->dump_bool("omap_stats_invalid", omap_stats_invalid
);
2769 f
->dump_bool("hitset_stats_invalid", hitset_stats_invalid
);
2770 f
->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid
);
2771 f
->dump_bool("pin_stats_invalid", pin_stats_invalid
);
2772 f
->dump_bool("manifest_stats_invalid", manifest_stats_invalid
);
2773 f
->dump_unsigned("snaptrimq_len", snaptrimq_len
);
2775 f
->open_array_section("up");
2776 for (vector
<int32_t>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
)
2777 f
->dump_int("osd", *p
);
2779 f
->open_array_section("acting");
2780 for (vector
<int32_t>::const_iterator p
= acting
.begin(); p
!= acting
.end(); ++p
)
2781 f
->dump_int("osd", *p
);
2783 f
->open_array_section("avail_no_missing");
2784 for (auto p
= avail_no_missing
.cbegin(); p
!= avail_no_missing
.cend(); ++p
)
2785 f
->dump_stream("shard") << *p
;
2787 f
->open_array_section("object_location_counts");
2788 for (auto p
= object_location_counts
.cbegin(); p
!= object_location_counts
.cend(); ++p
) {
2789 f
->open_object_section("entry");
2790 f
->dump_stream("shards") << p
->first
;
2791 f
->dump_int("objects", p
->second
);
2795 f
->open_array_section("blocked_by");
2796 for (vector
<int32_t>::const_iterator p
= blocked_by
.begin();
2797 p
!= blocked_by
.end(); ++p
)
2798 f
->dump_int("osd", *p
);
2800 f
->dump_int("up_primary", up_primary
);
2801 f
->dump_int("acting_primary", acting_primary
);
2802 f
->open_array_section("purged_snaps");
2803 for (interval_set
<snapid_t
>::const_iterator i
= purged_snaps
.begin();
2804 i
!= purged_snaps
.end();
2806 f
->open_object_section("interval");
2807 f
->dump_stream("start") << i
.get_start();
2808 f
->dump_stream("length") << i
.get_len();
2814 void pg_stat_t::dump_brief(Formatter
*f
) const
2816 f
->dump_string("state", pg_state_string(state
));
2817 f
->open_array_section("up");
2818 for (vector
<int32_t>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
)
2819 f
->dump_int("osd", *p
);
2821 f
->open_array_section("acting");
2822 for (vector
<int32_t>::const_iterator p
= acting
.begin(); p
!= acting
.end(); ++p
)
2823 f
->dump_int("osd", *p
);
2825 f
->dump_int("up_primary", up_primary
);
2826 f
->dump_int("acting_primary", acting_primary
);
2829 void pg_stat_t::encode(bufferlist
&bl
) const
2831 ENCODE_START(26, 22, bl
);
2832 encode(version
, bl
);
2833 encode(reported_seq
, bl
);
2834 encode(reported_epoch
, bl
);
2835 encode((__u32
)state
, bl
); // for older peers
2836 encode(log_start
, bl
);
2837 encode(ondisk_log_start
, bl
);
2838 encode(created
, bl
);
2839 encode(last_epoch_clean
, bl
);
2841 encode(parent_split_bits
, bl
);
2842 encode(last_scrub
, bl
);
2843 encode(last_scrub_stamp
, bl
);
2845 encode(log_size
, bl
);
2846 encode(ondisk_log_size
, bl
);
2849 encode(last_fresh
, bl
);
2850 encode(last_change
, bl
);
2851 encode(last_active
, bl
);
2852 encode(last_clean
, bl
);
2853 encode(last_unstale
, bl
);
2854 encode(mapping_epoch
, bl
);
2855 encode(last_deep_scrub
, bl
);
2856 encode(last_deep_scrub_stamp
, bl
);
2857 encode(stats_invalid
, bl
);
2858 encode(last_clean_scrub_stamp
, bl
);
2859 encode(last_became_active
, bl
);
2860 encode(dirty_stats_invalid
, bl
);
2861 encode(up_primary
, bl
);
2862 encode(acting_primary
, bl
);
2863 encode(omap_stats_invalid
, bl
);
2864 encode(hitset_stats_invalid
, bl
);
2865 encode(blocked_by
, bl
);
2866 encode(last_undegraded
, bl
);
2867 encode(last_fullsized
, bl
);
2868 encode(hitset_bytes_stats_invalid
, bl
);
2869 encode(last_peered
, bl
);
2870 encode(last_became_peered
, bl
);
2871 encode(pin_stats_invalid
, bl
);
2872 encode(snaptrimq_len
, bl
);
2873 __u32 top_state
= (state
>> 32);
2874 encode(top_state
, bl
);
2875 encode(purged_snaps
, bl
);
2876 encode(manifest_stats_invalid
, bl
);
2877 encode(avail_no_missing
, bl
);
2878 encode(object_location_counts
, bl
);
2882 void pg_stat_t::decode(bufferlist::const_iterator
&bl
)
2886 DECODE_START(26, bl
);
2887 decode(version
, bl
);
2888 decode(reported_seq
, bl
);
2889 decode(reported_epoch
, bl
);
2890 decode(old_state
, bl
);
2891 decode(log_start
, bl
);
2892 decode(ondisk_log_start
, bl
);
2893 decode(created
, bl
);
2894 decode(last_epoch_clean
, bl
);
2896 decode(parent_split_bits
, bl
);
2897 decode(last_scrub
, bl
);
2898 decode(last_scrub_stamp
, bl
);
2900 decode(log_size
, bl
);
2901 decode(ondisk_log_size
, bl
);
2904 decode(last_fresh
, bl
);
2905 decode(last_change
, bl
);
2906 decode(last_active
, bl
);
2907 decode(last_clean
, bl
);
2908 decode(last_unstale
, bl
);
2909 decode(mapping_epoch
, bl
);
2910 decode(last_deep_scrub
, bl
);
2911 decode(last_deep_scrub_stamp
, bl
);
2913 stats_invalid
= tmp
;
2914 decode(last_clean_scrub_stamp
, bl
);
2915 decode(last_became_active
, bl
);
2917 dirty_stats_invalid
= tmp
;
2918 decode(up_primary
, bl
);
2919 decode(acting_primary
, bl
);
2921 omap_stats_invalid
= tmp
;
2923 hitset_stats_invalid
= tmp
;
2924 decode(blocked_by
, bl
);
2925 decode(last_undegraded
, bl
);
2926 decode(last_fullsized
, bl
);
2928 hitset_bytes_stats_invalid
= tmp
;
2929 decode(last_peered
, bl
);
2930 decode(last_became_peered
, bl
);
2932 pin_stats_invalid
= tmp
;
2933 if (struct_v
>= 23) {
2934 decode(snaptrimq_len
, bl
);
2935 if (struct_v
>= 24) {
2937 decode(top_state
, bl
);
2938 state
= (uint64_t)old_state
| ((uint64_t)top_state
<< 32);
2939 decode(purged_snaps
, bl
);
2943 if (struct_v
>= 25) {
2945 manifest_stats_invalid
= tmp
;
2947 manifest_stats_invalid
= true;
2949 if (struct_v
>= 26) {
2950 decode(avail_no_missing
, bl
);
2951 decode(object_location_counts
, bl
);
2957 void pg_stat_t::generate_test_instances(list
<pg_stat_t
*>& o
)
2960 o
.push_back(new pg_stat_t(a
));
2962 a
.version
= eversion_t(1, 3);
2963 a
.reported_epoch
= 1;
2966 a
.mapping_epoch
= 998;
2967 a
.last_fresh
= utime_t(1002, 1);
2968 a
.last_change
= utime_t(1002, 2);
2969 a
.last_active
= utime_t(1002, 3);
2970 a
.last_clean
= utime_t(1002, 4);
2971 a
.last_unstale
= utime_t(1002, 5);
2972 a
.last_undegraded
= utime_t(1002, 7);
2973 a
.last_fullsized
= utime_t(1002, 8);
2974 a
.log_start
= eversion_t(1, 4);
2975 a
.ondisk_log_start
= eversion_t(1, 5);
2977 a
.last_epoch_clean
= 7;
2978 a
.parent
= pg_t(1, 2);
2979 a
.parent_split_bits
= 12;
2980 a
.last_scrub
= eversion_t(9, 10);
2981 a
.last_scrub_stamp
= utime_t(11, 12);
2982 a
.last_deep_scrub
= eversion_t(13, 14);
2983 a
.last_deep_scrub_stamp
= utime_t(15, 16);
2984 a
.last_clean_scrub_stamp
= utime_t(17, 18);
2985 a
.snaptrimq_len
= 1048576;
2986 list
<object_stat_collection_t
*> l
;
2987 object_stat_collection_t::generate_test_instances(l
);
2988 a
.stats
= *l
.back();
2990 a
.ondisk_log_size
= 88;
2991 a
.up
.push_back(123);
2993 a
.acting
.push_back(456);
2994 a
.avail_no_missing
.push_back(pg_shard_t(456, shard_id_t::NO_SHARD
));
2995 set
<pg_shard_t
> sset
= { pg_shard_t(0), pg_shard_t(1) };
2996 a
.object_location_counts
.insert(make_pair(sset
, 10));
2997 sset
.insert(pg_shard_t(2));
2998 a
.object_location_counts
.insert(make_pair(sset
, 5));
2999 a
.acting_primary
= 456;
3000 o
.push_back(new pg_stat_t(a
));
3002 a
.up
.push_back(124);
3004 a
.acting
.push_back(124);
3005 a
.acting_primary
= 124;
3006 a
.blocked_by
.push_back(155);
3007 a
.blocked_by
.push_back(156);
3008 o
.push_back(new pg_stat_t(a
));
3011 bool operator==(const pg_stat_t
& l
, const pg_stat_t
& r
)
3014 l
.version
== r
.version
&&
3015 l
.reported_seq
== r
.reported_seq
&&
3016 l
.reported_epoch
== r
.reported_epoch
&&
3017 l
.state
== r
.state
&&
3018 l
.last_fresh
== r
.last_fresh
&&
3019 l
.last_change
== r
.last_change
&&
3020 l
.last_active
== r
.last_active
&&
3021 l
.last_peered
== r
.last_peered
&&
3022 l
.last_clean
== r
.last_clean
&&
3023 l
.last_unstale
== r
.last_unstale
&&
3024 l
.last_undegraded
== r
.last_undegraded
&&
3025 l
.last_fullsized
== r
.last_fullsized
&&
3026 l
.log_start
== r
.log_start
&&
3027 l
.ondisk_log_start
== r
.ondisk_log_start
&&
3028 l
.created
== r
.created
&&
3029 l
.last_epoch_clean
== r
.last_epoch_clean
&&
3030 l
.parent
== r
.parent
&&
3031 l
.parent_split_bits
== r
.parent_split_bits
&&
3032 l
.last_scrub
== r
.last_scrub
&&
3033 l
.last_deep_scrub
== r
.last_deep_scrub
&&
3034 l
.last_scrub_stamp
== r
.last_scrub_stamp
&&
3035 l
.last_deep_scrub_stamp
== r
.last_deep_scrub_stamp
&&
3036 l
.last_clean_scrub_stamp
== r
.last_clean_scrub_stamp
&&
3037 l
.stats
== r
.stats
&&
3038 l
.stats_invalid
== r
.stats_invalid
&&
3039 l
.log_size
== r
.log_size
&&
3040 l
.ondisk_log_size
== r
.ondisk_log_size
&&
3042 l
.acting
== r
.acting
&&
3043 l
.avail_no_missing
== r
.avail_no_missing
&&
3044 l
.object_location_counts
== r
.object_location_counts
&&
3045 l
.mapping_epoch
== r
.mapping_epoch
&&
3046 l
.blocked_by
== r
.blocked_by
&&
3047 l
.last_became_active
== r
.last_became_active
&&
3048 l
.last_became_peered
== r
.last_became_peered
&&
3049 l
.dirty_stats_invalid
== r
.dirty_stats_invalid
&&
3050 l
.omap_stats_invalid
== r
.omap_stats_invalid
&&
3051 l
.hitset_stats_invalid
== r
.hitset_stats_invalid
&&
3052 l
.hitset_bytes_stats_invalid
== r
.hitset_bytes_stats_invalid
&&
3053 l
.up_primary
== r
.up_primary
&&
3054 l
.acting_primary
== r
.acting_primary
&&
3055 l
.pin_stats_invalid
== r
.pin_stats_invalid
&&
3056 l
.manifest_stats_invalid
== r
.manifest_stats_invalid
&&
3057 l
.purged_snaps
== r
.purged_snaps
&&
3058 l
.snaptrimq_len
== r
.snaptrimq_len
;
3061 // -- store_statfs_t --
3063 bool store_statfs_t::operator==(const store_statfs_t
& other
) const
3065 return total
== other
.total
3066 && available
== other
.available
3067 && allocated
== other
.allocated
3068 && internally_reserved
== other
.internally_reserved
3069 && data_stored
== other
.data_stored
3070 && data_compressed
== other
.data_compressed
3071 && data_compressed_allocated
== other
.data_compressed_allocated
3072 && data_compressed_original
== other
.data_compressed_original
3073 && omap_allocated
== other
.omap_allocated
3074 && internal_metadata
== other
.internal_metadata
;
3077 void store_statfs_t::dump(Formatter
*f
) const
3079 f
->dump_int("total", total
);
3080 f
->dump_int("available", available
);
3081 f
->dump_int("internally_reserved", internally_reserved
);
3082 f
->dump_int("allocated", allocated
);
3083 f
->dump_int("data_stored", data_stored
);
3084 f
->dump_int("data_compressed", data_compressed
);
3085 f
->dump_int("data_compressed_allocated", data_compressed_allocated
);
3086 f
->dump_int("data_compressed_original", data_compressed_original
);
3087 f
->dump_int("omap_allocated", omap_allocated
);
3088 f
->dump_int("internal_metadata", internal_metadata
);
3091 ostream
& operator<<(ostream
& out
, const store_statfs_t
&s
)
3094 << "store_statfs(0x" << s
.available
3095 << "/0x" << s
.internally_reserved
3097 << ", data 0x" << s
.data_stored
3098 << "/0x" << s
.allocated
3099 << ", compress 0x" << s
.data_compressed
3100 << "/0x" << s
.data_compressed_allocated
3101 << "/0x" << s
.data_compressed_original
3102 << ", omap 0x" << s
.omap_allocated
3103 << ", meta 0x" << s
.internal_metadata
3109 void store_statfs_t::generate_test_instances(list
<store_statfs_t
*>& o
)
3112 o
.push_back(new store_statfs_t(a
));
3115 a
.internally_reserved
= 33;
3118 a
.data_compressed
= 21;
3119 a
.data_compressed_allocated
= 12;
3120 a
.data_compressed_original
= 13;
3121 a
.omap_allocated
= 14;
3122 a
.internal_metadata
= 15;
3123 o
.push_back(new store_statfs_t(a
));
3126 // -- pool_stat_t --
3128 void pool_stat_t::dump(Formatter
*f
) const
3131 f
->open_object_section("store_stats");
3132 store_stats
.dump(f
);
3134 f
->dump_int("log_size", log_size
);
3135 f
->dump_int("ondisk_log_size", ondisk_log_size
);
3136 f
->dump_int("up", up
);
3137 f
->dump_int("acting", acting
);
3138 f
->dump_int("num_store_stats", num_store_stats
);
3141 void pool_stat_t::encode(bufferlist
&bl
, uint64_t features
) const
3144 if ((features
& CEPH_FEATURE_OSDENC
) == 0) {
3148 encode(log_size
, bl
);
3149 encode(ondisk_log_size
, bl
);
3153 ENCODE_START(7, 5, bl
);
3155 encode(log_size
, bl
);
3156 encode(ondisk_log_size
, bl
);
3159 encode(store_stats
, bl
);
3160 encode(num_store_stats
, bl
);
3164 void pool_stat_t::decode(bufferlist::const_iterator
&bl
)
3166 DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl
);
3167 if (struct_v
>= 4) {
3169 decode(log_size
, bl
);
3170 decode(ondisk_log_size
, bl
);
3171 if (struct_v
>= 6) {
3178 if (struct_v
>= 7) {
3179 decode(store_stats
, bl
);
3180 decode(num_store_stats
, bl
);
3182 store_stats
.reset();
3183 num_store_stats
= 0;
3187 decode(stats
.sum
.num_bytes
, bl
);
3190 decode(stats
.sum
.num_objects
, bl
);
3191 decode(stats
.sum
.num_object_clones
, bl
);
3192 decode(stats
.sum
.num_object_copies
, bl
);
3193 decode(stats
.sum
.num_objects_missing_on_primary
, bl
);
3194 decode(stats
.sum
.num_objects_degraded
, bl
);
3195 decode(log_size
, bl
);
3196 decode(ondisk_log_size
, bl
);
3197 if (struct_v
>= 2) {
3198 decode(stats
.sum
.num_rd
, bl
);
3199 decode(stats
.sum
.num_rd_kb
, bl
);
3200 decode(stats
.sum
.num_wr
, bl
);
3201 decode(stats
.sum
.num_wr_kb
, bl
);
3203 if (struct_v
>= 3) {
3204 decode(stats
.sum
.num_objects_unfound
, bl
);
3210 void pool_stat_t::generate_test_instances(list
<pool_stat_t
*>& o
)
3213 o
.push_back(new pool_stat_t(a
));
3215 list
<object_stat_collection_t
*> l
;
3216 object_stat_collection_t::generate_test_instances(l
);
3217 list
<store_statfs_t
*> ll
;
3218 store_statfs_t::generate_test_instances(ll
);
3219 a
.stats
= *l
.back();
3220 a
.store_stats
= *ll
.back();
3222 a
.ondisk_log_size
= 456;
3225 a
.num_store_stats
= 1;
3226 o
.push_back(new pool_stat_t(a
));
3230 // -- pg_history_t --
3232 void pg_history_t::encode(bufferlist
&bl
) const
3234 ENCODE_START(9, 4, bl
);
3235 encode(epoch_created
, bl
);
3236 encode(last_epoch_started
, bl
);
3237 encode(last_epoch_clean
, bl
);
3238 encode(last_epoch_split
, bl
);
3239 encode(same_interval_since
, bl
);
3240 encode(same_up_since
, bl
);
3241 encode(same_primary_since
, bl
);
3242 encode(last_scrub
, bl
);
3243 encode(last_scrub_stamp
, bl
);
3244 encode(last_deep_scrub
, bl
);
3245 encode(last_deep_scrub_stamp
, bl
);
3246 encode(last_clean_scrub_stamp
, bl
);
3247 encode(last_epoch_marked_full
, bl
);
3248 encode(last_interval_started
, bl
);
3249 encode(last_interval_clean
, bl
);
3250 encode(epoch_pool_created
, bl
);
3254 void pg_history_t::decode(bufferlist::const_iterator
&bl
)
3256 DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl
);
3257 decode(epoch_created
, bl
);
3258 decode(last_epoch_started
, bl
);
3260 decode(last_epoch_clean
, bl
);
3262 last_epoch_clean
= last_epoch_started
; // careful, it's a lie!
3263 decode(last_epoch_split
, bl
);
3264 decode(same_interval_since
, bl
);
3265 decode(same_up_since
, bl
);
3266 decode(same_primary_since
, bl
);
3267 if (struct_v
>= 2) {
3268 decode(last_scrub
, bl
);
3269 decode(last_scrub_stamp
, bl
);
3271 if (struct_v
>= 5) {
3272 decode(last_deep_scrub
, bl
);
3273 decode(last_deep_scrub_stamp
, bl
);
3275 if (struct_v
>= 6) {
3276 decode(last_clean_scrub_stamp
, bl
);
3278 if (struct_v
>= 7) {
3279 decode(last_epoch_marked_full
, bl
);
3281 if (struct_v
>= 8) {
3282 decode(last_interval_started
, bl
);
3283 decode(last_interval_clean
, bl
);
3285 if (last_epoch_started
>= same_interval_since
) {
3286 last_interval_started
= same_interval_since
;
3288 last_interval_started
= last_epoch_started
; // best guess
3290 if (last_epoch_clean
>= same_interval_since
) {
3291 last_interval_clean
= same_interval_since
;
3293 last_interval_clean
= last_epoch_clean
; // best guess
3296 if (struct_v
>= 9) {
3297 decode(epoch_pool_created
, bl
);
3299 epoch_pool_created
= epoch_created
;
3304 void pg_history_t::dump(Formatter
*f
) const
3306 f
->dump_int("epoch_created", epoch_created
);
3307 f
->dump_int("epoch_pool_created", epoch_pool_created
);
3308 f
->dump_int("last_epoch_started", last_epoch_started
);
3309 f
->dump_int("last_interval_started", last_interval_started
);
3310 f
->dump_int("last_epoch_clean", last_epoch_clean
);
3311 f
->dump_int("last_interval_clean", last_interval_clean
);
3312 f
->dump_int("last_epoch_split", last_epoch_split
);
3313 f
->dump_int("last_epoch_marked_full", last_epoch_marked_full
);
3314 f
->dump_int("same_up_since", same_up_since
);
3315 f
->dump_int("same_interval_since", same_interval_since
);
3316 f
->dump_int("same_primary_since", same_primary_since
);
3317 f
->dump_stream("last_scrub") << last_scrub
;
3318 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
3319 f
->dump_stream("last_deep_scrub") << last_deep_scrub
;
3320 f
->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp
;
3321 f
->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp
;
3324 void pg_history_t::generate_test_instances(list
<pg_history_t
*>& o
)
3326 o
.push_back(new pg_history_t
);
3327 o
.push_back(new pg_history_t
);
3328 o
.back()->epoch_created
= 1;
3329 o
.back()->epoch_pool_created
= 1;
3330 o
.back()->last_epoch_started
= 2;
3331 o
.back()->last_interval_started
= 2;
3332 o
.back()->last_epoch_clean
= 3;
3333 o
.back()->last_interval_clean
= 2;
3334 o
.back()->last_epoch_split
= 4;
3335 o
.back()->same_up_since
= 5;
3336 o
.back()->same_interval_since
= 6;
3337 o
.back()->same_primary_since
= 7;
3338 o
.back()->last_scrub
= eversion_t(8, 9);
3339 o
.back()->last_scrub_stamp
= utime_t(10, 11);
3340 o
.back()->last_deep_scrub
= eversion_t(12, 13);
3341 o
.back()->last_deep_scrub_stamp
= utime_t(14, 15);
3342 o
.back()->last_clean_scrub_stamp
= utime_t(16, 17);
3343 o
.back()->last_epoch_marked_full
= 18;
3349 void pg_info_t::encode(bufferlist
&bl
) const
3351 ENCODE_START(32, 26, bl
);
3352 encode(pgid
.pgid
, bl
);
3353 encode(last_update
, bl
);
3354 encode(last_complete
, bl
);
3355 encode(log_tail
, bl
);
3356 if (last_backfill_bitwise
&& !last_backfill
.is_max()) {
3357 encode(hobject_t(), bl
);
3359 encode(last_backfill
, bl
);
3363 encode(purged_snaps
, bl
);
3364 encode(last_epoch_started
, bl
);
3365 encode(last_user_version
, bl
);
3366 encode(hit_set
, bl
);
3367 encode(pgid
.shard
, bl
);
3368 encode(last_backfill
, bl
);
3369 encode(last_backfill_bitwise
, bl
);
3370 encode(last_interval_started
, bl
);
3374 void pg_info_t::decode(bufferlist::const_iterator
&bl
)
3376 DECODE_START(32, bl
);
3377 decode(pgid
.pgid
, bl
);
3378 decode(last_update
, bl
);
3379 decode(last_complete
, bl
);
3380 decode(log_tail
, bl
);
3382 hobject_t old_last_backfill
;
3383 decode(old_last_backfill
, bl
);
3387 decode(purged_snaps
, bl
);
3388 decode(last_epoch_started
, bl
);
3389 decode(last_user_version
, bl
);
3390 decode(hit_set
, bl
);
3391 decode(pgid
.shard
, bl
);
3392 decode(last_backfill
, bl
);
3393 decode(last_backfill_bitwise
, bl
);
3394 if (struct_v
>= 32) {
3395 decode(last_interval_started
, bl
);
3397 last_interval_started
= last_epoch_started
;
3404 void pg_info_t::dump(Formatter
*f
) const
3406 f
->dump_stream("pgid") << pgid
;
3407 f
->dump_stream("last_update") << last_update
;
3408 f
->dump_stream("last_complete") << last_complete
;
3409 f
->dump_stream("log_tail") << log_tail
;
3410 f
->dump_int("last_user_version", last_user_version
);
3411 f
->dump_stream("last_backfill") << last_backfill
;
3412 f
->dump_int("last_backfill_bitwise", (int)last_backfill_bitwise
);
3413 f
->open_array_section("purged_snaps");
3414 for (interval_set
<snapid_t
>::const_iterator i
=purged_snaps
.begin();
3415 i
!= purged_snaps
.end();
3417 f
->open_object_section("purged_snap_interval");
3418 f
->dump_stream("start") << i
.get_start();
3419 f
->dump_stream("length") << i
.get_len();
3423 f
->open_object_section("history");
3426 f
->open_object_section("stats");
3430 f
->dump_int("empty", is_empty());
3431 f
->dump_int("dne", dne());
3432 f
->dump_int("incomplete", is_incomplete());
3433 f
->dump_int("last_epoch_started", last_epoch_started
);
3435 f
->open_object_section("hit_set_history");
3440 void pg_info_t::generate_test_instances(list
<pg_info_t
*>& o
)
3442 o
.push_back(new pg_info_t
);
3443 o
.push_back(new pg_info_t
);
3444 list
<pg_history_t
*> h
;
3445 pg_history_t::generate_test_instances(h
);
3446 o
.back()->history
= *h
.back();
3447 o
.back()->pgid
= spg_t(pg_t(1, 2), shard_id_t::NO_SHARD
);
3448 o
.back()->last_update
= eversion_t(3, 4);
3449 o
.back()->last_complete
= eversion_t(5, 6);
3450 o
.back()->last_user_version
= 2;
3451 o
.back()->log_tail
= eversion_t(7, 8);
3452 o
.back()->last_backfill
= hobject_t(object_t("objname"), "key", 123, 456, -1, "");
3453 o
.back()->last_backfill_bitwise
= true;
3456 pg_stat_t::generate_test_instances(s
);
3457 o
.back()->stats
= *s
.back();
3460 list
<pg_hit_set_history_t
*> s
;
3461 pg_hit_set_history_t::generate_test_instances(s
);
3462 o
.back()->hit_set
= *s
.back();
3466 // -- pg_notify_t --
3467 void pg_notify_t::encode(bufferlist
&bl
) const
3469 ENCODE_START(2, 2, bl
);
3470 encode(query_epoch
, bl
);
3471 encode(epoch_sent
, bl
);
3478 void pg_notify_t::decode(bufferlist::const_iterator
&bl
)
3480 DECODE_START(2, bl
);
3481 decode(query_epoch
, bl
);
3482 decode(epoch_sent
, bl
);
3489 void pg_notify_t::dump(Formatter
*f
) const
3491 f
->dump_int("from", from
);
3492 f
->dump_int("to", to
);
3493 f
->dump_unsigned("query_epoch", query_epoch
);
3494 f
->dump_unsigned("epoch_sent", epoch_sent
);
3496 f
->open_object_section("info");
3502 void pg_notify_t::generate_test_instances(list
<pg_notify_t
*>& o
)
3504 o
.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD
, 1, 1, pg_info_t()));
3505 o
.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t()));
3508 ostream
&operator<<(ostream
&lhs
, const pg_notify_t
¬ify
)
3510 lhs
<< "(query:" << notify
.query_epoch
3511 << " sent:" << notify
.epoch_sent
3512 << " " << notify
.info
;
3513 if (notify
.from
!= shard_id_t::NO_SHARD
||
3514 notify
.to
!= shard_id_t::NO_SHARD
)
3515 lhs
<< " " << (unsigned)notify
.from
3516 << "->" << (unsigned)notify
.to
;
3520 // -- pg_interval_t --
3522 void PastIntervals::pg_interval_t::encode(bufferlist
& bl
) const
3524 ENCODE_START(4, 2, bl
);
3529 encode(maybe_went_rw
, bl
);
3530 encode(primary
, bl
);
3531 encode(up_primary
, bl
);
3535 void PastIntervals::pg_interval_t::decode(bufferlist::const_iterator
& bl
)
3537 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl
);
3542 decode(maybe_went_rw
, bl
);
3543 if (struct_v
>= 3) {
3544 decode(primary
, bl
);
3547 primary
= acting
[0];
3549 if (struct_v
>= 4) {
3550 decode(up_primary
, bl
);
3558 void PastIntervals::pg_interval_t::dump(Formatter
*f
) const
3560 f
->dump_unsigned("first", first
);
3561 f
->dump_unsigned("last", last
);
3562 f
->dump_int("maybe_went_rw", maybe_went_rw
? 1 : 0);
3563 f
->open_array_section("up");
3564 for (vector
<int>::const_iterator p
= up
.begin(); p
!= up
.end(); ++p
)
3565 f
->dump_int("osd", *p
);
3567 f
->open_array_section("acting");
3568 for (vector
<int>::const_iterator p
= acting
.begin(); p
!= acting
.end(); ++p
)
3569 f
->dump_int("osd", *p
);
3571 f
->dump_int("primary", primary
);
3572 f
->dump_int("up_primary", up_primary
);
3575 void PastIntervals::pg_interval_t::generate_test_instances(list
<pg_interval_t
*>& o
)
3577 o
.push_back(new pg_interval_t
);
3578 o
.push_back(new pg_interval_t
);
3579 o
.back()->up
.push_back(1);
3580 o
.back()->acting
.push_back(2);
3581 o
.back()->acting
.push_back(3);
3582 o
.back()->first
= 4;
3584 o
.back()->maybe_went_rw
= true;
3587 WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t
)
3593 * PastIntervals only needs to be able to answer two questions:
3594 * 1) Where should the primary look for unfound objects?
3595 * 2) List a set of subsets of the OSDs such that contacting at least
3596 * one from each subset guarantees we speak to at least one witness
3597 * of any completed write.
3599 * Crucially, 2) does not require keeping *all* past intervals. Certainly,
3600 * we don't need to keep any where maybe_went_rw would be false. We also
3601 * needn't keep two intervals where the actingset in one is a subset
3602 * of the other (only need to keep the smaller of the two sets). In order
3603 * to accurately trim the set of intervals as last_epoch_started changes
3604 * without rebuilding the set from scratch, we'll retain the larger set
3605 * if it in an older interval.
3607 struct compact_interval_t
{
3610 set
<pg_shard_t
> acting
;
3611 bool supersedes(const compact_interval_t
&other
) {
3612 for (auto &&i
: acting
) {
3613 if (!other
.acting
.count(i
))
3618 void dump(Formatter
*f
) const {
3619 f
->open_object_section("compact_interval_t");
3620 f
->dump_stream("first") << first
;
3621 f
->dump_stream("last") << last
;
3622 f
->dump_stream("acting") << acting
;
3625 void encode(bufferlist
&bl
) const {
3626 ENCODE_START(1, 1, bl
);
3632 void decode(bufferlist::const_iterator
&bl
) {
3633 DECODE_START(1, bl
);
3639 static void generate_test_instances(list
<compact_interval_t
*> & o
) {
3640 /* Not going to be used, we'll generate pi_compact_rep directly */
3643 ostream
&operator<<(ostream
&o
, const compact_interval_t
&rhs
)
3645 return o
<< "([" << rhs
.first
<< "," << rhs
.last
3646 << "] acting " << rhs
.acting
<< ")";
3648 WRITE_CLASS_ENCODER(compact_interval_t
)
3650 class pi_compact_rep
: public PastIntervals::interval_rep
{
3652 epoch_t last
= 0; // inclusive
3653 set
<pg_shard_t
> all_participants
;
3654 list
<compact_interval_t
> intervals
;
3657 std::list
<PastIntervals::pg_interval_t
> &&intervals
) {
3658 for (auto &&i
: intervals
)
3659 add_interval(ec_pool
, i
);
3662 pi_compact_rep() = default;
3663 pi_compact_rep(const pi_compact_rep
&) = default;
3664 pi_compact_rep(pi_compact_rep
&&) = default;
3665 pi_compact_rep
&operator=(const pi_compact_rep
&) = default;
3666 pi_compact_rep
&operator=(pi_compact_rep
&&) = default;
3668 size_t size() const override
{ return intervals
.size(); }
3669 bool empty() const override
{
3670 return first
> last
|| (first
== 0 && last
== 0);
3672 void clear() override
{
3673 *this = pi_compact_rep();
3675 pair
<epoch_t
, epoch_t
> get_bounds() const override
{
3676 return make_pair(first
, last
+ 1);
3678 void adjust_start_backwards(epoch_t last_epoch_clean
) {
3679 first
= last_epoch_clean
;
3682 set
<pg_shard_t
> get_all_participants(
3683 bool ec_pool
) const override
{
3684 return all_participants
;
3687 bool ec_pool
, const PastIntervals::pg_interval_t
&interval
) override
{
3689 first
= interval
.first
;
3690 ceph_assert(interval
.last
> last
);
3691 last
= interval
.last
;
3692 set
<pg_shard_t
> acting
;
3693 for (unsigned i
= 0; i
< interval
.acting
.size(); ++i
) {
3694 if (interval
.acting
[i
] == CRUSH_ITEM_NONE
)
3699 ec_pool
? shard_id_t(i
) : shard_id_t::NO_SHARD
));
3701 all_participants
.insert(acting
.begin(), acting
.end());
3702 if (!interval
.maybe_went_rw
)
3704 intervals
.push_back(
3705 compact_interval_t
{interval
.first
, interval
.last
, acting
});
3706 auto plast
= intervals
.end();
3708 for (auto cur
= intervals
.begin(); cur
!= plast
; ) {
3709 if (plast
->supersedes(*cur
)) {
3710 intervals
.erase(cur
++);
3716 unique_ptr
<PastIntervals::interval_rep
> clone() const override
{
3717 return unique_ptr
<PastIntervals::interval_rep
>(new pi_compact_rep(*this));
3719 ostream
&print(ostream
&out
) const override
{
3720 return out
<< "([" << first
<< "," << last
3721 << "] intervals=" << intervals
<< ")";
3723 void encode(bufferlist
&bl
) const override
{
3724 ENCODE_START(1, 1, bl
);
3727 encode(all_participants
, bl
);
3728 encode(intervals
, bl
);
3731 void decode(bufferlist::const_iterator
&bl
) override
{
3732 DECODE_START(1, bl
);
3735 decode(all_participants
, bl
);
3736 decode(intervals
, bl
);
3739 void dump(Formatter
*f
) const override
{
3740 f
->open_object_section("PastIntervals::compact_rep");
3741 f
->dump_stream("first") << first
;
3742 f
->dump_stream("last") << last
;
3743 f
->open_array_section("all_participants");
3744 for (auto& i
: all_participants
) {
3745 f
->dump_object("pg_shard", i
);
3748 f
->open_array_section("intervals");
3749 for (auto &&i
: intervals
) {
3755 static void generate_test_instances(list
<pi_compact_rep
*> &o
) {
3756 using ival
= PastIntervals::pg_interval_t
;
3757 using ivallst
= std::list
<ival
>;
3761 { ival
{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3762 , ival
{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3763 , ival
{{ 2}, { 2}, 31, 35, false, 2, 2}
3764 , ival
{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3769 { ival
{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3770 , ival
{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3771 , ival
{{ 2}, { 2}, 31, 35, false, 2, 2}
3772 , ival
{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3777 { ival
{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3778 , ival
{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3779 , ival
{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3780 , ival
{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3783 void iterate_mayberw_back_to(
3785 std::function
<void(epoch_t
, const set
<pg_shard_t
> &)> &&f
) const override
{
3786 for (auto i
= intervals
.rbegin(); i
!= intervals
.rend(); ++i
) {
3789 f(i
->first
, i
->acting
);
3792 virtual ~pi_compact_rep() override
{}
3794 WRITE_CLASS_ENCODER(pi_compact_rep
)
3796 PastIntervals::PastIntervals()
3798 past_intervals
.reset(new pi_compact_rep
);
3801 PastIntervals::PastIntervals(const PastIntervals
&rhs
)
3802 : past_intervals(rhs
.past_intervals
?
3803 rhs
.past_intervals
->clone() :
3806 PastIntervals
&PastIntervals::operator=(const PastIntervals
&rhs
)
3808 PastIntervals
other(rhs
);
3813 ostream
& operator<<(ostream
& out
, const PastIntervals
&i
)
3815 if (i
.past_intervals
) {
3816 return i
.past_intervals
->print(out
);
3818 return out
<< "(empty)";
3822 ostream
& operator<<(ostream
& out
, const PastIntervals::PriorSet
&i
)
3824 return out
<< "PriorSet("
3825 << "ec_pool: " << i
.ec_pool
3826 << ", probe: " << i
.probe
3827 << ", down: " << i
.down
3828 << ", blocked_by: " << i
.blocked_by
3829 << ", pg_down: " << i
.pg_down
3833 void PastIntervals::decode(bufferlist::const_iterator
&bl
)
3835 DECODE_START(1, bl
);
3842 ceph_abort_msg("pi_simple_rep support removed post-luminous");
3845 past_intervals
.reset(new pi_compact_rep
);
3846 past_intervals
->decode(bl
);
3852 void PastIntervals::generate_test_instances(list
<PastIntervals
*> &o
)
3855 list
<pi_compact_rep
*> compact
;
3856 pi_compact_rep::generate_test_instances(compact
);
3857 for (auto &&i
: compact
) {
3858 // takes ownership of contents
3859 o
.push_back(new PastIntervals(i
));
3865 bool PastIntervals::is_new_interval(
3866 int old_acting_primary
,
3867 int new_acting_primary
,
3868 const vector
<int> &old_acting
,
3869 const vector
<int> &new_acting
,
3872 const vector
<int> &old_up
,
3873 const vector
<int> &new_up
,
3878 unsigned old_pg_num
,
3879 unsigned new_pg_num
,
3880 unsigned old_pg_num_pending
,
3881 unsigned new_pg_num_pending
,
3882 bool old_sort_bitwise
,
3883 bool new_sort_bitwise
,
3884 bool old_recovery_deletes
,
3885 bool new_recovery_deletes
,
3887 return old_acting_primary
!= new_acting_primary
||
3888 new_acting
!= old_acting
||
3889 old_up_primary
!= new_up_primary
||
3891 old_min_size
!= new_min_size
||
3892 old_size
!= new_size
||
3893 pgid
.is_split(old_pg_num
, new_pg_num
, 0) ||
3894 // (is or was) pre-merge source
3895 pgid
.is_merge_source(old_pg_num_pending
, new_pg_num_pending
, 0) ||
3896 pgid
.is_merge_source(new_pg_num_pending
, old_pg_num_pending
, 0) ||
3898 pgid
.is_merge_source(old_pg_num
, new_pg_num
, 0) ||
3899 // (is or was) pre-merge target
3900 pgid
.is_merge_target(old_pg_num_pending
, new_pg_num_pending
) ||
3901 pgid
.is_merge_target(new_pg_num_pending
, old_pg_num_pending
) ||
3903 pgid
.is_merge_target(old_pg_num
, new_pg_num
) ||
3904 old_sort_bitwise
!= new_sort_bitwise
||
3905 old_recovery_deletes
!= new_recovery_deletes
;
3908 bool PastIntervals::is_new_interval(
3909 int old_acting_primary
,
3910 int new_acting_primary
,
3911 const vector
<int> &old_acting
,
3912 const vector
<int> &new_acting
,
3915 const vector
<int> &old_up
,
3916 const vector
<int> &new_up
,
3921 const pg_pool_t
*plast
= lastmap
->get_pg_pool(pgid
.pool());
3923 return false; // after pool is deleted there are no more interval changes
3925 const pg_pool_t
*pi
= osdmap
->get_pg_pool(pgid
.pool());
3927 return true; // pool was deleted this epoch -> (final!) interval change
3930 is_new_interval(old_acting_primary
,
3942 plast
->get_pg_num(),
3944 plast
->get_pg_num_pending(),
3945 pi
->get_pg_num_pending(),
3946 lastmap
->test_flag(CEPH_OSDMAP_SORTBITWISE
),
3947 osdmap
->test_flag(CEPH_OSDMAP_SORTBITWISE
),
3948 lastmap
->test_flag(CEPH_OSDMAP_RECOVERY_DELETES
),
3949 osdmap
->test_flag(CEPH_OSDMAP_RECOVERY_DELETES
),
3953 bool PastIntervals::check_new_interval(
3954 int old_acting_primary
,
3955 int new_acting_primary
,
3956 const vector
<int> &old_acting
,
3957 const vector
<int> &new_acting
,
3960 const vector
<int> &old_up
,
3961 const vector
<int> &new_up
,
3962 epoch_t same_interval_since
,
3963 epoch_t last_epoch_clean
,
3967 IsPGRecoverablePredicate
*could_have_gone_active
,
3968 PastIntervals
*past_intervals
,
3972 * We have to be careful to gracefully deal with situations like
3973 * so. Say we have a power outage or something that takes out both
3974 * OSDs, but the monitor doesn't mark them down in the same epoch.
3975 * The history may look like
3979 * 3: let's say B dies for good, too (say, from the power spike)
3982 * which makes it look like B may have applied updates to the PG
3983 * that we need in order to proceed. This sucks...
3985 * To minimize the risk of this happening, we CANNOT go active if
3986 * _any_ OSDs in the prior set are down until we send an MOSDAlive
3987 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
3988 * Then, we have something like
3995 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
4005 * -> we must wait for B, bc it was alive through 2, and could have
4006 * written to the pg.
4008 * If B is really dead, then an administrator will need to manually
4009 * intervene by marking the OSD as "lost."
4012 // remember past interval
4013 // NOTE: a change in the up set primary triggers an interval
4014 // change, even though the interval members in the pg_interval_t
4016 ceph_assert(past_intervals
);
4017 ceph_assert(past_intervals
->past_intervals
);
4018 if (is_new_interval(
4031 i
.first
= same_interval_since
;
4032 i
.last
= osdmap
->get_epoch() - 1;
4033 ceph_assert(i
.first
<= i
.last
);
4034 i
.acting
= old_acting
;
4036 i
.primary
= old_acting_primary
;
4037 i
.up_primary
= old_up_primary
;
4039 unsigned num_acting
= 0;
4040 for (vector
<int>::const_iterator p
= i
.acting
.begin(); p
!= i
.acting
.end();
4042 if (*p
!= CRUSH_ITEM_NONE
)
4045 ceph_assert(lastmap
->get_pools().count(pgid
.pool()));
4046 const pg_pool_t
& old_pg_pool
= lastmap
->get_pools().find(pgid
.pool())->second
;
4047 set
<pg_shard_t
> old_acting_shards
;
4048 old_pg_pool
.convert_to_pg_shards(old_acting
, &old_acting_shards
);
4052 num_acting
>= old_pg_pool
.min_size
&&
4053 (*could_have_gone_active
)(old_acting_shards
)) {
4055 *out
<< __func__
<< " " << i
4056 << " up_thru " << lastmap
->get_up_thru(i
.primary
)
4057 << " up_from " << lastmap
->get_up_from(i
.primary
)
4058 << " last_epoch_clean " << last_epoch_clean
;
4059 if (lastmap
->get_up_thru(i
.primary
) >= i
.first
&&
4060 lastmap
->get_up_from(i
.primary
) <= i
.first
) {
4061 i
.maybe_went_rw
= true;
4064 << " : primary up " << lastmap
->get_up_from(i
.primary
)
4065 << "-" << lastmap
->get_up_thru(i
.primary
)
4066 << " includes interval"
4068 } else if (last_epoch_clean
>= i
.first
&&
4069 last_epoch_clean
<= i
.last
) {
4070 // If the last_epoch_clean is included in this interval, then
4071 // the pg must have been rw (for recovery to have completed).
4072 // This is important because we won't know the _real_
4073 // first_epoch because we stop at last_epoch_clean, and we
4074 // don't want the oldest interval to randomly have
4075 // maybe_went_rw false depending on the relative up_thru vs
4076 // last_epoch_clean timing.
4077 i
.maybe_went_rw
= true;
4080 << " : includes last_epoch_clean " << last_epoch_clean
4081 << " and presumed to have been rw"
4084 i
.maybe_went_rw
= false;
4087 << " : primary up " << lastmap
->get_up_from(i
.primary
)
4088 << "-" << lastmap
->get_up_thru(i
.primary
)
4089 << " does not include interval"
4093 i
.maybe_went_rw
= false;
4095 *out
<< __func__
<< " " << i
<< " : acting set is too small" << std::endl
;
4097 past_intervals
->past_intervals
->add_interval(old_pg_pool
.is_erasure(), i
);
4105 // true if the given map affects the prior set
4106 bool PastIntervals::PriorSet::affected_by_map(
4107 const OSDMap
&osdmap
,
4108 const DoutPrefixProvider
*dpp
) const
4110 for (set
<pg_shard_t
>::iterator p
= probe
.begin();
4115 // did someone in the prior set go down?
4116 if (osdmap
.is_down(o
) && down
.count(o
) == 0) {
4117 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " now down" << dendl
;
4121 // did a down osd in cur get (re)marked as lost?
4122 map
<int, epoch_t
>::const_iterator r
= blocked_by
.find(o
);
4123 if (r
!= blocked_by
.end()) {
4124 if (!osdmap
.exists(o
)) {
4125 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " no longer exists" << dendl
;
4128 if (osdmap
.get_info(o
).lost_at
!= r
->second
) {
4129 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " (re)marked as lost" << dendl
;
4135 // did someone in the prior down set go up?
4136 for (set
<int>::const_iterator p
= down
.begin();
4141 if (osdmap
.is_up(o
)) {
4142 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " now up" << dendl
;
4146 // did someone in the prior set get lost or destroyed?
4147 if (!osdmap
.exists(o
)) {
4148 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " no longer exists" << dendl
;
4151 // did a down osd in down get (re)marked as lost?
4152 map
<int, epoch_t
>::const_iterator r
= blocked_by
.find(o
);
4153 if (r
!= blocked_by
.end()) {
4154 if (osdmap
.get_info(o
).lost_at
!= r
->second
) {
4155 ldpp_dout(dpp
, 10) << "affected_by_map osd." << o
<< " (re)marked as lost" << dendl
;
4164 ostream
& operator<<(ostream
& out
, const PastIntervals::pg_interval_t
& i
)
4166 out
<< "interval(" << i
.first
<< "-" << i
.last
4167 << " up " << i
.up
<< "(" << i
.up_primary
<< ")"
4168 << " acting " << i
.acting
<< "(" << i
.primary
<< ")";
4169 if (i
.maybe_went_rw
)
4170 out
<< " maybe_went_rw";
4179 void pg_query_t::encode(bufferlist
&bl
, uint64_t features
) const {
4180 ENCODE_START(3, 3, bl
);
4184 encode(epoch_sent
, bl
);
4190 void pg_query_t::decode(bufferlist::const_iterator
&bl
) {
4191 DECODE_START(3, bl
);
4195 decode(epoch_sent
, bl
);
4201 void pg_query_t::dump(Formatter
*f
) const
4203 f
->dump_int("from", from
);
4204 f
->dump_int("to", to
);
4205 f
->dump_string("type", get_type_name());
4206 f
->dump_stream("since") << since
;
4207 f
->dump_stream("epoch_sent") << epoch_sent
;
4208 f
->open_object_section("history");
4212 void pg_query_t::generate_test_instances(list
<pg_query_t
*>& o
)
4214 o
.push_back(new pg_query_t());
4215 list
<pg_history_t
*> h
;
4216 pg_history_t::generate_test_instances(h
);
4217 o
.push_back(new pg_query_t(pg_query_t::INFO
, shard_id_t(1), shard_id_t(2), *h
.back(), 4));
4218 o
.push_back(new pg_query_t(pg_query_t::MISSING
, shard_id_t(2), shard_id_t(3), *h
.back(), 4));
4219 o
.push_back(new pg_query_t(pg_query_t::LOG
, shard_id_t(0), shard_id_t(0),
4220 eversion_t(4, 5), *h
.back(), 4));
4221 o
.push_back(new pg_query_t(pg_query_t::FULLLOG
,
4222 shard_id_t::NO_SHARD
, shard_id_t::NO_SHARD
,
4226 // -- ObjectModDesc --
4227 void ObjectModDesc::visit(Visitor
*visitor
) const
4229 auto bp
= bl
.cbegin();
4232 DECODE_START(max_required_version
, bp
);
4239 visitor
->append(size
);
4243 map
<string
, boost::optional
<bufferlist
> > attrs
;
4245 visitor
->setattrs(attrs
);
4249 version_t old_version
;
4250 decode(old_version
, bp
);
4251 visitor
->rmobject(old_version
);
4258 case UPDATE_SNAPS
: {
4259 set
<snapid_t
> snaps
;
4261 visitor
->update_snaps(snaps
);
4265 version_t old_version
;
4266 decode(old_version
, bp
);
4267 visitor
->try_rmobject(old_version
);
4270 case ROLLBACK_EXTENTS
: {
4271 vector
<pair
<uint64_t, uint64_t> > extents
;
4274 decode(extents
, bp
);
4275 visitor
->rollback_extents(gen
,extents
);
4279 ceph_abort_msg("Invalid rollback code");
4284 ceph_abort_msg("Invalid encoding");
4288 struct DumpVisitor
: public ObjectModDesc::Visitor
{
4290 explicit DumpVisitor(Formatter
*f
) : f(f
) {}
4291 void append(uint64_t old_size
) override
{
4292 f
->open_object_section("op");
4293 f
->dump_string("code", "APPEND");
4294 f
->dump_unsigned("old_size", old_size
);
4297 void setattrs(map
<string
, boost::optional
<bufferlist
> > &attrs
) override
{
4298 f
->open_object_section("op");
4299 f
->dump_string("code", "SETATTRS");
4300 f
->open_array_section("attrs");
4301 for (map
<string
, boost::optional
<bufferlist
> >::iterator i
= attrs
.begin();
4304 f
->dump_string("attr_name", i
->first
);
4309 void rmobject(version_t old_version
) override
{
4310 f
->open_object_section("op");
4311 f
->dump_string("code", "RMOBJECT");
4312 f
->dump_unsigned("old_version", old_version
);
4315 void try_rmobject(version_t old_version
) override
{
4316 f
->open_object_section("op");
4317 f
->dump_string("code", "TRY_RMOBJECT");
4318 f
->dump_unsigned("old_version", old_version
);
4321 void create() override
{
4322 f
->open_object_section("op");
4323 f
->dump_string("code", "CREATE");
4326 void update_snaps(const set
<snapid_t
> &snaps
) override
{
4327 f
->open_object_section("op");
4328 f
->dump_string("code", "UPDATE_SNAPS");
4329 f
->dump_stream("snaps") << snaps
;
4332 void rollback_extents(
4334 const vector
<pair
<uint64_t, uint64_t> > &extents
) override
{
4335 f
->open_object_section("op");
4336 f
->dump_string("code", "ROLLBACK_EXTENTS");
4337 f
->dump_unsigned("gen", gen
);
4338 f
->dump_stream("snaps") << extents
;
4343 void ObjectModDesc::dump(Formatter
*f
) const
4345 f
->open_object_section("object_mod_desc");
4346 f
->dump_bool("can_local_rollback", can_local_rollback
);
4347 f
->dump_bool("rollback_info_completed", rollback_info_completed
);
4349 f
->open_array_section("ops");
4357 void ObjectModDesc::generate_test_instances(list
<ObjectModDesc
*>& o
)
4359 map
<string
, boost::optional
<bufferlist
> > attrs
;
4363 o
.push_back(new ObjectModDesc());
4364 o
.back()->append(100);
4365 o
.back()->setattrs(attrs
);
4366 o
.push_back(new ObjectModDesc());
4367 o
.back()->rmobject(1001);
4368 o
.push_back(new ObjectModDesc());
4370 o
.back()->setattrs(attrs
);
4371 o
.push_back(new ObjectModDesc());
4373 o
.back()->setattrs(attrs
);
4374 o
.back()->mark_unrollbackable();
4375 o
.back()->append(1000);
4378 void ObjectModDesc::encode(bufferlist
&_bl
) const
4380 ENCODE_START(max_required_version
, max_required_version
, _bl
);
4381 encode(can_local_rollback
, _bl
);
4382 encode(rollback_info_completed
, _bl
);
4386 void ObjectModDesc::decode(bufferlist::const_iterator
&_bl
)
4388 DECODE_START(2, _bl
);
4389 max_required_version
= struct_v
;
4390 decode(can_local_rollback
, _bl
);
4391 decode(rollback_info_completed
, _bl
);
4393 // ensure bl does not pin a larger buffer in memory
4395 bl
.reassign_to_mempool(mempool::mempool_osd_pglog
);
4399 // -- pg_log_entry_t --
4401 string
pg_log_entry_t::get_key_name() const
4403 return version
.get_key_name();
4406 void pg_log_entry_t::encode_with_checksum(bufferlist
& bl
) const
4409 bufferlist
ebl(sizeof(*this)*2);
4411 __u32 crc
= ebl
.crc32c(0);
4416 void pg_log_entry_t::decode_with_checksum(bufferlist::const_iterator
& p
)
4423 if (crc
!= bl
.crc32c(0))
4424 throw buffer::malformed_input("bad checksum on pg_log_entry_t");
4425 auto q
= bl
.cbegin();
4429 void pg_log_entry_t::encode(bufferlist
&bl
) const
4431 ENCODE_START(12, 4, bl
);
4434 encode(version
, bl
);
4437 * Added with reverting_to:
4438 * Previous code used prior_version to encode
4439 * what we now call reverting_to. This will
4440 * allow older code to decode reverting_to
4441 * into prior_version as expected.
4443 if (op
== LOST_REVERT
)
4444 encode(reverting_to
, bl
);
4446 encode(prior_version
, bl
);
4450 if (op
== LOST_REVERT
)
4451 encode(prior_version
, bl
);
4453 encode(user_version
, bl
);
4454 encode(mod_desc
, bl
);
4455 encode(extra_reqids
, bl
);
4457 encode(return_code
, bl
);
4458 if (!extra_reqids
.empty())
4459 encode(extra_reqid_return_codes
, bl
);
4463 void pg_log_entry_t::decode(bufferlist::const_iterator
&bl
)
4465 DECODE_START_LEGACY_COMPAT_LEN(12, 4, 4, bl
);
4469 decode(old_soid
, bl
);
4470 soid
.oid
= old_soid
.oid
;
4471 soid
.snap
= old_soid
.snap
;
4472 invalid_hash
= true;
4477 invalid_hash
= true;
4478 decode(version
, bl
);
4480 if (struct_v
>= 6 && op
== LOST_REVERT
)
4481 decode(reverting_to
, bl
);
4483 decode(prior_version
, bl
);
4489 invalid_pool
= true;
4491 if (op
== LOST_REVERT
) {
4492 if (struct_v
>= 6) {
4493 decode(prior_version
, bl
);
4495 reverting_to
= prior_version
;
4498 if (struct_v
>= 7 || // for v >= 7, this is for all ops.
4499 op
== CLONE
) { // for v < 7, it's only present for CLONE.
4501 // ensure snaps does not pin a larger buffer in memory
4503 snaps
.reassign_to_mempool(mempool::mempool_osd_pglog
);
4507 decode(user_version
, bl
);
4509 user_version
= version
.version
;
4512 decode(mod_desc
, bl
);
4514 mod_desc
.mark_unrollbackable();
4516 decode(extra_reqids
, bl
);
4517 if (struct_v
>= 11 && op
== ERROR
)
4518 decode(return_code
, bl
);
4519 if (struct_v
>= 12 && !extra_reqids
.empty())
4520 decode(extra_reqid_return_codes
, bl
);
4524 void pg_log_entry_t::dump(Formatter
*f
) const
4526 f
->dump_string("op", get_op_name());
4527 f
->dump_stream("object") << soid
;
4528 f
->dump_stream("version") << version
;
4529 f
->dump_stream("prior_version") << prior_version
;
4530 f
->dump_stream("reqid") << reqid
;
4531 f
->open_array_section("extra_reqids");
4533 for (auto p
= extra_reqids
.begin();
4534 p
!= extra_reqids
.end();
4536 f
->open_object_section("extra_reqid");
4537 f
->dump_stream("reqid") << p
->first
;
4538 f
->dump_stream("user_version") << p
->second
;
4539 auto it
= extra_reqid_return_codes
.find(idx
);
4540 if (it
!= extra_reqid_return_codes
.end()) {
4541 f
->dump_int("return_code", it
->second
);
4546 f
->dump_stream("mtime") << mtime
;
4547 f
->dump_int("return_code", return_code
);
4548 if (snaps
.length() > 0) {
4550 bufferlist c
= snaps
;
4551 auto p
= c
.cbegin();
4558 f
->open_object_section("snaps");
4559 for (vector
<snapid_t
>::iterator p
= v
.begin(); p
!= v
.end(); ++p
)
4560 f
->dump_unsigned("snap", *p
);
4564 f
->open_object_section("mod_desc");
4570 void pg_log_entry_t::generate_test_instances(list
<pg_log_entry_t
*>& o
)
4572 o
.push_back(new pg_log_entry_t());
4573 hobject_t
oid(object_t("objname"), "key", 123, 456, 0, "");
4574 o
.push_back(new pg_log_entry_t(MODIFY
, oid
, eversion_t(1,2), eversion_t(3,4),
4575 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4577 o
.push_back(new pg_log_entry_t(ERROR
, oid
, eversion_t(1,2), eversion_t(3,4),
4578 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4579 utime_t(8,9), -ENOENT
));
4582 ostream
& operator<<(ostream
& out
, const pg_log_entry_t
& e
)
4584 out
<< e
.version
<< " (" << e
.prior_version
<< ") "
4585 << std::left
<< std::setw(8) << e
.get_op_name() << ' '
4586 << e
.soid
<< " by " << e
.reqid
<< " " << e
.mtime
4587 << " " << e
.return_code
;
4588 if (e
.snaps
.length()) {
4589 vector
<snapid_t
> snaps
;
4590 bufferlist c
= e
.snaps
;
4591 auto p
= c
.cbegin();
4597 out
<< " snaps " << snaps
;
4602 // -- pg_log_dup_t --
4604 std::string
pg_log_dup_t::get_key_name() const
4606 static const char prefix
[] = "dup_";
4607 std::string
key(36, ' ');
4608 memcpy(&key
[0], prefix
, 4);
4609 version
.get_key_name(&key
[4]);
4610 key
.resize(35); // remove the null terminator
4614 void pg_log_dup_t::encode(bufferlist
&bl
) const
4616 ENCODE_START(1, 1, bl
);
4618 encode(version
, bl
);
4619 encode(user_version
, bl
);
4620 encode(return_code
, bl
);
4624 void pg_log_dup_t::decode(bufferlist::const_iterator
&bl
)
4626 DECODE_START(1, bl
);
4628 decode(version
, bl
);
4629 decode(user_version
, bl
);
4630 decode(return_code
, bl
);
4634 void pg_log_dup_t::dump(Formatter
*f
) const
4636 f
->dump_stream("reqid") << reqid
;
4637 f
->dump_stream("version") << version
;
4638 f
->dump_stream("user_version") << user_version
;
4639 f
->dump_stream("return_code") << return_code
;
4642 void pg_log_dup_t::generate_test_instances(list
<pg_log_dup_t
*>& o
)
4644 o
.push_back(new pg_log_dup_t());
4645 o
.push_back(new pg_log_dup_t(eversion_t(1,2),
4647 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4649 o
.push_back(new pg_log_dup_t(eversion_t(1,2),
4651 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4656 std::ostream
& operator<<(std::ostream
& out
, const pg_log_dup_t
& e
) {
4657 return out
<< "log_dup(reqid=" << e
.reqid
<<
4658 " v=" << e
.version
<< " uv=" << e
.user_version
<<
4659 " rc=" << e
.return_code
<< ")";
4665 // out: pg_log_t that only has entries that apply to import_pgid using curmap
4666 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4667 void pg_log_t::filter_log(spg_t import_pgid
, const OSDMap
&curmap
,
4668 const string
&hit_set_namespace
, const pg_log_t
&in
,
4669 pg_log_t
&out
, pg_log_t
&reject
)
4675 for (list
<pg_log_entry_t
>::const_iterator i
= in
.log
.begin();
4676 i
!= in
.log
.end(); ++i
) {
4678 // Reject pg log entries for temporary objects
4679 if (i
->soid
.is_temp()) {
4680 reject
.log
.push_back(*i
);
4684 if (i
->soid
.nspace
!= hit_set_namespace
) {
4685 object_t oid
= i
->soid
.oid
;
4686 object_locator_t
loc(i
->soid
);
4687 pg_t raw_pgid
= curmap
.object_locator_to_pg(oid
, loc
);
4688 pg_t pgid
= curmap
.raw_pg_to_pg(raw_pgid
);
4690 if (import_pgid
.pgid
== pgid
) {
4691 out
.log
.push_back(*i
);
4693 reject
.log
.push_back(*i
);
4696 out
.log
.push_back(*i
);
4701 void pg_log_t::encode(bufferlist
& bl
) const
4703 ENCODE_START(7, 3, bl
);
4707 encode(can_rollback_to
, bl
);
4708 encode(rollback_info_trimmed_to
, bl
);
4713 void pg_log_t::decode(bufferlist::const_iterator
&bl
, int64_t pool
)
4715 DECODE_START_LEGACY_COMPAT_LEN(7, 3, 3, bl
);
4720 decode(backlog
, bl
);
4724 decode(can_rollback_to
, bl
);
4727 decode(rollback_info_trimmed_to
, bl
);
4729 rollback_info_trimmed_to
= tail
;
4736 // handle hobject_t format change
4738 for (list
<pg_log_entry_t
>::iterator i
= log
.begin();
4741 if (!i
->soid
.is_max() && i
->soid
.pool
== -1)
4742 i
->soid
.pool
= pool
;
4747 void pg_log_t::dump(Formatter
*f
) const
4749 f
->dump_stream("head") << head
;
4750 f
->dump_stream("tail") << tail
;
4751 f
->open_array_section("log");
4752 for (list
<pg_log_entry_t
>::const_iterator p
= log
.begin(); p
!= log
.end(); ++p
) {
4753 f
->open_object_section("entry");
4758 f
->open_array_section("dups");
4759 for (const auto& entry
: dups
) {
4760 f
->open_object_section("entry");
4767 void pg_log_t::generate_test_instances(list
<pg_log_t
*>& o
)
4769 o
.push_back(new pg_log_t
);
4771 // this is nonsensical:
4772 o
.push_back(new pg_log_t
);
4773 o
.back()->head
= eversion_t(1,2);
4774 o
.back()->tail
= eversion_t(3,4);
4775 list
<pg_log_entry_t
*> e
;
4776 pg_log_entry_t::generate_test_instances(e
);
4777 for (list
<pg_log_entry_t
*>::iterator p
= e
.begin(); p
!= e
.end(); ++p
)
4778 o
.back()->log
.push_back(**p
);
4781 static void _handle_dups(CephContext
* cct
, pg_log_t
&target
, const pg_log_t
&other
, unsigned maxdups
)
4783 auto earliest_dup_version
=
4784 target
.head
.version
< maxdups
? 0u : target
.head
.version
- maxdups
+ 1;
4785 lgeneric_subdout(cct
, osd
, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version
<< dendl
;
4787 for (auto d
= other
.dups
.cbegin(); d
!= other
.dups
.cend(); ++d
) {
4788 if (d
->version
.version
>= earliest_dup_version
) {
4789 lgeneric_subdout(cct
, osd
, 20)
4790 << "copy_up_to/copy_after copy dup version "
4791 << d
->version
<< dendl
;
4792 target
.dups
.push_back(pg_log_dup_t(*d
));
4796 for (auto i
= other
.log
.cbegin(); i
!= other
.log
.cend(); ++i
) {
4797 ceph_assert(i
->version
> other
.tail
);
4798 if (i
->version
> target
.tail
)
4800 if (i
->version
.version
>= earliest_dup_version
) {
4801 lgeneric_subdout(cct
, osd
, 20)
4802 << "copy_up_to/copy_after copy dup from log version "
4803 << i
->version
<< dendl
;
4804 target
.dups
.push_back(pg_log_dup_t(*i
));
4810 void pg_log_t::copy_after(CephContext
* cct
, const pg_log_t
&other
, eversion_t v
)
4812 can_rollback_to
= other
.can_rollback_to
;
4815 lgeneric_subdout(cct
, osd
, 20) << __func__
<< " v " << v
<< dendl
;
4816 for (list
<pg_log_entry_t
>::const_reverse_iterator i
= other
.log
.rbegin();
4817 i
!= other
.log
.rend();
4819 ceph_assert(i
->version
> other
.tail
);
4820 if (i
->version
<= v
) {
4821 // make tail accurate.
4825 lgeneric_subdout(cct
, osd
, 20) << __func__
<< " copy log version " << i
->version
<< dendl
;
4828 _handle_dups(cct
, *this, other
, cct
->_conf
->osd_pg_log_dups_tracked
);
4831 void pg_log_t::copy_up_to(CephContext
* cct
, const pg_log_t
&other
, int max
)
4833 can_rollback_to
= other
.can_rollback_to
;
4837 lgeneric_subdout(cct
, osd
, 20) << __func__
<< " max " << max
<< dendl
;
4838 for (list
<pg_log_entry_t
>::const_reverse_iterator i
= other
.log
.rbegin();
4839 i
!= other
.log
.rend();
4841 ceph_assert(i
->version
> other
.tail
);
4846 lgeneric_subdout(cct
, osd
, 20) << __func__
<< " copy log version " << i
->version
<< dendl
;
4849 _handle_dups(cct
, *this, other
, cct
->_conf
->osd_pg_log_dups_tracked
);
4852 ostream
& pg_log_t::print(ostream
& out
) const
4854 out
<< *this << std::endl
;
4855 for (list
<pg_log_entry_t
>::const_iterator p
= log
.begin();
4858 out
<< *p
<< std::endl
;
4859 for (const auto& entry
: dups
) {
4860 out
<< " dup entry: " << entry
<< std::endl
;
4865 // -- pg_missing_t --
4867 ostream
& operator<<(ostream
& out
, const pg_missing_item
& i
)
4870 if (i
.have
!= eversion_t())
4871 out
<< "(" << i
.have
<< ")";
4872 out
<< " flags = " << i
.flag_str();
4876 // -- object_copy_cursor_t --
// Serialize this copy cursor's five resume-progress fields into bl.
// Versioned encode: struct_v 1, oldest compatible 1; decode() reads the
// same fields in the same order.
// NOTE(review): this extraction is missing the original opening brace
// and the trailing ENCODE_FINISH/closing lines -- confirm against the
// full source.
4878 void object_copy_cursor_t::encode(bufferlist
& bl
) const
4880 ENCODE_START(1, 1, bl
);
// whether the attribute-copy phase has finished
4881 encode(attr_complete
, bl
);
// byte offset reached in the object-data copy
4882 encode(data_offset
, bl
);
// whether the data-copy phase has finished
4883 encode(data_complete
, bl
);
// resume position in the omap iteration (a string key, per dump())
4884 encode(omap_offset
, bl
);
// whether the omap-copy phase has finished
4885 encode(omap_complete
, bl
);
// Deserialize a copy cursor; mirror image of encode() above: same five
// fields, same order, struct_v 1.
// NOTE(review): the original opening brace and DECODE_FINISH/closing
// lines are absent from this extraction -- confirm against full source.
4889 void object_copy_cursor_t::decode(bufferlist::const_iterator
&bl
)
4891 DECODE_START(1, bl
);
4892 decode(attr_complete
, bl
);
4893 decode(data_offset
, bl
);
4894 decode(data_complete
, bl
);
4895 decode(omap_offset
, bl
);
4896 decode(omap_complete
, bl
);
// Emit the cursor's fields through a Formatter (for JSON/XML output).
// Booleans are widened to int so dump_unsigned renders them as 0/1;
// omap_offset is a string and uses dump_string.
4900 void object_copy_cursor_t::dump(Formatter
*f
) const
4902 f
->dump_unsigned("attr_complete", (int)attr_complete
);
4903 f
->dump_unsigned("data_offset", data_offset
);
4904 f
->dump_unsigned("data_complete", (int)data_complete
);
4905 f
->dump_string("omap_offset", omap_offset
);
4906 f
->dump_unsigned("omap_complete", (int)omap_complete
);
// Produce representative instances for encode/decode round-trip tests
// (used by ceph-dencoder): a default cursor plus three progressively
// further-advanced cursors. Callers own the new'd objects.
4909 void object_copy_cursor_t::generate_test_instances(list
<object_copy_cursor_t
*>& o
)
// default-constructed cursor (nothing copied yet)
4911 o
.push_back(new object_copy_cursor_t
);
// attrs done, partway through data
4912 o
.push_back(new object_copy_cursor_t
);
4913 o
.back()->attr_complete
= true;
4914 o
.back()->data_offset
= 123;
// attrs+data done, partway through omap
4915 o
.push_back(new object_copy_cursor_t
);
4916 o
.back()->attr_complete
= true;
4917 o
.back()->data_complete
= true;
4918 o
.back()->omap_offset
= "foo";
// all phases complete
4919 o
.push_back(new object_copy_cursor_t
);
4920 o
.back()->attr_complete
= true;
4921 o
.back()->data_complete
= true;
4922 o
.back()->omap_complete
= true;
4925 // -- object_copy_data_t --
4927 void object_copy_data_t::encode(bufferlist
& bl
, uint64_t features
) const
4929 ENCODE_START(8, 5, bl
);
4934 encode(omap_data
, bl
);
4936 encode(omap_header
, bl
);
4938 encode(snap_seq
, bl
);
4940 encode(data_digest
, bl
);
4941 encode(omap_digest
, bl
);
4943 encode(truncate_seq
, bl
);
4944 encode(truncate_size
, bl
);
4945 encode(reqid_return_codes
, bl
);
4949 void object_copy_data_t::decode(bufferlist::const_iterator
& bl
)
4951 DECODE_START(7, bl
);
4958 decode(category
, bl
); // no longer used
4963 map
<string
,bufferlist
> omap
;
4966 if (!omap
.empty()) {
4968 encode(omap
, omap_data
);
4973 decode(omap_header
, bl
);
4974 if (struct_v
>= 3) {
4976 decode(snap_seq
, bl
);
4981 if (struct_v
>= 4) {
4983 decode(data_digest
, bl
);
4984 decode(omap_digest
, bl
);
4992 decode(omap_data
, bl
);
4994 decode(omap_header
, bl
);
4996 decode(snap_seq
, bl
);
4997 if (struct_v
>= 4) {
4999 decode(data_digest
, bl
);
5000 decode(omap_digest
, bl
);
5002 if (struct_v
>= 6) {
5005 if (struct_v
>= 7) {
5006 decode(truncate_seq
, bl
);
5007 decode(truncate_size
, bl
);
5009 if (struct_v
>= 8) {
5010 decode(reqid_return_codes
, bl
);
5016 void object_copy_data_t::generate_test_instances(list
<object_copy_data_t
*>& o
)
5018 o
.push_back(new object_copy_data_t());
5020 list
<object_copy_cursor_t
*> cursors
;
5021 object_copy_cursor_t::generate_test_instances(cursors
);
5022 list
<object_copy_cursor_t
*>::iterator ci
= cursors
.begin();
5023 o
.back()->cursor
= **(ci
++);
5025 o
.push_back(new object_copy_data_t());
5026 o
.back()->cursor
= **(ci
++);
5028 o
.push_back(new object_copy_data_t());
5029 o
.back()->size
= 1234;
5030 o
.back()->mtime
.set_from_double(1234);
5031 bufferptr
bp("there", 5);
5034 o
.back()->attrs
["hello"] = bl
;
5035 bufferptr
bp2("not", 3);
5038 map
<string
,bufferlist
> omap
;
5041 encode(omap
, o
.back()->omap_data
);
5042 bufferptr
databp("iamsomedatatocontain", 20);
5043 o
.back()->data
.push_back(databp
);
5044 o
.back()->omap_header
.append("this is an omap header");
5045 o
.back()->snaps
.push_back(123);
5046 o
.back()->reqids
.push_back(make_pair(osd_reqid_t(), version_t()));
5049 void object_copy_data_t::dump(Formatter
*f
) const
5051 f
->open_object_section("cursor");
5053 f
->close_section(); // cursor
5054 f
->dump_int("size", size
);
5055 f
->dump_stream("mtime") << mtime
;
5056 /* we should really print out the attrs here, but bufferlist
5057 const-correctness prevents that */
5058 f
->dump_int("attrs_size", attrs
.size());
5059 f
->dump_int("flags", flags
);
5060 f
->dump_unsigned("data_digest", data_digest
);
5061 f
->dump_unsigned("omap_digest", omap_digest
);
5062 f
->dump_int("omap_data_length", omap_data
.length());
5063 f
->dump_int("omap_header_length", omap_header
.length());
5064 f
->dump_int("data_length", data
.length());
5065 f
->open_array_section("snaps");
5066 for (vector
<snapid_t
>::const_iterator p
= snaps
.begin();
5067 p
!= snaps
.end(); ++p
)
5068 f
->dump_unsigned("snap", *p
);
5070 f
->open_array_section("reqids");
5072 for (auto p
= reqids
.begin();
5075 f
->open_object_section("extra_reqid");
5076 f
->dump_stream("reqid") << p
->first
;
5077 f
->dump_stream("user_version") << p
->second
;
5078 auto it
= reqid_return_codes
.find(idx
);
5079 if (it
!= reqid_return_codes
.end()) {
5080 f
->dump_int("return_code", it
->second
);
5087 // -- pg_create_t --
// Serialize a PG-creation request: creation epoch, (parent, split_bits)
// split info. struct_v 1.
// NOTE(review): original line 5093 is missing from this extraction --
// presumably encode(parent, bl), matching the "parent" field shown in
// dump(); confirm against the full source. Opening brace and
// ENCODE_FINISH/closing lines are also absent.
5089 void pg_create_t::encode(bufferlist
&bl
) const
5091 ENCODE_START(1, 1, bl
);
5092 encode(created
, bl
);
5094 encode(split_bits
, bl
);
// Deserialize a PG-creation request; mirrors encode() above.
// NOTE(review): original line 5102 is missing from this extraction --
// presumably decode(parent, bl), symmetric with the encode-side gap;
// confirm against the full source. Brace/DECODE_FINISH lines also
// absent.
5098 void pg_create_t::decode(bufferlist::const_iterator
&bl
)
5100 DECODE_START(1, bl
);
5101 decode(created
, bl
);
5103 decode(split_bits
, bl
);
// Emit the creation request's fields through a Formatter. parent is a
// pg_t and is streamed (dump_stream) rather than dumped numerically.
5107 void pg_create_t::dump(Formatter
*f
) const
5109 f
->dump_unsigned("created", created
);
5110 f
->dump_stream("parent") << parent
;
5111 f
->dump_int("split_bits", split_bits
);
// Test instances for encode/decode round-trips: one default-constructed
// request and one fully-populated (epoch 1, parent pg 3.4, 2 split
// bits). Callers own the new'd objects.
5114 void pg_create_t::generate_test_instances(list
<pg_create_t
*>& o
)
5116 o
.push_back(new pg_create_t
);
5117 o
.push_back(new pg_create_t(1, pg_t(3, 4), 2));
5121 // -- pg_hit_set_info_t --
5123 void pg_hit_set_info_t::encode(bufferlist
& bl
) const
5125 ENCODE_START(2, 1, bl
);
5128 encode(version
, bl
);
5129 encode(using_gmt
, bl
);
5133 void pg_hit_set_info_t::decode(bufferlist::const_iterator
& p
)
5139 if (struct_v
>= 2) {
5140 decode(using_gmt
, p
);
5147 void pg_hit_set_info_t::dump(Formatter
*f
) const
5149 f
->dump_stream("begin") << begin
;
5150 f
->dump_stream("end") << end
;
5151 f
->dump_stream("version") << version
;
5152 f
->dump_stream("using_gmt") << using_gmt
;
5155 void pg_hit_set_info_t::generate_test_instances(list
<pg_hit_set_info_t
*>& ls
)
5157 ls
.push_back(new pg_hit_set_info_t
);
5158 ls
.push_back(new pg_hit_set_info_t
);
5159 ls
.back()->begin
= utime_t(1, 2);
5160 ls
.back()->end
= utime_t(3, 4);
5164 // -- pg_hit_set_history_t --
5166 void pg_hit_set_history_t::encode(bufferlist
& bl
) const
5168 ENCODE_START(1, 1, bl
);
5169 encode(current_last_update
, bl
);
5171 utime_t dummy_stamp
;
5172 encode(dummy_stamp
, bl
);
5175 pg_hit_set_info_t dummy_info
;
5176 encode(dummy_info
, bl
);
5178 encode(history
, bl
);
5182 void pg_hit_set_history_t::decode(bufferlist::const_iterator
& p
)
5185 decode(current_last_update
, p
);
5187 utime_t dummy_stamp
;
5188 decode(dummy_stamp
, p
);
5191 pg_hit_set_info_t dummy_info
;
5192 decode(dummy_info
, p
);
5198 void pg_hit_set_history_t::dump(Formatter
*f
) const
5200 f
->dump_stream("current_last_update") << current_last_update
;
5201 f
->open_array_section("history");
5202 for (list
<pg_hit_set_info_t
>::const_iterator p
= history
.begin();
5203 p
!= history
.end(); ++p
) {
5204 f
->open_object_section("info");
5211 void pg_hit_set_history_t::generate_test_instances(list
<pg_hit_set_history_t
*>& ls
)
5213 ls
.push_back(new pg_hit_set_history_t
);
5214 ls
.push_back(new pg_hit_set_history_t
);
5215 ls
.back()->current_last_update
= eversion_t(1, 2);
5216 ls
.back()->history
.push_back(pg_hit_set_info_t());
5219 // -- OSDSuperblock --
5221 void OSDSuperblock::encode(bufferlist
&bl
) const
5223 ENCODE_START(8, 5, bl
);
5224 encode(cluster_fsid
, bl
);
5226 encode(current_epoch
, bl
);
5227 encode(oldest_map
, bl
);
5228 encode(newest_map
, bl
);
5230 compat_features
.encode(bl
);
5231 encode(clean_thru
, bl
);
5232 encode(mounted
, bl
);
5233 encode(osd_fsid
, bl
);
5234 encode((epoch_t
)0, bl
); // epoch_t last_epoch_marked_full
5235 encode((uint32_t)0, bl
); // map<int64_t,epoch_t> pool_last_epoch_marked_full
5239 void OSDSuperblock::decode(bufferlist::const_iterator
&bl
)
5241 DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl
);
5246 decode(cluster_fsid
, bl
);
5248 decode(current_epoch
, bl
);
5249 decode(oldest_map
, bl
);
5250 decode(newest_map
, bl
);
5252 if (struct_v
>= 2) {
5253 compat_features
.decode(bl
);
5254 } else { //upgrade it!
5255 compat_features
.incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE
);
5257 decode(clean_thru
, bl
);
5258 decode(mounted
, bl
);
5260 decode(osd_fsid
, bl
);
5261 if (struct_v
>= 6) {
5262 epoch_t last_map_marked_full
;
5263 decode(last_map_marked_full
, bl
);
5265 if (struct_v
>= 7) {
5266 map
<int64_t,epoch_t
> pool_last_map_marked_full
;
5267 decode(pool_last_map_marked_full
, bl
);
5272 void OSDSuperblock::dump(Formatter
*f
) const
5274 f
->dump_stream("cluster_fsid") << cluster_fsid
;
5275 f
->dump_stream("osd_fsid") << osd_fsid
;
5276 f
->dump_int("whoami", whoami
);
5277 f
->dump_int("current_epoch", current_epoch
);
5278 f
->dump_int("oldest_map", oldest_map
);
5279 f
->dump_int("newest_map", newest_map
);
5280 f
->dump_float("weight", weight
);
5281 f
->open_object_section("compat");
5282 compat_features
.dump(f
);
5284 f
->dump_int("clean_thru", clean_thru
);
5285 f
->dump_int("last_epoch_mounted", mounted
);
5288 void OSDSuperblock::generate_test_instances(list
<OSDSuperblock
*>& o
)
5291 o
.push_back(new OSDSuperblock(z
));
5292 z
.cluster_fsid
.parse("01010101-0101-0101-0101-010101010101");
5293 z
.osd_fsid
.parse("02020202-0202-0202-0202-020202020202");
5295 z
.current_epoch
= 4;
5300 o
.push_back(new OSDSuperblock(z
));
5301 o
.push_back(new OSDSuperblock(z
));
5306 void SnapSet::encode(bufferlist
& bl
) const
5308 ENCODE_START(3, 2, bl
);
5310 encode(true, bl
); // head_exists
5313 encode(clone_overlap
, bl
);
5314 encode(clone_size
, bl
);
5315 encode(clone_snaps
, bl
);
5319 void SnapSet::decode(bufferlist::const_iterator
& bl
)
5321 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl
);
5323 bl
.advance(1u); // skip legacy head_exists (always true)
5326 decode(clone_overlap
, bl
);
5327 decode(clone_size
, bl
);
5328 if (struct_v
>= 3) {
5329 decode(clone_snaps
, bl
);
5331 clone_snaps
.clear();
5336 void SnapSet::dump(Formatter
*f
) const
5338 SnapContext
sc(seq
, snaps
);
5339 f
->open_object_section("snap_context");
5342 f
->open_array_section("clones");
5343 for (vector
<snapid_t
>::const_iterator p
= clones
.begin(); p
!= clones
.end(); ++p
) {
5344 f
->open_object_section("clone");
5345 f
->dump_unsigned("snap", *p
);
5346 auto cs
= clone_size
.find(*p
);
5347 if (cs
!= clone_size
.end())
5348 f
->dump_unsigned("size", cs
->second
);
5350 f
->dump_string("size", "????");
5351 auto co
= clone_overlap
.find(*p
);
5352 if (co
!= clone_overlap
.end())
5353 f
->dump_stream("overlap") << co
->second
;
5355 f
->dump_stream("overlap") << "????";
5356 auto q
= clone_snaps
.find(*p
);
5357 if (q
!= clone_snaps
.end()) {
5358 f
->open_array_section("snaps");
5359 for (auto s
: q
->second
) {
5360 f
->dump_unsigned("snap", s
);
5369 void SnapSet::generate_test_instances(list
<SnapSet
*>& o
)
5371 o
.push_back(new SnapSet
);
5372 o
.push_back(new SnapSet
);
5373 o
.back()->seq
= 123;
5374 o
.back()->snaps
.push_back(123);
5375 o
.back()->snaps
.push_back(12);
5376 o
.push_back(new SnapSet
);
5377 o
.back()->seq
= 123;
5378 o
.back()->snaps
.push_back(123);
5379 o
.back()->snaps
.push_back(12);
5380 o
.back()->clones
.push_back(12);
5381 o
.back()->clone_size
[12] = 12345;
5382 o
.back()->clone_overlap
[12];
5383 o
.back()->clone_snaps
[12] = {12, 10, 8};
5386 ostream
& operator<<(ostream
& out
, const SnapSet
& cs
)
5388 return out
<< cs
.seq
<< "=" << cs
.snaps
<< ":"
5392 void SnapSet::from_snap_set(const librados::snap_set_t
& ss
, bool legacy
)
5394 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
5395 // correct: it will not include snaps that still logically exist
5396 // but for which there was no clone that is defined. For all
5397 // practical purposes this doesn't matter, since we only use that
5398 // information to clone on the OSD, and we have already moved
5399 // forward past that part of the object history.
5402 set
<snapid_t
> _snaps
;
5403 set
<snapid_t
> _clones
;
5404 for (vector
<librados::clone_info_t
>::const_iterator p
= ss
.clones
.begin();
5405 p
!= ss
.clones
.end();
5407 if (p
->cloneid
!= librados::SNAP_HEAD
) {
5408 _clones
.insert(p
->cloneid
);
5409 _snaps
.insert(p
->snaps
.begin(), p
->snaps
.end());
5410 clone_size
[p
->cloneid
] = p
->size
;
5411 clone_overlap
[p
->cloneid
]; // the entry must exist, even if it's empty.
5412 for (vector
<pair
<uint64_t, uint64_t> >::const_iterator q
=
5413 p
->overlap
.begin(); q
!= p
->overlap
.end(); ++q
)
5414 clone_overlap
[p
->cloneid
].insert(q
->first
, q
->second
);
5416 // p->snaps is ascending; clone_snaps is descending
5417 vector
<snapid_t
>& v
= clone_snaps
[p
->cloneid
];
5418 for (auto q
= p
->snaps
.rbegin(); q
!= p
->snaps
.rend(); ++q
) {
5427 clones
.reserve(_clones
.size());
5428 for (set
<snapid_t
>::iterator p
= _clones
.begin(); p
!= _clones
.end(); ++p
)
5429 clones
.push_back(*p
);
5433 snaps
.reserve(_snaps
.size());
5434 for (set
<snapid_t
>::reverse_iterator p
= _snaps
.rbegin();
5435 p
!= _snaps
.rend(); ++p
)
5436 snaps
.push_back(*p
);
// Bytes uniquely attributable to the given clone: its recorded size
// minus the extent of data it shares (overlaps) with the next newer
// object. Asserts that both clone_size and clone_overlap have entries
// for the clone, and that the overlap never exceeds the clone's size.
5439 uint64_t SnapSet::get_clone_bytes(snapid_t clone
) const
5441 ceph_assert(clone_size
.count(clone
));
// total size of the clone as recorded at clone time
5442 uint64_t size
= clone_size
.find(clone
)->second
;
5443 ceph_assert(clone_overlap
.count(clone
));
// byte ranges shared with the newer version (not charged to this clone)
5444 const interval_set
<uint64_t> &overlap
= clone_overlap
.find(clone
)->second
;
5445 ceph_assert(size
>= (uint64_t)overlap
.size());
5446 return size
- overlap
.size();
5449 void SnapSet::filter(const pg_pool_t
&pinfo
)
5451 vector
<snapid_t
> oldsnaps
;
5452 oldsnaps
.swap(snaps
);
5453 for (vector
<snapid_t
>::const_iterator i
= oldsnaps
.begin();
5454 i
!= oldsnaps
.end();
5456 if (!pinfo
.is_removed_snap(*i
))
5457 snaps
.push_back(*i
);
5461 SnapSet
SnapSet::get_filtered(const pg_pool_t
&pinfo
) const
5468 // -- watch_info_t --
5470 void watch_info_t::encode(bufferlist
& bl
, uint64_t features
) const
5472 ENCODE_START(4, 3, bl
);
5474 encode(timeout_seconds
, bl
);
5475 encode(addr
, bl
, features
);
5479 void watch_info_t::decode(bufferlist::const_iterator
& bl
)
5481 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl
);
5487 decode(timeout_seconds
, bl
);
5488 if (struct_v
>= 4) {
5494 void watch_info_t::dump(Formatter
*f
) const
5496 f
->dump_unsigned("cookie", cookie
);
5497 f
->dump_unsigned("timeout_seconds", timeout_seconds
);
5498 f
->open_object_section("addr");
5503 void watch_info_t::generate_test_instances(list
<watch_info_t
*>& o
)
5505 o
.push_back(new watch_info_t
);
5506 o
.push_back(new watch_info_t
);
5507 o
.back()->cookie
= 123;
5508 o
.back()->timeout_seconds
= 99;
5510 ea
.set_type(entity_addr_t::TYPE_LEGACY
);
5512 ea
.set_family(AF_INET
);
5513 ea
.set_in4_quad(0, 127);
5514 ea
.set_in4_quad(1, 0);
5515 ea
.set_in4_quad(2, 1);
5516 ea
.set_in4_quad(3, 2);
5518 o
.back()->addr
= ea
;
5521 // -- chunk_info_t --
5523 void chunk_info_t::encode(bufferlist
& bl
) const
5525 ENCODE_START(1, 1, bl
);
5529 __u32 _flags
= flags
;
5534 void chunk_info_t::decode(bufferlist::const_iterator
& bl
)
5536 DECODE_START(1, bl
);
5542 flags
= (cflag_t
)_flags
;
5546 void chunk_info_t::dump(Formatter
*f
) const
5548 f
->dump_unsigned("length", length
);
5549 f
->open_object_section("oid");
5552 f
->dump_unsigned("flags", flags
);
5555 ostream
& operator<<(ostream
& out
, const chunk_info_t
& ci
)
5557 return out
<< "(len: " << ci
.length
<< " oid: " << ci
.oid
5558 << " offset: " << ci
.offset
5559 << " flags: " << ci
.get_flag_string(ci
.flags
) << ")";
5562 // -- object_manifest_t --
5564 void object_manifest_t::encode(bufferlist
& bl
) const
5566 ENCODE_START(1, 1, bl
);
5569 case TYPE_NONE
: break;
5571 encode(redirect_target
, bl
);
5574 encode(chunk_map
, bl
);
5582 void object_manifest_t::decode(bufferlist::const_iterator
& bl
)
5584 DECODE_START(1, bl
);
5587 case TYPE_NONE
: break;
5589 decode(redirect_target
, bl
);
5592 decode(chunk_map
, bl
);
5600 void object_manifest_t::dump(Formatter
*f
) const
5602 f
->dump_unsigned("type", type
);
5603 if (type
== TYPE_REDIRECT
) {
5604 f
->open_object_section("redirect_target");
5605 redirect_target
.dump(f
);
5607 } else if (type
== TYPE_CHUNKED
) {
5608 f
->open_array_section("chunk_map");
5609 for (auto& p
: chunk_map
) {
5610 f
->open_object_section("chunk");
5611 f
->dump_unsigned("offset", p
.first
);
5619 void object_manifest_t::generate_test_instances(list
<object_manifest_t
*>& o
)
5621 o
.push_back(new object_manifest_t());
5622 o
.back()->type
= TYPE_REDIRECT
;
5625 ostream
& operator<<(ostream
& out
, const object_manifest_t
& om
)
5627 out
<< "manifest(" << om
.get_type_name();
5628 if (om
.is_redirect()) {
5629 out
<< " " << om
.redirect_target
;
5630 } else if (om
.is_chunked()) {
5631 out
<< " " << om
.chunk_map
;
5637 // -- object_info_t --
5639 void object_info_t::copy_user_bits(const object_info_t
& other
)
5641 // these bits are copied from head->clone.
5643 mtime
= other
.mtime
;
5644 local_mtime
= other
.local_mtime
;
5645 last_reqid
= other
.last_reqid
;
5646 truncate_seq
= other
.truncate_seq
;
5647 truncate_size
= other
.truncate_size
;
5648 flags
= other
.flags
;
5649 user_version
= other
.user_version
;
5650 data_digest
= other
.data_digest
;
5651 omap_digest
= other
.omap_digest
;
5654 void object_info_t::encode(bufferlist
& bl
, uint64_t features
) const
5656 object_locator_t
myoloc(soid
);
5657 map
<entity_name_t
, watch_info_t
> old_watchers
;
5658 for (map
<pair
<uint64_t, entity_name_t
>, watch_info_t
>::const_iterator i
=
5660 i
!= watchers
.end();
5662 old_watchers
.insert(make_pair(i
->first
.second
, i
->second
));
5664 ENCODE_START(17, 8, bl
);
5666 encode(myoloc
, bl
); //Retained for compatibility
5667 encode((__u32
)0, bl
); // was category, no longer used
5668 encode(version
, bl
);
5669 encode(prior_version
, bl
);
5670 encode(last_reqid
, bl
);
5673 if (soid
.snap
== CEPH_NOSNAP
)
5674 encode(osd_reqid_t(), bl
); // used to be wrlock_by
5676 encode((uint32_t)0, bl
); // was legacy_snaps
5677 encode(truncate_seq
, bl
);
5678 encode(truncate_size
, bl
);
5679 encode(is_lost(), bl
);
5680 encode(old_watchers
, bl
, features
);
5681 /* shenanigans to avoid breaking backwards compatibility in the disk format.
5682 * When we can, switch this out for simply putting the version_t on disk. */
5683 eversion_t
user_eversion(0, user_version
);
5684 encode(user_eversion
, bl
);
5685 encode(test_flag(FLAG_USES_TMAP
), bl
);
5686 encode(watchers
, bl
, features
);
5687 __u32 _flags
= flags
;
5689 encode(local_mtime
, bl
);
5690 encode(data_digest
, bl
);
5691 encode(omap_digest
, bl
);
5692 encode(expected_object_size
, bl
);
5693 encode(expected_write_size
, bl
);
5694 encode(alloc_hint_flags
, bl
);
5695 if (has_manifest()) {
5696 encode(manifest
, bl
);
5701 void object_info_t::decode(bufferlist::const_iterator
& bl
)
5703 object_locator_t myoloc
;
5704 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl
);
5705 map
<entity_name_t
, watch_info_t
> old_watchers
;
5710 decode(category
, bl
); // no longer used
5712 decode(version
, bl
);
5713 decode(prior_version
, bl
);
5714 decode(last_reqid
, bl
);
5717 if (soid
.snap
== CEPH_NOSNAP
) {
5718 osd_reqid_t wrlock_by
;
5719 decode(wrlock_by
, bl
);
5721 vector
<snapid_t
> legacy_snaps
;
5722 decode(legacy_snaps
, bl
);
5724 decode(truncate_seq
, bl
);
5725 decode(truncate_size
, bl
);
5727 // if this is struct_v >= 13, we will overwrite this
5728 // below since this field is just here for backwards
5734 decode(old_watchers
, bl
);
5735 eversion_t user_eversion
;
5736 decode(user_eversion
, bl
);
5737 user_version
= user_eversion
.version
;
5739 if (struct_v
>= 9) {
5740 bool uses_tmap
= false;
5741 decode(uses_tmap
, bl
);
5743 set_flag(FLAG_USES_TMAP
);
5745 set_flag(FLAG_USES_TMAP
);
5748 soid
.pool
= myoloc
.pool
;
5749 if (struct_v
>= 11) {
5750 decode(watchers
, bl
);
5752 for (map
<entity_name_t
, watch_info_t
>::iterator i
= old_watchers
.begin();
5753 i
!= old_watchers
.end();
5757 make_pair(i
->second
.cookie
, i
->first
), i
->second
));
5760 if (struct_v
>= 13) {
5763 flags
= (flag_t
)_flags
;
5765 if (struct_v
>= 14) {
5766 decode(local_mtime
, bl
);
5768 local_mtime
= utime_t();
5770 if (struct_v
>= 15) {
5771 decode(data_digest
, bl
);
5772 decode(omap_digest
, bl
);
5774 data_digest
= omap_digest
= -1;
5775 clear_flag(FLAG_DATA_DIGEST
);
5776 clear_flag(FLAG_OMAP_DIGEST
);
5778 if (struct_v
>= 16) {
5779 decode(expected_object_size
, bl
);
5780 decode(expected_write_size
, bl
);
5781 decode(alloc_hint_flags
, bl
);
5783 expected_object_size
= 0;
5784 expected_write_size
= 0;
5785 alloc_hint_flags
= 0;
5787 if (struct_v
>= 17) {
5788 if (has_manifest()) {
5789 decode(manifest
, bl
);
5795 void object_info_t::dump(Formatter
*f
) const
5797 f
->open_object_section("oid");
5800 f
->dump_stream("version") << version
;
5801 f
->dump_stream("prior_version") << prior_version
;
5802 f
->dump_stream("last_reqid") << last_reqid
;
5803 f
->dump_unsigned("user_version", user_version
);
5804 f
->dump_unsigned("size", size
);
5805 f
->dump_stream("mtime") << mtime
;
5806 f
->dump_stream("local_mtime") << local_mtime
;
5807 f
->dump_unsigned("lost", (int)is_lost());
5808 vector
<string
> sv
= get_flag_vector(flags
);
5809 f
->open_array_section("flags");
5811 f
->dump_string("flags", str
);
5813 f
->dump_unsigned("truncate_seq", truncate_seq
);
5814 f
->dump_unsigned("truncate_size", truncate_size
);
5815 f
->dump_format("data_digest", "0x%08x", data_digest
);
5816 f
->dump_format("omap_digest", "0x%08x", omap_digest
);
5817 f
->dump_unsigned("expected_object_size", expected_object_size
);
5818 f
->dump_unsigned("expected_write_size", expected_write_size
);
5819 f
->dump_unsigned("alloc_hint_flags", alloc_hint_flags
);
5820 f
->dump_object("manifest", manifest
);
5821 f
->open_object_section("watchers");
5822 for (map
<pair
<uint64_t, entity_name_t
>,watch_info_t
>::const_iterator p
=
5823 watchers
.begin(); p
!= watchers
.end(); ++p
) {
5825 ss
<< p
->first
.second
;
5826 f
->open_object_section(ss
.str().c_str());
5833 void object_info_t::generate_test_instances(list
<object_info_t
*>& o
)
5835 o
.push_back(new object_info_t());
5841 ostream
& operator<<(ostream
& out
, const object_info_t
& oi
)
5843 out
<< oi
.soid
<< "(" << oi
.version
5844 << " " << oi
.last_reqid
;
5846 out
<< " " << oi
.get_flag_string();
5847 out
<< " s " << oi
.size
;
5848 out
<< " uv " << oi
.user_version
;
5849 if (oi
.is_data_digest())
5850 out
<< " dd " << std::hex
<< oi
.data_digest
<< std::dec
;
5851 if (oi
.is_omap_digest())
5852 out
<< " od " << std::hex
<< oi
.omap_digest
<< std::dec
;
5853 out
<< " alloc_hint [" << oi
.expected_object_size
5854 << " " << oi
.expected_write_size
5855 << " " << oi
.alloc_hint_flags
<< "]";
5856 if (oi
.has_manifest())
5857 out
<< " " << oi
.manifest
;
5862 // -- ObjectRecovery --
// Serialize recovery progress for one object. struct_v 1.
// NOTE(review): original line 5866 is missing from this extraction --
// presumably encode(first, bl), matching the 'first' member used by
// print()/dump() and the symmetric gap in decode(); confirm against the
// full source. Brace/ENCODE_FINISH lines also absent.
5863 void ObjectRecoveryProgress::encode(bufferlist
&bl
) const
5865 ENCODE_START(1, 1, bl
);
5867 encode(data_complete
, bl
);
// byte offset recovered so far in object data
5868 encode(data_recovered_to
, bl
);
// omap key reached so far (string resume point)
5869 encode(omap_recovered_to
, bl
);
5870 encode(omap_complete
, bl
);
// Deserialize recovery progress; mirrors encode() above.
// NOTE(review): original line 5877 is missing from this extraction --
// presumably decode(first, bl), symmetric with the encode-side gap;
// confirm against the full source. Brace/DECODE_FINISH lines also
// absent.
5874 void ObjectRecoveryProgress::decode(bufferlist::const_iterator
&bl
)
5876 DECODE_START(1, bl
);
5878 decode(data_complete
, bl
);
5879 decode(data_recovered_to
, bl
);
5880 decode(omap_recovered_to
, bl
);
5881 decode(omap_complete
, bl
);
5885 ostream
&operator<<(ostream
&out
, const ObjectRecoveryProgress
&prog
)
5887 return prog
.print(out
);
5890 void ObjectRecoveryProgress::generate_test_instances(
5891 list
<ObjectRecoveryProgress
*>& o
)
5893 o
.push_back(new ObjectRecoveryProgress
);
5894 o
.back()->first
= false;
5895 o
.back()->data_complete
= true;
5896 o
.back()->omap_complete
= true;
5897 o
.back()->data_recovered_to
= 100;
5899 o
.push_back(new ObjectRecoveryProgress
);
5900 o
.back()->first
= true;
5901 o
.back()->data_complete
= false;
5902 o
.back()->omap_complete
= false;
5903 o
.back()->data_recovered_to
= 0;
5906 ostream
&ObjectRecoveryProgress::print(ostream
&out
) const
5908 return out
<< "ObjectRecoveryProgress("
5909 << ( first
? "" : "!" ) << "first, "
5910 << "data_recovered_to:" << data_recovered_to
5911 << ", data_complete:" << ( data_complete
? "true" : "false" )
5912 << ", omap_recovered_to:" << omap_recovered_to
5913 << ", omap_complete:" << ( omap_complete
? "true" : "false" )
5914 << ", error:" << ( error
? "true" : "false" )
5918 void ObjectRecoveryProgress::dump(Formatter
*f
) const
5920 f
->dump_int("first?", first
);
5921 f
->dump_int("data_complete?", data_complete
);
5922 f
->dump_unsigned("data_recovered_to", data_recovered_to
);
5923 f
->dump_int("omap_complete?", omap_complete
);
5924 f
->dump_string("omap_recovered_to", omap_recovered_to
);
5927 void ObjectRecoveryInfo::encode(bufferlist
&bl
, uint64_t features
) const
5929 ENCODE_START(2, 1, bl
);
5931 encode(version
, bl
);
5933 encode(oi
, bl
, features
);
5935 encode(copy_subset
, bl
);
5936 encode(clone_subset
, bl
);
5940 void ObjectRecoveryInfo::decode(bufferlist::const_iterator
&bl
,
5943 DECODE_START(2, bl
);
5945 decode(version
, bl
);
5949 decode(copy_subset
, bl
);
5950 decode(clone_subset
, bl
);
5954 if (!soid
.is_max() && soid
.pool
== -1)
5956 map
<hobject_t
, interval_set
<uint64_t>> tmp
;
5957 tmp
.swap(clone_subset
);
5958 for (map
<hobject_t
, interval_set
<uint64_t>>::iterator i
= tmp
.begin();
5961 hobject_t
first(i
->first
);
5962 if (!first
.is_max() && first
.pool
== -1)
5964 clone_subset
[first
].swap(i
->second
);
5969 void ObjectRecoveryInfo::generate_test_instances(
5970 list
<ObjectRecoveryInfo
*>& o
)
5972 o
.push_back(new ObjectRecoveryInfo
);
5973 o
.back()->soid
= hobject_t(sobject_t("key", CEPH_NOSNAP
));
5974 o
.back()->version
= eversion_t(0,0);
5975 o
.back()->size
= 100;
5979 void ObjectRecoveryInfo::dump(Formatter
*f
) const
5981 f
->dump_stream("object") << soid
;
5982 f
->dump_stream("at_version") << version
;
5983 f
->dump_stream("size") << size
;
5985 f
->open_object_section("object_info");
5990 f
->open_object_section("snapset");
5994 f
->dump_stream("copy_subset") << copy_subset
;
5995 f
->dump_stream("clone_subset") << clone_subset
;
5998 ostream
& operator<<(ostream
& out
, const ObjectRecoveryInfo
&inf
)
6000 return inf
.print(out
);
6003 ostream
&ObjectRecoveryInfo::print(ostream
&out
) const
6005 return out
<< "ObjectRecoveryInfo("
6006 << soid
<< "@" << version
6007 << ", size: " << size
6008 << ", copy_subset: " << copy_subset
6009 << ", clone_subset: " << clone_subset
6010 << ", snapset: " << ss
6014 // -- PushReplyOp --
6015 void PushReplyOp::generate_test_instances(list
<PushReplyOp
*> &o
)
6017 o
.push_back(new PushReplyOp
);
6018 o
.push_back(new PushReplyOp
);
6019 o
.back()->soid
= hobject_t(sobject_t("asdf", 2));
6020 o
.push_back(new PushReplyOp
);
6021 o
.back()->soid
= hobject_t(sobject_t("asdf", CEPH_NOSNAP
));
6024 void PushReplyOp::encode(bufferlist
&bl
) const
6026 ENCODE_START(1, 1, bl
);
6031 void PushReplyOp::decode(bufferlist::const_iterator
&bl
)
6033 DECODE_START(1, bl
);
6038 void PushReplyOp::dump(Formatter
*f
) const
6040 f
->dump_stream("soid") << soid
;
6043 ostream
&PushReplyOp::print(ostream
&out
) const
6046 << "PushReplyOp(" << soid
6050 ostream
& operator<<(ostream
& out
, const PushReplyOp
&op
)
6052 return op
.print(out
);
// Scheduler cost of handling this push-reply: a fixed per-object charge
// plus the configured maximum recovery chunk size, both taken from the
// OSD config. Same formula as PullOp::cost.
6055 uint64_t PushReplyOp::cost(CephContext
*cct
) const
6058 return cct
->_conf
->osd_push_per_object_cost
+
6059 cct
->_conf
->osd_recovery_max_chunk
;
6063 void PullOp::generate_test_instances(list
<PullOp
*> &o
)
6065 o
.push_back(new PullOp
);
6066 o
.push_back(new PullOp
);
6067 o
.back()->soid
= hobject_t(sobject_t("asdf", 2));
6068 o
.back()->recovery_info
.version
= eversion_t(3, 10);
6069 o
.push_back(new PullOp
);
6070 o
.back()->soid
= hobject_t(sobject_t("asdf", CEPH_NOSNAP
));
6071 o
.back()->recovery_info
.version
= eversion_t(0, 0);
6074 void PullOp::encode(bufferlist
&bl
, uint64_t features
) const
6076 ENCODE_START(1, 1, bl
);
6078 encode(recovery_info
, bl
, features
);
6079 encode(recovery_progress
, bl
);
6083 void PullOp::decode(bufferlist::const_iterator
&bl
)
6085 DECODE_START(1, bl
);
6087 decode(recovery_info
, bl
);
6088 decode(recovery_progress
, bl
);
6092 void PullOp::dump(Formatter
*f
) const
6094 f
->dump_stream("soid") << soid
;
6096 f
->open_object_section("recovery_info");
6097 recovery_info
.dump(f
);
6101 f
->open_object_section("recovery_progress");
6102 recovery_progress
.dump(f
);
6107 ostream
&PullOp::print(ostream
&out
) const
6110 << "PullOp(" << soid
6111 << ", recovery_info: " << recovery_info
6112 << ", recovery_progress: " << recovery_progress
6116 ostream
& operator<<(ostream
& out
, const PullOp
&op
)
6118 return op
.print(out
);
// Scheduler cost of servicing this pull: fixed per-object charge plus
// the configured maximum recovery chunk size, from the OSD config.
// Identical formula to PushReplyOp::cost.
6121 uint64_t PullOp::cost(CephContext
*cct
) const
6123 return cct
->_conf
->osd_push_per_object_cost
+
6124 cct
->_conf
->osd_recovery_max_chunk
;
6128 void PushOp::generate_test_instances(list
<PushOp
*> &o
)
6130 o
.push_back(new PushOp
);
6131 o
.push_back(new PushOp
);
6132 o
.back()->soid
= hobject_t(sobject_t("asdf", 2));
6133 o
.back()->version
= eversion_t(3, 10);
6134 o
.push_back(new PushOp
);
6135 o
.back()->soid
= hobject_t(sobject_t("asdf", CEPH_NOSNAP
));
6136 o
.back()->version
= eversion_t(0, 0);
// Serialize this PushOp onto `bl`.  `features` is forwarded to
// recovery_info, whose encoding is feature-dependent.  Field order is
// the wire format and must match decode() exactly.
void PushOp::encode(bufferlist &bl, uint64_t features) const
{
  ENCODE_START(1, 1, bl);
  encode(soid, bl);
  encode(version, bl);
  encode(data, bl);
  encode(data_included, bl);
  encode(omap_header, bl);
  encode(omap_entries, bl);
  encode(attrset, bl);
  encode(recovery_info, bl, features);
  encode(after_progress, bl);
  encode(before_progress, bl);
  ENCODE_FINISH(bl);
}
// Deserialize a PushOp from `bl`.  Field order mirrors encode().
void PushOp::decode(bufferlist::const_iterator &bl)
{
  DECODE_START(1, bl);
  decode(soid, bl);
  decode(version, bl);
  decode(data, bl);
  decode(data_included, bl);
  decode(omap_header, bl);
  decode(omap_entries, bl);
  decode(attrset, bl);
  decode(recovery_info, bl);
  decode(after_progress, bl);
  decode(before_progress, bl);
  DECODE_FINISH(bl);
}
// Emit this PushOp through the Formatter.  Bulk payloads (data,
// omap_header, omap_entries, attrset) are summarized by length/count
// rather than dumped in full.
void PushOp::dump(Formatter *f) const
{
  f->dump_stream("soid") << soid;
  f->dump_stream("version") << version;
  f->dump_int("data_len", data.length());
  f->dump_stream("data_included") << data_included;
  f->dump_int("omap_header_len", omap_header.length());
  f->dump_int("omap_entries_len", omap_entries.size());
  f->dump_int("attrset_len", attrset.size());
  f->open_object_section("recovery_info");
  recovery_info.dump(f);
  f->close_section();
  f->open_object_section("after_progress");
  after_progress.dump(f);
  f->close_section();
  f->open_object_section("before_progress");
  before_progress.dump(f);
  f->close_section();
}
// Human-readable one-line summary, used by operator<<.  Payload fields
// are reported as sizes, matching dump().
ostream &PushOp::print(ostream &out) const
{
  return out
    << "PushOp(" << soid
    << ", version: " << version
    << ", data_included: " << data_included
    << ", data_size: " << data.length()
    << ", omap_header_size: " << omap_header.length()
    << ", omap_entries_size: " << omap_entries.size()
    << ", attrset_size: " << attrset.size()
    << ", recovery_info: " << recovery_info
    << ", after_progress: " << after_progress
    << ", before_progress: " << before_progress
    << ")";
}
// Stream insertion simply delegates to PushOp::print().
ostream& operator<<(ostream& out, const PushOp &op)
{
  return op.print(out);
}
6218 uint64_t PushOp::cost(CephContext
*cct
) const
6220 uint64_t cost
= data_included
.size();
6221 for (map
<string
, bufferlist
>::const_iterator i
=
6222 omap_entries
.begin();
6223 i
!= omap_entries
.end();
6225 cost
+= i
->second
.length();
6227 cost
+= cct
->_conf
->osd_push_per_object_cost
;
// Apply an incremental scrub map `l` on top of this (full) map.  `l`
// must pick up exactly where this map's validity ends, which the
// assert enforces.  Entries marked negative in the increment are
// deletions; everything else overwrites/extends our object set.
void ScrubMap::merge_incr(const ScrubMap &l)
{
  ceph_assert(valid_through == l.incr_since);
  valid_through = l.valid_through;

  for (map<hobject_t,object>::const_iterator p = l.objects.begin();
       p != l.objects.end();
       ++p) {
    if (p->second.negative) {
      // negative entry: the object no longer exists; drop it if present
      map<hobject_t,object>::iterator q = objects.find(p->first);
      if (q != objects.end()) {
	objects.erase(q);
      }
    } else {
      // regular entry: insert or replace
      objects[p->first] = p->second;
    }
  }
}
// Serialize this ScrubMap.  Two placeholder fields (the old attrs
// count and an empty log bufferlist) are still written so that older
// decoders see the v2-compatible layout.
void ScrubMap::encode(bufferlist& bl) const
{
  ENCODE_START(3, 2, bl);
  encode(objects, bl);
  encode((__u32)0, bl); // used to be attrs; now deprecated
  bufferlist old_logbl;  // not used
  encode(old_logbl, bl);
  encode(valid_through, bl);
  encode(incr_since, bl);
  ENCODE_FINISH(bl);
}
// Deserialize a ScrubMap.  `pool` is the pool id of the owning PG; it
// is used to upgrade pre-v3 hobject_t keys, which were encoded without
// a pool, by stamping them with the correct pool id.
void ScrubMap::decode(bufferlist::const_iterator& bl, int64_t pool)
{
  DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
  decode(objects, bl);
  {
    // skip over fields that older encoders wrote but we no longer use
    map<string,string> attrs;  // deprecated
    decode(attrs, bl);
  }
  bufferlist old_logbl;  // not used
  decode(old_logbl, bl);
  decode(valid_through, bl);
  decode(incr_since, bl);
  DECODE_FINISH(bl);

  // handle hobject_t upgrade
  if (struct_v < 3) {
    // re-key every object whose hobject_t predates pool ids (pool -1)
    map<hobject_t, object> tmp;
    tmp.swap(objects);
    for (map<hobject_t, object>::iterator i = tmp.begin();
	 i != tmp.end();
	 ++i) {
      hobject_t first(i->first);
      if (!first.is_max() && first.pool == -1)
	first.pool = pool;
      objects[first] = i->second;
    }
  }
}
// Emit this ScrubMap through the Formatter: the validity interval
// followed by an array of per-object sections (identity fields, then
// the object's own dump()).
void ScrubMap::dump(Formatter *f) const
{
  f->dump_stream("valid_through") << valid_through;
  f->dump_stream("incremental_since") << incr_since;
  f->open_array_section("objects");
  for (map<hobject_t,object>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
    f->open_object_section("object");
    f->dump_string("name", p->first.oid.name);
    f->dump_unsigned("hash", p->first.get_hash());
    f->dump_string("key", p->first.get_key());
    f->dump_int("snapid", p->first.snap);
    p->second.dump(f);
    f->close_section();
  }
  f->close_section();
}
// Populate `o` with ScrubMap instances for the encode/decode
// round-trip tests: a default map, and one with a validity interval
// and two objects borrowed from object::generate_test_instances().
void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
{
  o.push_back(new ScrubMap);
  o.push_back(new ScrubMap);
  o.back()->valid_through = eversion_t(1, 2);
  o.back()->incr_since = eversion_t(3, 4);
  list<object*> obj;
  object::generate_test_instances(obj);
  o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
  obj.pop_back();
  o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
}
6323 // -- ScrubMap::object --
// Serialize a per-object scrub record.  compat_read_error collapses
// the three modern error flags into the single error bit that pre-v8
// decoders understand; two obsolete fields are still written as zeros
// for layout compatibility.
void ScrubMap::object::encode(bufferlist& bl) const
{
  bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
  ENCODE_START(10, 7, bl);
  encode(size, bl);
  encode(negative, bl);
  encode(attrs, bl);
  encode(digest, bl);
  encode(digest_present, bl);
  encode((uint32_t)0, bl);  // obsolete nlinks
  encode((uint32_t)0, bl);  // snapcolls
  encode(omap_digest, bl);
  encode(omap_digest_present, bl);
  encode(compat_read_error, bl);
  encode(stat_error, bl);
  encode(read_error, bl);
  encode(ec_hash_mismatch, bl);
  encode(ec_size_mismatch, bl);
  encode(large_omap_object_found, bl);
  encode(large_omap_object_key_count, bl);
  encode(large_omap_object_value_size, bl);
  encode(object_omap_bytes, bl);
  encode(object_omap_keys, bl);
  ENCODE_FINISH(bl);
}
// Deserialize a per-object scrub record, handling every struct version
// back to v7.  Bool members are bitfields, so each is decoded through
// the `tmp` local before assignment.
void ScrubMap::object::decode(bufferlist::const_iterator& bl)
{
  DECODE_START(10, bl);
  decode(size, bl);
  bool tmp, compat_read_error = false;
  decode(tmp, bl);
  negative = tmp;
  decode(attrs, bl);
  decode(digest, bl);
  decode(tmp, bl);
  digest_present = tmp;
  {
    // obsolete fields written by all encoders; read and discard
    uint32_t nlinks;
    decode(nlinks, bl);
    set<snapid_t> snapcolls;
    decode(snapcolls, bl);
  }
  decode(omap_digest, bl);
  decode(tmp, bl);
  omap_digest_present = tmp;
  decode(compat_read_error, bl);
  decode(tmp, bl);
  stat_error = tmp;
  if (struct_v >= 8) {
    decode(tmp, bl);
    read_error = tmp;
    decode(tmp, bl);
    ec_hash_mismatch = tmp;
    decode(tmp, bl);
    ec_size_mismatch = tmp;
  }
  // If older encoder found a read_error, set read_error
  if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
    read_error = true;
  if (struct_v >= 9) {
    decode(tmp, bl);
    large_omap_object_found = tmp;
    decode(large_omap_object_key_count, bl);
    decode(large_omap_object_value_size, bl);
  }
  if (struct_v >= 10) {
    decode(object_omap_bytes, bl);
    decode(object_omap_keys, bl);
  }
  DECODE_FINISH(bl);
}
// Emit this object record through the Formatter.  Attribute values are
// summarized by length only, never printed raw.
void ScrubMap::object::dump(Formatter *f) const
{
  f->dump_int("size", size);
  f->dump_int("negative", negative);
  f->open_array_section("attrs");
  for (map<string,bufferptr>::const_iterator p = attrs.begin(); p != attrs.end(); ++p) {
    f->open_object_section("attr");
    f->dump_string("name", p->first);
    f->dump_int("length", p->second.length());
    f->close_section();
  }
  f->close_section();
}
// Populate `o` with object records for the encode/decode round-trip
// tests: a default record, a negative (deleted) one, and one with a
// size and two xattrs.
void ScrubMap::object::generate_test_instances(list<object*>& o)
{
  o.push_back(new object);
  o.push_back(new object);
  o.back()->negative = true;
  o.push_back(new object);
  o.back()->size = 123;
  o.back()->attrs["foo"] = buffer::copy("foo", 3);
  o.back()->attrs["bar"] = buffer::copy("barval", 6);
}
// Pretty-print a single OSDOp: the op name followed by op-specific
// argument details.  The op code's type class (data / attr / exec /
// pg) selects which union members of op.op are meaningful, so each
// branch only touches fields valid for that class.
ostream& operator<<(ostream& out, const OSDOp& op)
{
  out << ceph_osd_op_name(op.op.op);
  if (ceph_osd_op_type_data(op.op.op)) {
    // data extent ops
    switch (op.op.op) {
    case CEPH_OSD_OP_ASSERT_VER:
      out << " v" << op.op.assert_ver.ver;
      break;
    case CEPH_OSD_OP_TRUNCATE:
      out << " " << op.op.extent.offset;
      break;
    case CEPH_OSD_OP_MASKTRUNC:
    case CEPH_OSD_OP_TRIMTRUNC:
      out << " " << op.op.extent.truncate_seq << "@"
	  << (int64_t)op.op.extent.truncate_size;
      break;
    case CEPH_OSD_OP_ROLLBACK:
      out << " " << snapid_t(op.op.snap.snapid);
      break;
    case CEPH_OSD_OP_WATCH:
      out << " " << ceph_osd_watch_op_name(op.op.watch.op)
	  << " cookie " << op.op.watch.cookie;
      if (op.op.watch.gen)
	out << " gen " << op.op.watch.gen;
      break;
    case CEPH_OSD_OP_NOTIFY:
      out << " cookie " << op.op.notify.cookie;
      break;
    case CEPH_OSD_OP_COPY_GET:
      out << " max " << op.op.copy_get.max;
      break;
    case CEPH_OSD_OP_COPY_FROM:
      out << " ver " << op.op.copy_from.src_version;
      break;
    case CEPH_OSD_OP_SETALLOCHINT:
      out << " object_size " << op.op.alloc_hint.expected_object_size
	  << " write_size " << op.op.alloc_hint.expected_write_size;
      break;
    case CEPH_OSD_OP_READ:
    case CEPH_OSD_OP_SPARSE_READ:
    case CEPH_OSD_OP_SYNC_READ:
    case CEPH_OSD_OP_WRITE:
    case CEPH_OSD_OP_WRITEFULL:
    case CEPH_OSD_OP_ZERO:
    case CEPH_OSD_OP_APPEND:
    case CEPH_OSD_OP_MAPEXT:
    case CEPH_OSD_OP_CMPEXT:
      // extent ops: offset~length, optional truncate info and flags
      out << " " << op.op.extent.offset << "~" << op.op.extent.length;
      if (op.op.extent.truncate_seq)
	out << " [" << op.op.extent.truncate_seq << "@"
	    << (int64_t)op.op.extent.truncate_size << "]";
      if (op.op.flags)
	out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
      break;
    default:
      // don't show any arg info
      break;
    }
  } else if (ceph_osd_op_type_attr(op.op.op)) {
    // xattr ops: the attr name rides at the front of indata
    if (op.op.xattr.name_len && op.indata.length()) {
      out << " ";
      op.indata.write(0, op.op.xattr.name_len, out);
    }
    if (op.op.xattr.value_len)
      out << " (" << op.op.xattr.value_len << ")";
    if (op.op.op == CEPH_OSD_OP_CMPXATTR)
      out << " op " << (int)op.op.xattr.cmp_op
	  << " mode " << (int)op.op.xattr.cmp_mode;
  } else if (ceph_osd_op_type_exec(op.op.op)) {
    // class.method: both names are packed at the front of indata
    if (op.op.cls.class_len && op.indata.length()) {
      out << " ";
      op.indata.write(0, op.op.cls.class_len, out);
      out << ".";
      op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
    }
  } else if (ceph_osd_op_type_pg(op.op.op)) {
    // pg-scoped ops
    switch (op.op.op) {
    case CEPH_OSD_OP_PGLS:
    case CEPH_OSD_OP_PGLS_FILTER:
    case CEPH_OSD_OP_PGNLS:
    case CEPH_OSD_OP_PGNLS_FILTER:
      out << " start_epoch " << op.op.pgls.start_epoch;
      break;
    case CEPH_OSD_OP_PG_HITSET_LS:
      break;
    case CEPH_OSD_OP_PG_HITSET_GET:
      out << " " << utime_t(op.op.hit_set_get.stamp);
      break;
    case CEPH_OSD_OP_SCRUBLS:
      break;
    }
  }
  return out;
}
6523 void OSDOp::split_osd_op_vector_in_data(vector
<OSDOp
>& ops
, bufferlist
& in
)
6525 bufferlist::iterator datap
= in
.begin();
6526 for (unsigned i
= 0; i
< ops
.size(); i
++) {
6527 if (ops
[i
].op
.payload_len
) {
6528 datap
.copy(ops
[i
].op
.payload_len
, ops
[i
].indata
);
6533 void OSDOp::merge_osd_op_vector_in_data(vector
<OSDOp
>& ops
, bufferlist
& out
)
6535 for (unsigned i
= 0; i
< ops
.size(); i
++) {
6536 if (ops
[i
].indata
.length()) {
6537 ops
[i
].op
.payload_len
= ops
[i
].indata
.length();
6538 out
.append(ops
[i
].indata
);
6543 void OSDOp::split_osd_op_vector_out_data(vector
<OSDOp
>& ops
, bufferlist
& in
)
6545 bufferlist::iterator datap
= in
.begin();
6546 for (unsigned i
= 0; i
< ops
.size(); i
++) {
6547 if (ops
[i
].op
.payload_len
) {
6548 datap
.copy(ops
[i
].op
.payload_len
, ops
[i
].outdata
);
6553 void OSDOp::merge_osd_op_vector_out_data(vector
<OSDOp
>& ops
, bufferlist
& out
)
6555 for (unsigned i
= 0; i
< ops
.size(); i
++) {
6556 if (ops
[i
].outdata
.length()) {
6557 ops
[i
].op
.payload_len
= ops
[i
].outdata
.length();
6558 out
.append(ops
[i
].outdata
);
6563 void OSDOp::clear_data(vector
<OSDOp
>& ops
)
6565 for (unsigned i
= 0; i
< ops
.size(); i
++) {
6568 if (ceph_osd_op_type_attr(op
.op
.op
) &&
6569 op
.op
.xattr
.name_len
&&
6570 op
.indata
.length() >= op
.op
.xattr
.name_len
) {
6571 bufferptr
bp(op
.op
.xattr
.name_len
);
6574 bl
.copy_in(0, op
.op
.xattr
.name_len
, op
.indata
);
6575 op
.indata
.claim(bl
);
6576 } else if (ceph_osd_op_type_exec(op
.op
.op
) &&
6577 op
.op
.cls
.class_len
&&
6578 op
.indata
.length() >
6579 (op
.op
.cls
.class_len
+ op
.op
.cls
.method_len
)) {
6580 __u8 len
= op
.op
.cls
.class_len
+ op
.op
.cls
.method_len
;
6584 bl
.copy_in(0, len
, op
.indata
);
6585 op
.indata
.claim(bl
);