]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/osd_types.cc
update dh_systemd restart patch for pacific
[ceph.git] / ceph / src / osd / osd_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
9f95a23c
TL
18#include <list>
19#include <map>
20#include <ostream>
21#include <sstream>
22#include <set>
23#include <string>
24#include <utility>
25#include <vector>
26
27
7c673cae
FG
28#include <boost/assign/list_of.hpp>
29
7c673cae 30#include "include/ceph_features.h"
9f95a23c 31#include "include/encoding.h"
11fdf7f2 32#include "include/stringify.h"
7c673cae
FG
33extern "C" {
34#include "crush/hash.h"
35}
9f95a23c
TL
36
37#include "common/Formatter.h"
f67539c2 38#include "common/StackStringStream.h"
7c673cae 39#include "OSDMap.h"
9f95a23c
TL
40#include "osd_types.h"
41#include "os/Transaction.h"
42
43using std::list;
44using std::make_pair;
45using std::map;
46using std::ostream;
9f95a23c
TL
47using std::pair;
48using std::set;
49using std::string;
9f95a23c
TL
50using std::unique_ptr;
51using std::vector;
52
f67539c2 53using ceph::bufferlist;
9f95a23c
TL
54using ceph::decode;
55using ceph::decode_nohead;
56using ceph::encode;
57using ceph::encode_nohead;
58using ceph::Formatter;
f67539c2
TL
59using ceph::make_timespan;
60using ceph::JSONFormatter;
9f95a23c
TL
61
62using namespace std::literals;
7c673cae
FG
63
64const char *ceph_osd_flag_name(unsigned flag)
65{
66 switch (flag) {
67 case CEPH_OSD_FLAG_ACK: return "ack";
68 case CEPH_OSD_FLAG_ONNVRAM: return "onnvram";
69 case CEPH_OSD_FLAG_ONDISK: return "ondisk";
70 case CEPH_OSD_FLAG_RETRY: return "retry";
71 case CEPH_OSD_FLAG_READ: return "read";
72 case CEPH_OSD_FLAG_WRITE: return "write";
73 case CEPH_OSD_FLAG_ORDERSNAP: return "ordersnap";
74 case CEPH_OSD_FLAG_PEERSTAT_OLD: return "peerstat_old";
75 case CEPH_OSD_FLAG_BALANCE_READS: return "balance_reads";
76 case CEPH_OSD_FLAG_PARALLELEXEC: return "parallelexec";
77 case CEPH_OSD_FLAG_PGOP: return "pgop";
78 case CEPH_OSD_FLAG_EXEC: return "exec";
79 case CEPH_OSD_FLAG_EXEC_PUBLIC: return "exec_public";
80 case CEPH_OSD_FLAG_LOCALIZE_READS: return "localize_reads";
81 case CEPH_OSD_FLAG_RWORDERED: return "rwordered";
82 case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache";
83 case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks";
84 case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay";
85 case CEPH_OSD_FLAG_FLUSH: return "flush";
86 case CEPH_OSD_FLAG_MAP_SNAP_CLONE: return "map_snap_clone";
87 case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
88 case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
89 case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
90 case CEPH_OSD_FLAG_FULL_TRY: return "full_try";
91 case CEPH_OSD_FLAG_FULL_FORCE: return "full_force";
224ce89b 92 case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect";
9f95a23c 93 case CEPH_OSD_FLAG_RETURNVEC: return "returnvec";
7c673cae
FG
94 default: return "???";
95 }
96}
97
98string ceph_osd_flag_string(unsigned flags)
99{
100 string s;
101 for (unsigned i=0; i<32; ++i) {
102 if (flags & (1u<<i)) {
103 if (s.length())
104 s += "+";
105 s += ceph_osd_flag_name(1u << i);
106 }
107 }
108 if (s.length())
109 return s;
110 return string("-");
111}
112
113const char * ceph_osd_op_flag_name(unsigned flag)
114{
115 const char *name;
116
117 switch(flag) {
118 case CEPH_OSD_OP_FLAG_EXCL:
119 name = "excl";
120 break;
121 case CEPH_OSD_OP_FLAG_FAILOK:
122 name = "failok";
123 break;
124 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM:
125 name = "fadvise_random";
126 break;
127 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL:
128 name = "fadvise_sequential";
129 break;
130 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED:
131 name = "favise_willneed";
132 break;
133 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED:
134 name = "fadvise_dontneed";
135 break;
136 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
137 name = "fadvise_nocache";
138 break;
11fdf7f2
TL
139 case CEPH_OSD_OP_FLAG_WITH_REFERENCE:
140 name = "with_reference";
141 break;
91327a77
AA
142 case CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE:
143 name = "bypass_clean_cache";
144 break;
7c673cae
FG
145 default:
146 name = "???";
147 };
148
149 return name;
150}
151
152string ceph_osd_op_flag_string(unsigned flags)
153{
154 string s;
155 for (unsigned i=0; i<32; ++i) {
156 if (flags & (1u<<i)) {
157 if (s.length())
158 s += "+";
159 s += ceph_osd_op_flag_name(1u << i);
160 }
161 }
162 if (s.length())
163 return s;
164 return string("-");
165}
166
167string ceph_osd_alloc_hint_flag_string(unsigned flags)
168{
169 string s;
170 for (unsigned i=0; i<32; ++i) {
171 if (flags & (1u<<i)) {
172 if (s.length())
173 s += "+";
174 s += ceph_osd_alloc_hint_flag_name(1u << i);
175 }
176 }
177 if (s.length())
178 return s;
179 return string("-");
180}
181
9f95a23c 182void pg_shard_t::encode(ceph::buffer::list &bl) const
7c673cae
FG
183{
184 ENCODE_START(1, 1, bl);
11fdf7f2
TL
185 encode(osd, bl);
186 encode(shard, bl);
7c673cae
FG
187 ENCODE_FINISH(bl);
188}
9f95a23c 189void pg_shard_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
190{
191 DECODE_START(1, bl);
11fdf7f2
TL
192 decode(osd, bl);
193 decode(shard, bl);
7c673cae
FG
194 DECODE_FINISH(bl);
195}
196
197ostream &operator<<(ostream &lhs, const pg_shard_t &rhs)
198{
199 if (rhs.is_undefined())
200 return lhs << "?";
201 if (rhs.shard == shard_id_t::NO_SHARD)
b32b8144
FG
202 return lhs << rhs.get_osd();
203 return lhs << rhs.get_osd() << '(' << (unsigned)(rhs.shard) << ')';
7c673cae
FG
204}
205
11fdf7f2
TL
206void dump(Formatter* f, const osd_alerts_t& alerts)
207{
208 for (auto& a : alerts) {
209 string s0 = " osd: ";
210 s0 += stringify(a.first);
211 string s;
212 for (auto& aa : a.second) {
213 s = s0;
214 s += " ";
215 s += aa.first;
216 s += ":";
217 s += aa.second;
218 f->dump_string("alert", s);
219 }
220 }
221}
222
7c673cae
FG
223// -- osd_reqid_t --
224void osd_reqid_t::dump(Formatter *f) const
225{
226 f->dump_stream("name") << name;
227 f->dump_int("inc", inc);
228 f->dump_unsigned("tid", tid);
229}
230
231void osd_reqid_t::generate_test_instances(list<osd_reqid_t*>& o)
232{
233 o.push_back(new osd_reqid_t);
234 o.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
235}
236
237// -- object_locator_t --
238
9f95a23c 239void object_locator_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
240{
241 // verify that nobody's corrupted the locator
11fdf7f2 242 ceph_assert(hash == -1 || key.empty());
7c673cae
FG
243 __u8 encode_compat = 3;
244 ENCODE_START(6, encode_compat, bl);
11fdf7f2 245 encode(pool, bl);
7c673cae 246 int32_t preferred = -1; // tell old code there is no preferred osd (-1).
11fdf7f2
TL
247 encode(preferred, bl);
248 encode(key, bl);
249 encode(nspace, bl);
250 encode(hash, bl);
7c673cae 251 if (hash != -1)
11fdf7f2 252 encode_compat = std::max<std::uint8_t>(encode_compat, 6); // need to interpret the hash
7c673cae
FG
253 ENCODE_FINISH_NEW_COMPAT(bl, encode_compat);
254}
255
9f95a23c 256void object_locator_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
257{
258 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p);
259 if (struct_v < 2) {
260 int32_t op;
11fdf7f2 261 decode(op, p);
7c673cae
FG
262 pool = op;
263 int16_t pref;
11fdf7f2 264 decode(pref, p);
7c673cae 265 } else {
11fdf7f2 266 decode(pool, p);
7c673cae 267 int32_t preferred;
11fdf7f2 268 decode(preferred, p);
7c673cae 269 }
11fdf7f2 270 decode(key, p);
7c673cae 271 if (struct_v >= 5)
11fdf7f2 272 decode(nspace, p);
7c673cae 273 if (struct_v >= 6)
11fdf7f2 274 decode(hash, p);
7c673cae
FG
275 else
276 hash = -1;
277 DECODE_FINISH(p);
278 // verify that nobody's corrupted the locator
11fdf7f2 279 ceph_assert(hash == -1 || key.empty());
7c673cae
FG
280}
281
282void object_locator_t::dump(Formatter *f) const
283{
284 f->dump_int("pool", pool);
285 f->dump_string("key", key);
286 f->dump_string("namespace", nspace);
287 f->dump_int("hash", hash);
288}
289
290void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
291{
292 o.push_back(new object_locator_t);
293 o.push_back(new object_locator_t(123));
294 o.push_back(new object_locator_t(123, 876));
295 o.push_back(new object_locator_t(1, "n2"));
296 o.push_back(new object_locator_t(1234, "", "key"));
297 o.push_back(new object_locator_t(12, "n1", "key2"));
298}
299
300// -- request_redirect_t --
9f95a23c 301void request_redirect_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
302{
303 ENCODE_START(1, 1, bl);
11fdf7f2
TL
304 encode(redirect_locator, bl);
305 encode(redirect_object, bl);
306 // legacy of the removed osd_instructions member
307 encode((uint32_t)0, bl);
7c673cae
FG
308 ENCODE_FINISH(bl);
309}
310
9f95a23c 311void request_redirect_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
312{
313 DECODE_START(1, bl);
11fdf7f2
TL
314 uint32_t legacy_osd_instructions_len;
315 decode(redirect_locator, bl);
316 decode(redirect_object, bl);
317 decode(legacy_osd_instructions_len, bl);
318 if (legacy_osd_instructions_len) {
9f95a23c 319 bl += legacy_osd_instructions_len;
11fdf7f2 320 }
7c673cae
FG
321 DECODE_FINISH(bl);
322}
323
324void request_redirect_t::dump(Formatter *f) const
325{
326 f->dump_string("object", redirect_object);
327 f->open_object_section("locator");
328 redirect_locator.dump(f);
329 f->close_section(); // locator
330}
331
332void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
333{
334 object_locator_t loc(1, "redir_obj");
335 o.push_back(new request_redirect_t());
336 o.push_back(new request_redirect_t(loc, 0));
337 o.push_back(new request_redirect_t(loc, "redir_obj"));
338 o.push_back(new request_redirect_t(loc));
339}
340
341void objectstore_perf_stat_t::dump(Formatter *f) const
342{
11fdf7f2
TL
343 // *_ms values just for compatibility.
344 f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0);
345 f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0);
346 f->dump_unsigned("commit_latency_ns", os_commit_latency_ns);
347 f->dump_unsigned("apply_latency_ns", os_apply_latency_ns);
7c673cae
FG
348}
349
9f95a23c 350void objectstore_perf_stat_t::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae 351{
11fdf7f2
TL
352 uint8_t target_v = 2;
353 if (!HAVE_FEATURE(features, OS_PERF_STAT_NS)) {
354 target_v = 1;
355 }
356 ENCODE_START(target_v, target_v, bl);
357 if (target_v >= 2) {
358 encode(os_commit_latency_ns, bl);
359 encode(os_apply_latency_ns, bl);
360 } else {
361 constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
362 uint32_t commit_latency_ms = os_commit_latency_ns / NS_PER_MS;
363 uint32_t apply_latency_ms = os_apply_latency_ns / NS_PER_MS;
364 encode(commit_latency_ms, bl); // for compatibility with older monitor.
365 encode(apply_latency_ms, bl); // for compatibility with older monitor.
366 }
7c673cae
FG
367 ENCODE_FINISH(bl);
368}
369
9f95a23c 370void objectstore_perf_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 371{
11fdf7f2
TL
372 DECODE_START(2, bl);
373 if (struct_v >= 2) {
374 decode(os_commit_latency_ns, bl);
375 decode(os_apply_latency_ns, bl);
376 } else {
377 uint32_t commit_latency_ms;
378 uint32_t apply_latency_ms;
379 decode(commit_latency_ms, bl);
380 decode(apply_latency_ms, bl);
381 constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
382 os_commit_latency_ns = commit_latency_ms * NS_PER_MS;
383 os_apply_latency_ns = apply_latency_ms * NS_PER_MS;
384 }
7c673cae
FG
385 DECODE_FINISH(bl);
386}
387
388void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf_stat_t*>& o)
389{
390 o.push_back(new objectstore_perf_stat_t());
391 o.push_back(new objectstore_perf_stat_t());
11fdf7f2
TL
392 o.back()->os_commit_latency_ns = 20000000;
393 o.back()->os_apply_latency_ns = 30000000;
7c673cae
FG
394}
395
396// -- osd_stat_t --
ded94939 397void osd_stat_t::dump(Formatter *f, bool with_net) const
7c673cae 398{
31f18b77
FG
399 f->dump_unsigned("up_from", up_from);
400 f->dump_unsigned("seq", seq);
35e4c445 401 f->dump_unsigned("num_pgs", num_pgs);
81eedcae
TL
402 f->dump_unsigned("num_osds", num_osds);
403 f->dump_unsigned("num_per_pool_osds", num_per_pool_osds);
9f95a23c 404 f->dump_unsigned("num_per_pool_omap_osds", num_per_pool_omap_osds);
11fdf7f2
TL
405
406 /// dump legacy stats fields to ensure backward compatibility.
407 f->dump_unsigned("kb", statfs.kb());
408 f->dump_unsigned("kb_used", statfs.kb_used_raw());
409 f->dump_unsigned("kb_used_data", statfs.kb_used_data());
410 f->dump_unsigned("kb_used_omap", statfs.kb_used_omap());
411 f->dump_unsigned("kb_used_meta", statfs.kb_used_internal_metadata());
412 f->dump_unsigned("kb_avail", statfs.kb_avail());
413 ////////////////////
414
415 f->open_object_section("statfs");
416 statfs.dump(f);
417 f->close_section();
7c673cae
FG
418 f->open_array_section("hb_peers");
419 for (auto p : hb_peers)
420 f->dump_int("osd", p);
421 f->close_section();
422 f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
423 f->dump_int("num_snap_trimming", num_snap_trimming);
11fdf7f2 424 f->dump_int("num_shards_repaired", num_shards_repaired);
7c673cae
FG
425 f->open_object_section("op_queue_age_hist");
426 op_queue_age_hist.dump(f);
427 f->close_section();
428 f->open_object_section("perf_stat");
429 os_perf_stat.dump(f);
430 f->close_section();
11fdf7f2
TL
431 f->open_array_section("alerts");
432 ::dump(f, os_alerts);
433 f->close_section();
ded94939 434 if (with_net) {
9f95a23c
TL
435 dump_ping_time(f);
436 }
437}
438
439void osd_stat_t::dump_ping_time(Formatter *f) const
440{
eafe8130
TL
441 f->open_array_section("network_ping_times");
442 for (auto &i : hb_pingtime) {
443 f->open_object_section("entry");
444 f->dump_int("osd", i.first);
445 const time_t lu(i.second.last_update);
446 char buffer[26];
447 string lustr(ctime_r(&lu, buffer));
448 lustr.pop_back(); // Remove trailing \n
449 f->dump_string("last update", lustr);
450 f->open_array_section("interfaces");
451 f->open_object_section("interface");
452 f->dump_string("interface", "back");
453 f->open_object_section("average");
9f95a23c
TL
454 f->dump_float("1min", i.second.back_pingtime[0]/1000.0);
455 f->dump_float("5min", i.second.back_pingtime[1]/1000.0);
456 f->dump_float("15min", i.second.back_pingtime[2]/1000.0);
eafe8130
TL
457 f->close_section(); // average
458 f->open_object_section("min");
9f95a23c
TL
459 f->dump_float("1min", i.second.back_min[0]/1000.0);
460 f->dump_float("5min", i.second.back_min[1]/1000.0);
461 f->dump_float("15min", i.second.back_min[2]/1000.0);
eafe8130
TL
462 f->close_section(); // min
463 f->open_object_section("max");
9f95a23c
TL
464 f->dump_float("1min", i.second.back_max[0]/1000.0);
465 f->dump_float("5min", i.second.back_max[1]/1000.0);
466 f->dump_float("15min", i.second.back_max[2]/1000.0);
eafe8130 467 f->close_section(); // max
9f95a23c 468 f->dump_float("last", i.second.back_last/1000.0);
eafe8130
TL
469 f->close_section(); // interface
470
471 if (i.second.front_pingtime[0] != 0) {
472 f->open_object_section("interface");
473 f->dump_string("interface", "front");
474 f->open_object_section("average");
9f95a23c
TL
475 f->dump_float("1min", i.second.front_pingtime[0]/1000.0);
476 f->dump_float("5min", i.second.front_pingtime[1]/1000.0);
477 f->dump_float("15min", i.second.front_pingtime[2]/1000.0);
eafe8130
TL
478 f->close_section(); // average
479 f->open_object_section("min");
9f95a23c
TL
480 f->dump_float("1min", i.second.front_min[0]/1000.0);
481 f->dump_float("5min", i.second.front_min[1]/1000.0);
482 f->dump_float("15min", i.second.front_min[2]/1000.0);
eafe8130
TL
483 f->close_section(); // min
484 f->open_object_section("max");
9f95a23c
TL
485 f->dump_float("1min", i.second.front_max[0]/1000.0);
486 f->dump_float("5min", i.second.front_max[1]/1000.0);
487 f->dump_float("15min", i.second.front_max[2]/1000.0);
eafe8130 488 f->close_section(); // max
9f95a23c 489 f->dump_float("last", i.second.front_last/1000.0);
eafe8130
TL
490 f->close_section(); // interface
491 }
492 f->close_section(); // interfaces
493 f->close_section(); // entry
494 }
495 f->close_section(); // network_ping_time
7c673cae
FG
496}
497
9f95a23c 498void osd_stat_t::encode(ceph::buffer::list &bl, uint64_t features) const
11fdf7f2 499{
eafe8130 500 ENCODE_START(14, 2, bl);
11fdf7f2
TL
501
502 //////// for compatibility ////////
503 int64_t kb = statfs.kb();
504 int64_t kb_used = statfs.kb_used_raw();
505 int64_t kb_avail = statfs.kb_avail();
506 encode(kb, bl);
507 encode(kb_used, bl);
508 encode(kb_avail, bl);
509 ///////////////////////////////////
510
511 encode(snap_trim_queue_len, bl);
512 encode(num_snap_trimming, bl);
513 encode(hb_peers, bl);
514 encode((uint32_t)0, bl);
515 encode(op_queue_age_hist, bl);
516 encode(os_perf_stat, bl, features);
517 encode(up_from, bl);
518 encode(seq, bl);
519 encode(num_pgs, bl);
520
521 //////// for compatibility ////////
522 int64_t kb_used_data = statfs.kb_used_data();
523 int64_t kb_used_omap = statfs.kb_used_omap();
524 int64_t kb_used_meta = statfs.kb_used_internal_metadata();
525 encode(kb_used_data, bl);
526 encode(kb_used_omap, bl);
527 encode(kb_used_meta, bl);
528 encode(statfs, bl);
529 ///////////////////////////////////
530 encode(os_alerts, bl);
531 encode(num_shards_repaired, bl);
81eedcae
TL
532 encode(num_osds, bl);
533 encode(num_per_pool_osds, bl);
9f95a23c 534 encode(num_per_pool_omap_osds, bl);
eafe8130
TL
535
536 // hb_pingtime map
537 encode((int)hb_pingtime.size(), bl);
538 for (auto i : hb_pingtime) {
539 encode(i.first, bl); // osd
540 encode(i.second.last_update, bl);
541 encode(i.second.back_pingtime[0], bl);
542 encode(i.second.back_pingtime[1], bl);
543 encode(i.second.back_pingtime[2], bl);
544 encode(i.second.back_min[0], bl);
545 encode(i.second.back_min[1], bl);
546 encode(i.second.back_min[2], bl);
547 encode(i.second.back_max[0], bl);
548 encode(i.second.back_max[1], bl);
549 encode(i.second.back_max[2], bl);
550 encode(i.second.back_last, bl);
551 encode(i.second.front_pingtime[0], bl);
552 encode(i.second.front_pingtime[1], bl);
553 encode(i.second.front_pingtime[2], bl);
554 encode(i.second.front_min[0], bl);
555 encode(i.second.front_min[1], bl);
556 encode(i.second.front_min[2], bl);
557 encode(i.second.front_max[0], bl);
558 encode(i.second.front_max[1], bl);
559 encode(i.second.front_max[2], bl);
560 encode(i.second.front_last, bl);
561 }
7c673cae
FG
562 ENCODE_FINISH(bl);
563}
564
9f95a23c 565void osd_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 566{
11fdf7f2
TL
567 int64_t kb, kb_used,kb_avail;
568 int64_t kb_used_data, kb_used_omap, kb_used_meta;
eafe8130 569 DECODE_START_LEGACY_COMPAT_LEN(14, 2, 2, bl);
11fdf7f2
TL
570 decode(kb, bl);
571 decode(kb_used, bl);
572 decode(kb_avail, bl);
573 decode(snap_trim_queue_len, bl);
574 decode(num_snap_trimming, bl);
575 decode(hb_peers, bl);
7c673cae 576 vector<int> num_hb_out;
11fdf7f2 577 decode(num_hb_out, bl);
7c673cae 578 if (struct_v >= 3)
11fdf7f2 579 decode(op_queue_age_hist, bl);
7c673cae 580 if (struct_v >= 4)
11fdf7f2 581 decode(os_perf_stat, bl);
31f18b77 582 if (struct_v >= 6) {
11fdf7f2
TL
583 decode(up_from, bl);
584 decode(seq, bl);
31f18b77 585 }
35e4c445 586 if (struct_v >= 7) {
11fdf7f2
TL
587 decode(num_pgs, bl);
588 }
589 if (struct_v >= 8) {
590 decode(kb_used_data, bl);
591 decode(kb_used_omap, bl);
592 decode(kb_used_meta, bl);
593 } else {
594 kb_used_data = kb_used;
595 kb_used_omap = 0;
596 kb_used_meta = 0;
597 }
598 if (struct_v >= 9) {
599 decode(statfs, bl);
600 } else {
601 statfs.reset();
602 statfs.total = kb << 10;
603 statfs.available = kb_avail << 10;
604 // actually it's totally unexpected to have ststfs.total < statfs.available
605 // here but unfortunately legacy generate_test_instances produced such a
606 // case hence inserting some handling rather than assert
607 statfs.internally_reserved =
608 statfs.total > statfs.available ? statfs.total - statfs.available : 0;
609 kb_used <<= 10;
610 if ((int64_t)statfs.internally_reserved > kb_used) {
611 statfs.internally_reserved -= kb_used;
612 } else {
613 statfs.internally_reserved = 0;
614 }
615 statfs.allocated = kb_used_data << 10;
616 statfs.omap_allocated = kb_used_omap << 10;
617 statfs.internal_metadata = kb_used_meta << 10;
618 }
619 if (struct_v >= 10) {
620 decode(os_alerts, bl);
621 } else {
622 os_alerts.clear();
623 }
624 if (struct_v >= 11) {
625 decode(num_shards_repaired, bl);
626 } else {
627 num_shards_repaired = 0;
35e4c445 628 }
81eedcae
TL
629 if (struct_v >= 12) {
630 decode(num_osds, bl);
631 decode(num_per_pool_osds, bl);
632 } else {
633 num_osds = 0;
634 num_per_pool_osds = 0;
635 }
eafe8130 636 if (struct_v >= 13) {
9f95a23c
TL
637 decode(num_per_pool_omap_osds, bl);
638 } else {
639 num_per_pool_omap_osds = 0;
eafe8130
TL
640 }
641 hb_pingtime.clear();
642 if (struct_v >= 14) {
643 int count;
644 decode(count, bl);
645 for (int i = 0 ; i < count ; i++) {
646 int osd;
647 decode(osd, bl);
648 struct Interfaces ifs;
649 decode(ifs.last_update, bl);
650 decode(ifs.back_pingtime[0],bl);
651 decode(ifs.back_pingtime[1], bl);
652 decode(ifs.back_pingtime[2], bl);
653 decode(ifs.back_min[0],bl);
654 decode(ifs.back_min[1], bl);
655 decode(ifs.back_min[2], bl);
656 decode(ifs.back_max[0],bl);
657 decode(ifs.back_max[1], bl);
658 decode(ifs.back_max[2], bl);
659 decode(ifs.back_last, bl);
660 decode(ifs.front_pingtime[0], bl);
661 decode(ifs.front_pingtime[1], bl);
662 decode(ifs.front_pingtime[2], bl);
663 decode(ifs.front_min[0], bl);
664 decode(ifs.front_min[1], bl);
665 decode(ifs.front_min[2], bl);
666 decode(ifs.front_max[0], bl);
667 decode(ifs.front_max[1], bl);
668 decode(ifs.front_max[2], bl);
669 decode(ifs.front_last, bl);
670 hb_pingtime[osd] = ifs;
671 }
672 }
7c673cae
FG
673 DECODE_FINISH(bl);
674}
675
676void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
677{
678 o.push_back(new osd_stat_t);
679
680 o.push_back(new osd_stat_t);
11fdf7f2
TL
681 list<store_statfs_t*> ll;
682 store_statfs_t::generate_test_instances(ll);
683 o.back()->statfs = *ll.back();
7c673cae
FG
684 o.back()->hb_peers.push_back(7);
685 o.back()->snap_trim_queue_len = 8;
686 o.back()->num_snap_trimming = 99;
11fdf7f2
TL
687 o.back()->num_shards_repaired = 101;
688 o.back()->os_alerts[0].emplace(
689 "some alert", "some alert details");
690 o.back()->os_alerts[1].emplace(
691 "some alert2", "some alert2 details");
eafe8130
TL
692 struct Interfaces gen_interfaces = {
693 123456789, { 1000, 900, 800 }, { 990, 890, 790 }, { 1010, 910, 810 }, 1001,
694 { 1100, 1000, 900 }, { 1090, 990, 890 }, { 1110, 1010, 910 }, 1101 };
695 o.back()->hb_pingtime[20] = gen_interfaces;
696 gen_interfaces = {
697 987654321, { 100, 200, 300 }, { 90, 190, 290 }, { 110, 210, 310 }, 101 };
698 o.back()->hb_pingtime[30] = gen_interfaces;
7c673cae
FG
699}
700
701// -- pg_t --
702
703int pg_t::print(char *o, int maxlen) const
704{
11fdf7f2 705 return snprintf(o, maxlen, "%llu.%x", (unsigned long long)pool(), ps());
7c673cae
FG
706}
707
708bool pg_t::parse(const char *s)
709{
710 uint64_t ppool;
711 uint32_t pseed;
11fdf7f2 712 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
7c673cae
FG
713 if (r < 2)
714 return false;
715 m_pool = ppool;
716 m_seed = pseed;
7c673cae
FG
717 return true;
718}
719
720bool spg_t::parse(const char *s)
721{
7c673cae
FG
722 shard = shard_id_t::NO_SHARD;
723 uint64_t ppool;
724 uint32_t pseed;
7c673cae
FG
725 uint32_t pshard;
726 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
727 if (r < 2)
728 return false;
729 pgid.set_pool(ppool);
730 pgid.set_ps(pseed);
731
11fdf7f2 732 const char *p = strchr(s, 's');
7c673cae 733 if (p) {
11fdf7f2 734 r = sscanf(p, "s%u", &pshard);
7c673cae
FG
735 if (r == 1) {
736 shard = shard_id_t(pshard);
737 } else {
738 return false;
739 }
740 }
741 return true;
742}
743
744char *spg_t::calc_name(char *buf, const char *suffix_backwords) const
745{
746 while (*suffix_backwords)
747 *--buf = *suffix_backwords++;
748
749 if (!is_no_shard()) {
750 buf = ritoa<uint8_t, 10>((uint8_t)shard.id, buf);
751 *--buf = 's';
752 }
753
754 return pgid.calc_name(buf, "");
755}
756
757ostream& operator<<(ostream& out, const spg_t &pg)
758{
759 char buf[spg_t::calc_name_buf_size];
760 buf[spg_t::calc_name_buf_size - 1] = '\0';
761 out << pg.calc_name(buf + spg_t::calc_name_buf_size - 1, "");
762 return out;
763}
764
765pg_t pg_t::get_ancestor(unsigned old_pg_num) const
766{
767 int old_bits = cbits(old_pg_num);
768 int old_mask = (1 << old_bits) - 1;
769 pg_t ret = *this;
770 ret.m_seed = ceph_stable_mod(m_seed, old_pg_num, old_mask);
771 return ret;
772}
773
774bool pg_t::is_split(unsigned old_pg_num, unsigned new_pg_num, set<pg_t> *children) const
775{
11fdf7f2
TL
776 //ceph_assert(m_seed < old_pg_num);
777 if (m_seed >= old_pg_num) {
778 // degenerate case
779 return false;
780 }
7c673cae
FG
781 if (new_pg_num <= old_pg_num)
782 return false;
783
784 bool split = false;
785 if (true) {
786 unsigned old_bits = cbits(old_pg_num);
787 unsigned old_mask = (1 << old_bits) - 1;
788 for (unsigned n = 1; ; n++) {
789 unsigned next_bit = (n << (old_bits-1));
790 unsigned s = next_bit | m_seed;
791
792 if (s < old_pg_num || s == m_seed)
793 continue;
794 if (s >= new_pg_num)
795 break;
796 if ((unsigned)ceph_stable_mod(s, old_pg_num, old_mask) == m_seed) {
797 split = true;
798 if (children)
11fdf7f2 799 children->insert(pg_t(s, m_pool));
7c673cae
FG
800 }
801 }
802 }
803 if (false) {
804 // brute force
805 int old_bits = cbits(old_pg_num);
806 int old_mask = (1 << old_bits) - 1;
807 for (unsigned x = old_pg_num; x < new_pg_num; ++x) {
808 unsigned o = ceph_stable_mod(x, old_pg_num, old_mask);
809 if (o == m_seed) {
810 split = true;
11fdf7f2 811 children->insert(pg_t(x, m_pool));
7c673cae
FG
812 }
813 }
814 }
815 return split;
816}
817
818unsigned pg_t::get_split_bits(unsigned pg_num) const {
819 if (pg_num == 1)
820 return 0;
11fdf7f2 821 ceph_assert(pg_num > 1);
7c673cae
FG
822
823 // Find unique p such that pg_num \in [2^(p-1), 2^p)
824 unsigned p = cbits(pg_num);
11fdf7f2 825 ceph_assert(p); // silence coverity #751330
7c673cae
FG
826
827 if ((m_seed % (1<<(p-1))) < (pg_num % (1<<(p-1))))
828 return p;
829 else
830 return p - 1;
831}
832
11fdf7f2
TL
833bool pg_t::is_merge_source(
834 unsigned old_pg_num,
835 unsigned new_pg_num,
836 pg_t *parent) const
837{
838 if (m_seed < old_pg_num &&
839 m_seed >= new_pg_num) {
840 if (parent) {
841 pg_t t = *this;
842 while (t.m_seed >= new_pg_num) {
843 t = t.get_parent();
844 }
845 *parent = t;
846 }
847 return true;
848 }
849 return false;
850}
851
7c673cae
FG
852pg_t pg_t::get_parent() const
853{
854 unsigned bits = cbits(m_seed);
11fdf7f2 855 ceph_assert(bits);
7c673cae
FG
856 pg_t retval = *this;
857 retval.m_seed &= ~((~0)<<(bits - 1));
858 return retval;
859}
860
861hobject_t pg_t::get_hobj_start() const
862{
11fdf7f2 863 return hobject_t(object_t(), string(), 0, m_seed, m_pool,
7c673cae
FG
864 string());
865}
866
867hobject_t pg_t::get_hobj_end(unsigned pg_num) const
868{
869 // note: this assumes a bitwise sort; with the legacy nibblewise
870 // sort a PG did not always cover a single contiguous range of the
871 // (bit-reversed) hash range.
872 unsigned bits = get_split_bits(pg_num);
873 uint64_t rev_start = hobject_t::_reverse_bits(m_seed);
874 uint64_t rev_end = (rev_start | (0xffffffff >> bits)) + 1;
875 if (rev_end >= 0x100000000) {
11fdf7f2 876 ceph_assert(rev_end == 0x100000000);
7c673cae
FG
877 return hobject_t::get_max();
878 } else {
879 return hobject_t(object_t(), string(), CEPH_NOSNAP,
880 hobject_t::_reverse_bits(rev_end), m_pool,
881 string());
882 }
883}
884
885void pg_t::dump(Formatter *f) const
886{
887 f->dump_unsigned("pool", m_pool);
888 f->dump_unsigned("seed", m_seed);
7c673cae
FG
889}
890
891void pg_t::generate_test_instances(list<pg_t*>& o)
892{
893 o.push_back(new pg_t);
11fdf7f2
TL
894 o.push_back(new pg_t(1, 2));
895 o.push_back(new pg_t(13123, 3));
896 o.push_back(new pg_t(131223, 4));
7c673cae
FG
897}
898
899char *pg_t::calc_name(char *buf, const char *suffix_backwords) const
900{
901 while (*suffix_backwords)
902 *--buf = *suffix_backwords++;
903
7c673cae
FG
904 buf = ritoa<uint32_t, 16>(m_seed, buf);
905
906 *--buf = '.';
907
908 return ritoa<uint64_t, 10>(m_pool, buf);
909}
910
911ostream& operator<<(ostream& out, const pg_t &pg)
912{
913 char buf[pg_t::calc_name_buf_size];
914 buf[pg_t::calc_name_buf_size - 1] = '\0';
915 out << pg.calc_name(buf + pg_t::calc_name_buf_size - 1, "");
916 return out;
917}
918
919
920// -- coll_t --
921
922void coll_t::calc_str()
923{
924 switch (type) {
925 case TYPE_META:
926 strcpy(_str_buff, "meta");
927 _str = _str_buff;
928 break;
929 case TYPE_PG:
930 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
931 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "daeh_");
932 break;
933 case TYPE_PG_TEMP:
934 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
935 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "PMET_");
936 break;
937 default:
11fdf7f2 938 ceph_abort_msg("unknown collection type");
7c673cae
FG
939 }
940}
941
942bool coll_t::parse(const std::string& s)
943{
944 if (s == "meta") {
945 type = TYPE_META;
946 pgid = spg_t();
947 removal_seq = 0;
948 calc_str();
11fdf7f2 949 ceph_assert(s == _str);
7c673cae
FG
950 return true;
951 }
952 if (s.find("_head") == s.length() - 5 &&
953 pgid.parse(s.substr(0, s.length() - 5))) {
954 type = TYPE_PG;
955 removal_seq = 0;
956 calc_str();
11fdf7f2 957 ceph_assert(s == _str);
7c673cae
FG
958 return true;
959 }
960 if (s.find("_TEMP") == s.length() - 5 &&
961 pgid.parse(s.substr(0, s.length() - 5))) {
962 type = TYPE_PG_TEMP;
963 removal_seq = 0;
964 calc_str();
11fdf7f2 965 ceph_assert(s == _str);
7c673cae
FG
966 return true;
967 }
968 return false;
969}
970
9f95a23c 971void coll_t::encode(ceph::buffer::list& bl) const
7c673cae 972{
11fdf7f2 973 using ceph::encode;
7c673cae
FG
974 // when changing this, remember to update encoded_size() too.
975 if (is_temp()) {
976 // can't express this as v2...
977 __u8 struct_v = 3;
11fdf7f2
TL
978 encode(struct_v, bl);
979 encode(to_str(), bl);
7c673cae
FG
980 } else {
981 __u8 struct_v = 2;
11fdf7f2
TL
982 encode(struct_v, bl);
983 encode((__u8)type, bl);
984 encode(pgid, bl);
7c673cae 985 snapid_t snap = CEPH_NOSNAP;
11fdf7f2 986 encode(snap, bl);
7c673cae
FG
987 }
988}
989
990size_t coll_t::encoded_size() const
991{
992 size_t r = sizeof(__u8);
993 if (is_temp()) {
994 // v3
995 r += sizeof(__u32);
996 if (_str) {
997 r += strlen(_str);
998 }
999 } else {
1000 // v2
1001 // 1. type
1002 r += sizeof(__u8);
1003 // 2. pgid
1004 // - encoding header
1005 r += sizeof(ceph_le32) + 2 * sizeof(__u8);
1006 // - pg_t
1007 r += sizeof(__u8) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
1008 // - shard_id_t
1009 r += sizeof(int8_t);
1010 // 3. snapid_t
1011 r += sizeof(uint64_t);
1012 }
1013
1014 return r;
1015}
1016
9f95a23c 1017void coll_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 1018{
11fdf7f2 1019 using ceph::decode;
7c673cae 1020 __u8 struct_v;
11fdf7f2 1021 decode(struct_v, bl);
7c673cae
FG
1022 switch (struct_v) {
1023 case 1:
1024 {
1025 snapid_t snap;
11fdf7f2
TL
1026 decode(pgid, bl);
1027 decode(snap, bl);
7c673cae
FG
1028
1029 // infer the type
1030 if (pgid == spg_t() && snap == 0) {
1031 type = TYPE_META;
1032 } else {
1033 type = TYPE_PG;
1034 }
1035 removal_seq = 0;
1036 }
1037 break;
1038
1039 case 2:
1040 {
1041 __u8 _type;
1042 snapid_t snap;
11fdf7f2
TL
1043 decode(_type, bl);
1044 decode(pgid, bl);
1045 decode(snap, bl);
7c673cae
FG
1046 type = (type_t)_type;
1047 removal_seq = 0;
1048 }
1049 break;
1050
1051 case 3:
1052 {
1053 string str;
11fdf7f2 1054 decode(str, bl);
7c673cae
FG
1055 bool ok = parse(str);
1056 if (!ok)
1057 throw std::domain_error(std::string("unable to parse pg ") + str);
1058 }
1059 break;
1060
1061 default:
1062 {
f67539c2
TL
1063 CachedStackStringStream css;
1064 *css << "coll_t::decode(): don't know how to decode version "
1065 << struct_v;
1066 throw std::domain_error(css->str());
7c673cae
FG
1067 }
1068 }
1069}
1070
1071void coll_t::dump(Formatter *f) const
1072{
1073 f->dump_unsigned("type_id", (unsigned)type);
1074 if (type != TYPE_META)
1075 f->dump_stream("pgid") << pgid;
1076 f->dump_string("name", to_str());
1077}
1078
1079void coll_t::generate_test_instances(list<coll_t*>& o)
1080{
1081 o.push_back(new coll_t());
1082 o.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD)));
1083 o.push_back(new coll_t(o.back()->get_temp()));
1084 o.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
1085 o.push_back(new coll_t(o.back()->get_temp()));
1086 o.push_back(new coll_t());
1087}
1088
1089// ---
1090
1091std::string pg_vector_string(const vector<int32_t> &a)
1092{
f67539c2
TL
1093 CachedStackStringStream css;
1094 *css << "[";
9f95a23c
TL
1095 for (auto i = a.cbegin(); i != a.cend(); ++i) {
1096 if (i != a.begin())
f67539c2 1097 *css << ",";
9f95a23c 1098 if (*i != CRUSH_ITEM_NONE)
f67539c2 1099 *css << *i;
9f95a23c 1100 else
f67539c2 1101 *css << "NONE";
7c673cae 1102 }
f67539c2
TL
1103 *css << "]";
1104 return css->str();
7c673cae
FG
1105}
1106
11fdf7f2 1107std::string pg_state_string(uint64_t state)
7c673cae 1108{
f67539c2 1109 CachedStackStringStream css;
7c673cae 1110 if (state & PG_STATE_STALE)
f67539c2 1111 *css << "stale+";
7c673cae 1112 if (state & PG_STATE_CREATING)
f67539c2 1113 *css << "creating+";
7c673cae 1114 if (state & PG_STATE_ACTIVE)
f67539c2 1115 *css << "active+";
7c673cae 1116 if (state & PG_STATE_ACTIVATING)
f67539c2 1117 *css << "activating+";
7c673cae 1118 if (state & PG_STATE_CLEAN)
f67539c2 1119 *css << "clean+";
7c673cae 1120 if (state & PG_STATE_RECOVERY_WAIT)
f67539c2 1121 *css << "recovery_wait+";
7c673cae 1122 if (state & PG_STATE_RECOVERY_TOOFULL)
f67539c2 1123 *css << "recovery_toofull+";
7c673cae 1124 if (state & PG_STATE_RECOVERING)
f67539c2 1125 *css << "recovering+";
c07f9fc5 1126 if (state & PG_STATE_FORCED_RECOVERY)
f67539c2 1127 *css << "forced_recovery+";
7c673cae 1128 if (state & PG_STATE_DOWN)
f67539c2 1129 *css << "down+";
b32b8144 1130 if (state & PG_STATE_RECOVERY_UNFOUND)
f67539c2 1131 *css << "recovery_unfound+";
b32b8144 1132 if (state & PG_STATE_BACKFILL_UNFOUND)
f67539c2 1133 *css << "backfill_unfound+";
7c673cae 1134 if (state & PG_STATE_UNDERSIZED)
f67539c2 1135 *css << "undersized+";
7c673cae 1136 if (state & PG_STATE_DEGRADED)
f67539c2 1137 *css << "degraded+";
7c673cae 1138 if (state & PG_STATE_REMAPPED)
f67539c2 1139 *css << "remapped+";
11fdf7f2 1140 if (state & PG_STATE_PREMERGE)
f67539c2 1141 *css << "premerge+";
7c673cae 1142 if (state & PG_STATE_SCRUBBING)
f67539c2 1143 *css << "scrubbing+";
7c673cae 1144 if (state & PG_STATE_DEEP_SCRUB)
f67539c2 1145 *css << "deep+";
7c673cae 1146 if (state & PG_STATE_INCONSISTENT)
f67539c2 1147 *css << "inconsistent+";
7c673cae 1148 if (state & PG_STATE_PEERING)
f67539c2 1149 *css << "peering+";
7c673cae 1150 if (state & PG_STATE_REPAIR)
f67539c2 1151 *css << "repair+";
3efd9988 1152 if (state & PG_STATE_BACKFILL_WAIT)
f67539c2 1153 *css << "backfill_wait+";
3efd9988 1154 if (state & PG_STATE_BACKFILLING)
f67539c2 1155 *css << "backfilling+";
c07f9fc5 1156 if (state & PG_STATE_FORCED_BACKFILL)
f67539c2 1157 *css << "forced_backfill+";
7c673cae 1158 if (state & PG_STATE_BACKFILL_TOOFULL)
f67539c2 1159 *css << "backfill_toofull+";
7c673cae 1160 if (state & PG_STATE_INCOMPLETE)
f67539c2 1161 *css << "incomplete+";
7c673cae 1162 if (state & PG_STATE_PEERED)
f67539c2 1163 *css << "peered+";
7c673cae 1164 if (state & PG_STATE_SNAPTRIM)
f67539c2 1165 *css << "snaptrim+";
7c673cae 1166 if (state & PG_STATE_SNAPTRIM_WAIT)
f67539c2 1167 *css << "snaptrim_wait+";
224ce89b 1168 if (state & PG_STATE_SNAPTRIM_ERROR)
f67539c2 1169 *css << "snaptrim_error+";
11fdf7f2 1170 if (state & PG_STATE_FAILED_REPAIR)
f67539c2 1171 *css << "failed_repair+";
9f95a23c 1172 if (state & PG_STATE_LAGGY)
f67539c2 1173 *css << "laggy+";
9f95a23c 1174 if (state & PG_STATE_WAIT)
f67539c2
TL
1175 *css << "wait+";
1176 auto ret = css->str();
7c673cae
FG
1177 if (ret.length() > 0)
1178 ret.resize(ret.length() - 1);
1179 else
31f18b77 1180 ret = "unknown";
7c673cae
FG
1181 return ret;
1182}
1183
9f95a23c 1184std::optional<uint64_t> pg_string_state(const std::string& state)
7c673cae 1185{
9f95a23c 1186 std::optional<uint64_t> type;
7c673cae
FG
1187 if (state == "active")
1188 type = PG_STATE_ACTIVE;
1189 else if (state == "clean")
1190 type = PG_STATE_CLEAN;
1191 else if (state == "down")
1192 type = PG_STATE_DOWN;
b32b8144
FG
1193 else if (state == "recovery_unfound")
1194 type = PG_STATE_RECOVERY_UNFOUND;
1195 else if (state == "backfill_unfound")
1196 type = PG_STATE_BACKFILL_UNFOUND;
11fdf7f2
TL
1197 else if (state == "premerge")
1198 type = PG_STATE_PREMERGE;
7c673cae
FG
1199 else if (state == "scrubbing")
1200 type = PG_STATE_SCRUBBING;
1201 else if (state == "degraded")
1202 type = PG_STATE_DEGRADED;
1203 else if (state == "inconsistent")
1204 type = PG_STATE_INCONSISTENT;
1205 else if (state == "peering")
1206 type = PG_STATE_PEERING;
1207 else if (state == "repair")
1208 type = PG_STATE_REPAIR;
1209 else if (state == "recovering")
1210 type = PG_STATE_RECOVERING;
c07f9fc5
FG
1211 else if (state == "forced_recovery")
1212 type = PG_STATE_FORCED_RECOVERY;
7c673cae
FG
1213 else if (state == "backfill_wait")
1214 type = PG_STATE_BACKFILL_WAIT;
1215 else if (state == "incomplete")
1216 type = PG_STATE_INCOMPLETE;
1217 else if (state == "stale")
1218 type = PG_STATE_STALE;
1219 else if (state == "remapped")
1220 type = PG_STATE_REMAPPED;
94b18763 1221 else if (state == "deep")
7c673cae 1222 type = PG_STATE_DEEP_SCRUB;
3efd9988
FG
1223 else if (state == "backfilling")
1224 type = PG_STATE_BACKFILLING;
c07f9fc5
FG
1225 else if (state == "forced_backfill")
1226 type = PG_STATE_FORCED_BACKFILL;
7c673cae
FG
1227 else if (state == "backfill_toofull")
1228 type = PG_STATE_BACKFILL_TOOFULL;
1229 else if (state == "recovery_wait")
1230 type = PG_STATE_RECOVERY_WAIT;
1231 else if (state == "recovery_toofull")
1232 type = PG_STATE_RECOVERY_TOOFULL;
1233 else if (state == "undersized")
1234 type = PG_STATE_UNDERSIZED;
1235 else if (state == "activating")
1236 type = PG_STATE_ACTIVATING;
1237 else if (state == "peered")
1238 type = PG_STATE_PEERED;
1239 else if (state == "snaptrim")
1240 type = PG_STATE_SNAPTRIM;
1241 else if (state == "snaptrim_wait")
1242 type = PG_STATE_SNAPTRIM_WAIT;
224ce89b
WB
1243 else if (state == "snaptrim_error")
1244 type = PG_STATE_SNAPTRIM_ERROR;
91327a77
AA
1245 else if (state == "creating")
1246 type = PG_STATE_CREATING;
11fdf7f2
TL
1247 else if (state == "failed_repair")
1248 type = PG_STATE_FAILED_REPAIR;
9f95a23c
TL
1249 else if (state == "laggy")
1250 type = PG_STATE_LAGGY;
1251 else if (state == "wait")
1252 type = PG_STATE_WAIT;
11fdf7f2
TL
1253 else if (state == "unknown")
1254 type = 0;
7c673cae 1255 else
9f95a23c 1256 type = std::nullopt;
7c673cae
FG
1257 return type;
1258}
1259
1260// -- eversion_t --
1261string eversion_t::get_key_name() const
1262{
11fdf7f2
TL
1263 std::string key(32, ' ');
1264 get_key_name(&key[0]);
1265 key.resize(31); // remove the null terminator
1266 return key;
7c673cae
FG
1267}
1268
7c673cae
FG
1269// -- pool_snap_info_t --
1270void pool_snap_info_t::dump(Formatter *f) const
1271{
1272 f->dump_unsigned("snapid", snapid);
1273 f->dump_stream("stamp") << stamp;
1274 f->dump_string("name", name);
1275}
1276
9f95a23c 1277void pool_snap_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 1278{
11fdf7f2 1279 using ceph::encode;
7c673cae
FG
1280 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1281 __u8 struct_v = 1;
11fdf7f2
TL
1282 encode(struct_v, bl);
1283 encode(snapid, bl);
1284 encode(stamp, bl);
1285 encode(name, bl);
7c673cae
FG
1286 return;
1287 }
1288 ENCODE_START(2, 2, bl);
11fdf7f2
TL
1289 encode(snapid, bl);
1290 encode(stamp, bl);
1291 encode(name, bl);
7c673cae
FG
1292 ENCODE_FINISH(bl);
1293}
1294
9f95a23c 1295void pool_snap_info_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
1296{
1297 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2
TL
1298 decode(snapid, bl);
1299 decode(stamp, bl);
1300 decode(name, bl);
7c673cae
FG
1301 DECODE_FINISH(bl);
1302}
1303
1304void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
1305{
1306 o.push_back(new pool_snap_info_t);
1307 o.push_back(new pool_snap_info_t);
1308 o.back()->snapid = 1;
1309 o.back()->stamp = utime_t(1, 2);
1310 o.back()->name = "foo";
1311}
1312
1313// -- pool_opts_t --
1314
1315typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
1316static opt_mapping_t opt_mapping = boost::assign::map_list_of
1317 ("scrub_min_interval", pool_opts_t::opt_desc_t(
1318 pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
1319 ("scrub_max_interval", pool_opts_t::opt_desc_t(
1320 pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
1321 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
1322 pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
1323 ("recovery_priority", pool_opts_t::opt_desc_t(
1324 pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
1325 ("recovery_op_priority", pool_opts_t::opt_desc_t(
1326 pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
1327 ("scrub_priority", pool_opts_t::opt_desc_t(
1328 pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
1329 ("compression_mode", pool_opts_t::opt_desc_t(
1330 pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
1331 ("compression_algorithm", pool_opts_t::opt_desc_t(
1332 pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
1333 ("compression_required_ratio", pool_opts_t::opt_desc_t(
1334 pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
1335 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
1336 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
1337 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
1338 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
1339 ("csum_type", pool_opts_t::opt_desc_t(
1340 pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
1341 ("csum_max_block", pool_opts_t::opt_desc_t(
1342 pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
1343 ("csum_min_block", pool_opts_t::opt_desc_t(
11fdf7f2
TL
1344 pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT))
1345 ("fingerprint_algorithm", pool_opts_t::opt_desc_t(
1346 pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
1347 ("pg_num_min", pool_opts_t::opt_desc_t(
1348 pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
1349 ("target_size_bytes", pool_opts_t::opt_desc_t(
1350 pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
1351 ("target_size_ratio", pool_opts_t::opt_desc_t(
1352 pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE))
1353 ("pg_autoscale_bias", pool_opts_t::opt_desc_t(
9f95a23c
TL
1354 pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE))
1355 ("read_lease_interval", pool_opts_t::opt_desc_t(
f67539c2
TL
1356 pool_opts_t::READ_LEASE_INTERVAL, pool_opts_t::DOUBLE))
1357 ("dedup_tier", pool_opts_t::opt_desc_t(
1358 pool_opts_t::DEDUP_TIER, pool_opts_t::INT))
1359 ("dedup_chunk_algorithm", pool_opts_t::opt_desc_t(
1360 pool_opts_t::DEDUP_CHUNK_ALGORITHM, pool_opts_t::STR))
1361 ("dedup_cdc_chunk_size", pool_opts_t::opt_desc_t(
1362 pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT));
7c673cae 1363
11fdf7f2
TL
1364bool pool_opts_t::is_opt_name(const std::string& name)
1365{
1366 return opt_mapping.count(name);
7c673cae
FG
1367}
1368
11fdf7f2
TL
1369pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name)
1370{
9f95a23c 1371 auto i = opt_mapping.find(name);
11fdf7f2
TL
1372 ceph_assert(i != opt_mapping.end());
1373 return i->second;
7c673cae
FG
1374}
1375
11fdf7f2
TL
1376bool pool_opts_t::is_set(pool_opts_t::key_t key) const
1377{
1378 return opts.count(key);
7c673cae
FG
1379}
1380
11fdf7f2
TL
1381const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const
1382{
9f95a23c 1383 auto i = opts.find(key);
11fdf7f2 1384 ceph_assert(i != opts.end());
7c673cae
FG
1385 return i->second;
1386}
1387
1388bool pool_opts_t::unset(pool_opts_t::key_t key) {
1389 return opts.erase(key) > 0;
1390}
1391
11fdf7f2 1392class pool_opts_dumper_t : public boost::static_visitor<> {
7c673cae
FG
1393public:
1394 pool_opts_dumper_t(const std::string& name_, Formatter* f_) :
1395 name(name_.c_str()), f(f_) {}
1396
1397 void operator()(std::string s) const {
1398 f->dump_string(name, s);
1399 }
11fdf7f2 1400 void operator()(int64_t i) const {
7c673cae
FG
1401 f->dump_int(name, i);
1402 }
1403 void operator()(double d) const {
1404 f->dump_float(name, d);
1405 }
1406
1407private:
1408 const char* name;
1409 Formatter* f;
1410};
1411
1412void pool_opts_t::dump(const std::string& name, Formatter* f) const
1413{
1414 const opt_desc_t& desc = get_opt_desc(name);
9f95a23c 1415 auto i = opts.find(desc.key);
7c673cae
FG
1416 if (i == opts.end()) {
1417 return;
1418 }
1419 boost::apply_visitor(pool_opts_dumper_t(name, f), i->second);
1420}
1421
1422void pool_opts_t::dump(Formatter* f) const
1423{
9f95a23c 1424 for (auto i = opt_mapping.cbegin(); i != opt_mapping.cend(); ++i) {
7c673cae
FG
1425 const std::string& name = i->first;
1426 const opt_desc_t& desc = i->second;
9f95a23c 1427 auto j = opts.find(desc.key);
7c673cae
FG
1428 if (j == opts.end()) {
1429 continue;
1430 }
1431 boost::apply_visitor(pool_opts_dumper_t(name, f), j->second);
1432 }
1433}
1434
11fdf7f2 1435class pool_opts_encoder_t : public boost::static_visitor<> {
7c673cae 1436public:
9f95a23c 1437 explicit pool_opts_encoder_t(ceph::buffer::list& bl_, uint64_t features)
11fdf7f2
TL
1438 : bl(bl_),
1439 features(features) {}
1440
1441 void operator()(const std::string &s) const {
1442 encode(static_cast<int32_t>(pool_opts_t::STR), bl);
1443 encode(s, bl);
1444 }
1445 void operator()(int64_t i) const {
1446 encode(static_cast<int32_t>(pool_opts_t::INT), bl);
1447 if (HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1448 encode(i, bl);
1449 } else {
1450 encode(static_cast<int32_t>(i), bl);
1451 }
7c673cae
FG
1452 }
1453 void operator()(double d) const {
11fdf7f2
TL
1454 encode(static_cast<int32_t>(pool_opts_t::DOUBLE), bl);
1455 encode(d, bl);
7c673cae
FG
1456 }
1457
1458private:
9f95a23c 1459 ceph::buffer::list& bl;
11fdf7f2 1460 uint64_t features;
7c673cae
FG
1461};
1462
9f95a23c 1463void pool_opts_t::encode(ceph::buffer::list& bl, uint64_t features) const
11fdf7f2
TL
1464{
1465 unsigned v = 2;
1466 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1467 v = 1;
1468 }
1469 ENCODE_START(v, 1, bl);
7c673cae 1470 uint32_t n = static_cast<uint32_t>(opts.size());
11fdf7f2 1471 encode(n, bl);
9f95a23c 1472 for (auto i = opts.cbegin(); i != opts.cend(); ++i) {
11fdf7f2
TL
1473 encode(static_cast<int32_t>(i->first), bl);
1474 boost::apply_visitor(pool_opts_encoder_t(bl, features), i->second);
7c673cae
FG
1475 }
1476 ENCODE_FINISH(bl);
1477}
1478
9f95a23c 1479void pool_opts_t::decode(ceph::buffer::list::const_iterator& bl)
11fdf7f2 1480{
7c673cae
FG
1481 DECODE_START(1, bl);
1482 __u32 n;
11fdf7f2 1483 decode(n, bl);
7c673cae
FG
1484 opts.clear();
1485 while (n--) {
1486 int32_t k, t;
11fdf7f2
TL
1487 decode(k, bl);
1488 decode(t, bl);
7c673cae
FG
1489 if (t == STR) {
1490 std::string s;
11fdf7f2 1491 decode(s, bl);
7c673cae
FG
1492 opts[static_cast<key_t>(k)] = s;
1493 } else if (t == INT) {
11fdf7f2
TL
1494 int64_t i;
1495 if (struct_v >= 2) {
1496 decode(i, bl);
1497 } else {
1498 int ii;
1499 decode(ii, bl);
1500 i = ii;
1501 }
7c673cae
FG
1502 opts[static_cast<key_t>(k)] = i;
1503 } else if (t == DOUBLE) {
1504 double d;
11fdf7f2 1505 decode(d, bl);
7c673cae
FG
1506 opts[static_cast<key_t>(k)] = d;
1507 } else {
11fdf7f2 1508 ceph_assert(!"invalid type");
7c673cae
FG
1509 }
1510 }
1511 DECODE_FINISH(bl);
1512}
1513
1514ostream& operator<<(ostream& out, const pool_opts_t& opts)
1515{
9f95a23c 1516 for (auto i = opt_mapping.begin(); i != opt_mapping.end(); ++i) {
7c673cae
FG
1517 const std::string& name = i->first;
1518 const pool_opts_t::opt_desc_t& desc = i->second;
9f95a23c 1519 auto j = opts.opts.find(desc.key);
7c673cae
FG
1520 if (j == opts.opts.end()) {
1521 continue;
1522 }
1523 out << " " << name << " " << j->second;
1524 }
1525 return out;
1526}
1527
1528// -- pg_pool_t --
1529
c07f9fc5
FG
1530const char *pg_pool_t::APPLICATION_NAME_CEPHFS("cephfs");
1531const char *pg_pool_t::APPLICATION_NAME_RBD("rbd");
1532const char *pg_pool_t::APPLICATION_NAME_RGW("rgw");
1533
7c673cae
FG
1534void pg_pool_t::dump(Formatter *f) const
1535{
11fdf7f2 1536 f->dump_stream("create_time") << get_create_time();
7c673cae
FG
1537 f->dump_unsigned("flags", get_flags());
1538 f->dump_string("flags_names", get_flags_string());
1539 f->dump_int("type", get_type());
1540 f->dump_int("size", get_size());
1541 f->dump_int("min_size", get_min_size());
31f18b77 1542 f->dump_int("crush_rule", get_crush_rule());
f67539c2
TL
1543 f->dump_int("peering_crush_bucket_count", peering_crush_bucket_count);
1544 f->dump_int("peering_crush_bucket_target", peering_crush_bucket_target);
1545 f->dump_int("peering_crush_bucket_barrier", peering_crush_bucket_barrier);
1546 f->dump_int("peering_crush_bucket_mandatory_member", peering_crush_mandatory_member);
7c673cae 1547 f->dump_int("object_hash", get_object_hash());
11fdf7f2
TL
1548 f->dump_string("pg_autoscale_mode",
1549 get_pg_autoscale_mode_name(pg_autoscale_mode));
7c673cae
FG
1550 f->dump_unsigned("pg_num", get_pg_num());
1551 f->dump_unsigned("pg_placement_num", get_pgp_num());
11fdf7f2
TL
1552 f->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
1553 f->dump_unsigned("pg_num_target", get_pg_num_target());
1554 f->dump_unsigned("pg_num_pending", get_pg_num_pending());
1555 f->dump_object("last_pg_merge_meta", last_pg_merge_meta);
7c673cae
FG
1556 f->dump_stream("last_change") << get_last_change();
1557 f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
11fdf7f2
TL
1558 f->dump_stream("last_force_op_resend_prenautilus")
1559 << get_last_force_op_resend_prenautilus();
7c673cae
FG
1560 f->dump_stream("last_force_op_resend_preluminous")
1561 << get_last_force_op_resend_preluminous();
1562 f->dump_unsigned("auid", get_auid());
1563 f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1564 f->dump_unsigned("snap_seq", get_snap_seq());
1565 f->dump_unsigned("snap_epoch", get_snap_epoch());
1566 f->open_array_section("pool_snaps");
9f95a23c 1567 for (auto p = snaps.cbegin(); p != snaps.cend(); ++p) {
7c673cae
FG
1568 f->open_object_section("pool_snap_info");
1569 p->second.dump(f);
1570 f->close_section();
1571 }
1572 f->close_section();
1573 f->dump_stream("removed_snaps") << removed_snaps;
1574 f->dump_unsigned("quota_max_bytes", quota_max_bytes);
1575 f->dump_unsigned("quota_max_objects", quota_max_objects);
1576 f->open_array_section("tiers");
9f95a23c 1577 for (auto p = tiers.cbegin(); p != tiers.cend(); ++p)
7c673cae
FG
1578 f->dump_unsigned("pool_id", *p);
1579 f->close_section();
1580 f->dump_int("tier_of", tier_of);
1581 f->dump_int("read_tier", read_tier);
1582 f->dump_int("write_tier", write_tier);
1583 f->dump_string("cache_mode", get_cache_mode_name());
1584 f->dump_unsigned("target_max_bytes", target_max_bytes);
1585 f->dump_unsigned("target_max_objects", target_max_objects);
1586 f->dump_unsigned("cache_target_dirty_ratio_micro",
1587 cache_target_dirty_ratio_micro);
1588 f->dump_unsigned("cache_target_dirty_high_ratio_micro",
1589 cache_target_dirty_high_ratio_micro);
1590 f->dump_unsigned("cache_target_full_ratio_micro",
1591 cache_target_full_ratio_micro);
1592 f->dump_unsigned("cache_min_flush_age", cache_min_flush_age);
1593 f->dump_unsigned("cache_min_evict_age", cache_min_evict_age);
1594 f->dump_string("erasure_code_profile", erasure_code_profile);
1595 f->open_object_section("hit_set_params");
1596 hit_set_params.dump(f);
1597 f->close_section(); // hit_set_params
1598 f->dump_unsigned("hit_set_period", hit_set_period);
1599 f->dump_unsigned("hit_set_count", hit_set_count);
1600 f->dump_bool("use_gmt_hitset", use_gmt_hitset);
1601 f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
1602 f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote);
1603 f->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate);
1604 f->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n);
1605 f->open_array_section("grade_table");
1606 for (unsigned i = 0; i < hit_set_count; ++i)
1607 f->dump_unsigned("value", get_grade(i));
1608 f->close_section();
1609 f->dump_unsigned("stripe_width", get_stripe_width());
1610 f->dump_unsigned("expected_num_objects", expected_num_objects);
1611 f->dump_bool("fast_read", fast_read);
1612 f->open_object_section("options");
1613 opts.dump(f);
1614 f->close_section(); // options
c07f9fc5
FG
1615 f->open_object_section("application_metadata");
1616 for (auto &app_pair : application_metadata) {
1617 f->open_object_section(app_pair.first.c_str());
1618 for (auto &kv_pair : app_pair.second) {
1619 f->dump_string(kv_pair.first.c_str(), kv_pair.second);
1620 }
1621 f->close_section(); // application
1622 }
1623 f->close_section(); // application_metadata
7c673cae
FG
1624}
1625
1626void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
1627 for (size_t i = 0; i < from.size(); ++i) {
1628 if (from[i] != CRUSH_ITEM_NONE) {
1629 to->insert(
1630 pg_shard_t(
1631 from[i],
11fdf7f2 1632 is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
7c673cae
FG
1633 }
1634 }
1635}
1636
1637void pg_pool_t::calc_pg_masks()
1638{
1639 pg_num_mask = (1 << cbits(pg_num-1)) - 1;
1640 pgp_num_mask = (1 << cbits(pgp_num-1)) - 1;
1641}
1642
1643unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid) const
1644{
1645 if (pg_num == pg_num_mask + 1)
1646 return pg_num; // power-of-2 split
1647 unsigned mask = pg_num_mask >> 1;
1648 if ((pgid.ps() & mask) < (pg_num & mask))
1649 return pg_num_mask + 1; // smaller bin size (already split)
1650 else
1651 return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
1652}
1653
11fdf7f2
TL
1654bool pg_pool_t::is_pending_merge(pg_t pgid, bool *target) const
1655{
1656 if (pg_num_pending >= pg_num) {
1657 return false;
1658 }
1659 if (pgid.ps() >= pg_num_pending && pgid.ps() < pg_num) {
1660 if (target) {
1661 *target = false;
1662 }
1663 return true;
1664 }
1665 for (unsigned ps = pg_num_pending; ps < pg_num; ++ps) {
1666 if (pg_t(ps, pgid.pool()).get_parent() == pgid) {
1667 if (target) {
1668 *target = true;
1669 }
1670 return true;
1671 }
1672 }
1673 return false;
1674}
1675
7c673cae
FG
1676/*
1677 * we have two snap modes:
11fdf7f2 1678 * - pool snaps
7c673cae
FG
1679 * - snap existence/non-existence defined by snaps[] and snap_seq
1680 * - user managed snaps
11fdf7f2 1681 * - existence tracked by librados user
7c673cae
FG
1682 */
1683bool pg_pool_t::is_pool_snaps_mode() const
1684{
11fdf7f2 1685 return has_flag(FLAG_POOL_SNAPS);
7c673cae
FG
1686}
1687
1688bool pg_pool_t::is_unmanaged_snaps_mode() const
1689{
11fdf7f2 1690 return has_flag(FLAG_SELFMANAGED_SNAPS);
7c673cae
FG
1691}
1692
1693bool pg_pool_t::is_removed_snap(snapid_t s) const
1694{
1695 if (is_pool_snaps_mode())
1696 return s <= get_snap_seq() && snaps.count(s) == 0;
1697 else
1698 return removed_snaps.contains(s);
1699}
1700
f67539c2 1701snapid_t pg_pool_t::snap_exists(std::string_view s) const
7c673cae 1702{
9f95a23c 1703 for (auto p = snaps.cbegin(); p != snaps.cend(); ++p)
7c673cae
FG
1704 if (p->second.name == s)
1705 return p->second.snapid;
1706 return 0;
1707}
1708
1709void pg_pool_t::add_snap(const char *n, utime_t stamp)
1710{
11fdf7f2
TL
1711 ceph_assert(!is_unmanaged_snaps_mode());
1712 flags |= FLAG_POOL_SNAPS;
7c673cae
FG
1713 snapid_t s = get_snap_seq() + 1;
1714 snap_seq = s;
1715 snaps[s].snapid = s;
1716 snaps[s].name = n;
1717 snaps[s].stamp = stamp;
1718}
1719
9f95a23c 1720uint64_t pg_pool_t::add_unmanaged_snap(bool preoctopus_compat)
7c673cae 1721{
11fdf7f2
TL
1722 ceph_assert(!is_pool_snaps_mode());
1723 if (snap_seq == 0) {
9f95a23c
TL
1724 if (preoctopus_compat) {
1725 // kludge for pre-mimic tracking of pool vs selfmanaged snaps. after
1726 // mimic this field is not decoded but our flag is set; pre-mimic, we
1727 // have a non-empty removed_snaps to signifiy a non-pool-snaps pool.
1728 removed_snaps.insert(snapid_t(1));
1729 }
7c673cae
FG
1730 snap_seq = 1;
1731 }
11fdf7f2 1732 flags |= FLAG_SELFMANAGED_SNAPS;
9f95a23c
TL
1733 snap_seq = snap_seq + 1;
1734 return snap_seq;
7c673cae
FG
1735}
1736
1737void pg_pool_t::remove_snap(snapid_t s)
1738{
11fdf7f2 1739 ceph_assert(snaps.count(s));
7c673cae
FG
1740 snaps.erase(s);
1741 snap_seq = snap_seq + 1;
1742}
1743
9f95a23c 1744void pg_pool_t::remove_unmanaged_snap(snapid_t s, bool preoctopus_compat)
7c673cae 1745{
11fdf7f2 1746 ceph_assert(is_unmanaged_snaps_mode());
9f95a23c
TL
1747 ++snap_seq;
1748 if (preoctopus_compat) {
1749 removed_snaps.insert(s);
1750 // try to add in the new seq, just to try to keep the interval_set contiguous
1751 if (!removed_snaps.contains(get_snap_seq())) {
1752 removed_snaps.insert(get_snap_seq());
1753 }
28e407b8 1754 }
7c673cae
FG
1755}
1756
1757SnapContext pg_pool_t::get_snap_context() const
1758{
1759 vector<snapid_t> s(snaps.size());
1760 unsigned i = 0;
9f95a23c 1761 for (auto p = snaps.crbegin(); p != snaps.crend(); ++p)
7c673cae
FG
1762 s[i++] = p->first;
1763 return SnapContext(get_snap_seq(), s);
1764}
1765
1766uint32_t pg_pool_t::hash_key(const string& key, const string& ns) const
1767{
1768 if (ns.empty())
1769 return ceph_str_hash(object_hash, key.data(), key.length());
1770 int nsl = ns.length();
1771 int len = key.length() + nsl + 1;
1772 char buf[len];
1773 memcpy(&buf[0], ns.data(), nsl);
1774 buf[nsl] = '\037';
1775 memcpy(&buf[nsl+1], key.data(), key.length());
1776 return ceph_str_hash(object_hash, &buf[0], len);
1777}
1778
1779uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v) const
1780{
1781 return ceph_stable_mod(v, pg_num, pg_num_mask);
1782}
1783
1784/*
1785 * map a raw pg (with full precision ps) into an actual pg, for storage
1786 */
1787pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
1788{
1789 pg.set_ps(ceph_stable_mod(pg.ps(), pg_num, pg_num_mask));
1790 return pg;
1791}
1792
1793/*
1794 * map raw pg (full precision ps) into a placement seed. include
1795 * pool id in that value so that different pools don't use the same
1796 * seeds.
1797 */
1798ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
1799{
1800 if (flags & FLAG_HASHPSPOOL) {
1801 // Hash the pool id so that pool PGs do not overlap.
1802 return
1803 crush_hash32_2(CRUSH_HASH_RJENKINS1,
1804 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
1805 pg.pool());
1806 } else {
1807 // Legacy behavior; add ps and pool together. This is not a great
1808 // idea because the PGs from each pool will essentially overlap on
1809 // top of each other: 0.5 == 1.4 == 2.3 == ...
1810 return
1811 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
1812 pg.pool();
1813 }
1814}
1815
1816uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
1817{
1818 uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
1819 if (pg_num == pg_num_mask + 1) {
1820 r &= ~pg_num_mask;
1821 } else {
1822 unsigned smaller_mask = pg_num_mask >> 1;
1823 if ((pg.ps() & smaller_mask) < (pg_num & smaller_mask)) {
1824 r &= ~pg_num_mask;
1825 } else {
1826 r &= ~smaller_mask;
1827 }
1828 }
1829 r |= pg.ps();
1830 return r;
1831}
1832
9f95a23c 1833void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 1834{
11fdf7f2 1835 using ceph::encode;
7c673cae
FG
1836 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1837 // this encoding matches the old struct ceph_pg_pool
1838 __u8 struct_v = 2;
11fdf7f2
TL
1839 encode(struct_v, bl);
1840 encode(type, bl);
1841 encode(size, bl);
1842 encode(crush_rule, bl);
1843 encode(object_hash, bl);
1844 encode(pg_num, bl);
1845 encode(pgp_num, bl);
7c673cae 1846 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1847 encode(lpg_num, bl);
1848 encode(lpgp_num, bl);
1849 encode(last_change, bl);
1850 encode(snap_seq, bl);
1851 encode(snap_epoch, bl);
7c673cae
FG
1852
1853 __u32 n = snaps.size();
11fdf7f2 1854 encode(n, bl);
7c673cae 1855 n = removed_snaps.num_intervals();
11fdf7f2 1856 encode(n, bl);
7c673cae 1857
11fdf7f2 1858 encode(auid, bl);
7c673cae 1859
11fdf7f2
TL
1860 encode_nohead(snaps, bl, features);
1861 encode_nohead(removed_snaps, bl);
7c673cae
FG
1862 return;
1863 }
1864
1865 if ((features & CEPH_FEATURE_OSDENC) == 0) {
1866 __u8 struct_v = 4;
11fdf7f2
TL
1867 encode(struct_v, bl);
1868 encode(type, bl);
1869 encode(size, bl);
1870 encode(crush_rule, bl);
1871 encode(object_hash, bl);
1872 encode(pg_num, bl);
1873 encode(pgp_num, bl);
7c673cae 1874 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1875 encode(lpg_num, bl);
1876 encode(lpgp_num, bl);
1877 encode(last_change, bl);
1878 encode(snap_seq, bl);
1879 encode(snap_epoch, bl);
1880 encode(snaps, bl, features);
1881 encode(removed_snaps, bl);
1882 encode(auid, bl);
1883 encode(flags, bl);
1884 encode((uint32_t)0, bl); // crash_replay_interval
7c673cae
FG
1885 return;
1886 }
1887
1888 if ((features & CEPH_FEATURE_OSD_POOLRESEND) == 0) {
1889 // we simply added last_force_op_resend here, which is a fully
1890 // backward compatible change. however, encoding the same map
1891 // differently between monitors triggers scrub noise (even though
1892 // they are decodable without the feature), so let's be pendantic
1893 // about it.
1894 ENCODE_START(14, 5, bl);
11fdf7f2
TL
1895 encode(type, bl);
1896 encode(size, bl);
1897 encode(crush_rule, bl);
1898 encode(object_hash, bl);
1899 encode(pg_num, bl);
1900 encode(pgp_num, bl);
7c673cae 1901 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1902 encode(lpg_num, bl);
1903 encode(lpgp_num, bl);
1904 encode(last_change, bl);
1905 encode(snap_seq, bl);
1906 encode(snap_epoch, bl);
1907 encode(snaps, bl, features);
1908 encode(removed_snaps, bl);
1909 encode(auid, bl);
1910 encode(flags, bl);
1911 encode((uint32_t)0, bl); // crash_replay_interval
1912 encode(min_size, bl);
1913 encode(quota_max_bytes, bl);
1914 encode(quota_max_objects, bl);
1915 encode(tiers, bl);
1916 encode(tier_of, bl);
7c673cae 1917 __u8 c = cache_mode;
11fdf7f2
TL
1918 encode(c, bl);
1919 encode(read_tier, bl);
1920 encode(write_tier, bl);
1921 encode(properties, bl);
1922 encode(hit_set_params, bl);
1923 encode(hit_set_period, bl);
1924 encode(hit_set_count, bl);
1925 encode(stripe_width, bl);
1926 encode(target_max_bytes, bl);
1927 encode(target_max_objects, bl);
1928 encode(cache_target_dirty_ratio_micro, bl);
1929 encode(cache_target_full_ratio_micro, bl);
1930 encode(cache_min_flush_age, bl);
1931 encode(cache_min_evict_age, bl);
1932 encode(erasure_code_profile, bl);
7c673cae
FG
1933 ENCODE_FINISH(bl);
1934 return;
1935 }
1936
f67539c2 1937 uint8_t v = 30;
28e407b8
AA
1938 // NOTE: any new encoding dependencies must be reflected by
1939 // SIGNIFICANT_FEATURES
7c673cae
FG
1940 if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
1941 // this was the first post-hammer thing we added; if it's missing, encode
1942 // like hammer.
1943 v = 21;
94b18763 1944 } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
7c673cae 1945 v = 24;
11fdf7f2
TL
1946 } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
1947 v = 26;
1948 } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1949 v = 27;
f67539c2
TL
1950 } else if (!is_stretch_pool()) {
1951 v = 29;
7c673cae
FG
1952 }
1953
1954 ENCODE_START(v, 5, bl);
11fdf7f2
TL
1955 encode(type, bl);
1956 encode(size, bl);
1957 encode(crush_rule, bl);
1958 encode(object_hash, bl);
1959 encode(pg_num, bl);
1960 encode(pgp_num, bl);
7c673cae 1961 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1962 encode(lpg_num, bl);
1963 encode(lpgp_num, bl);
1964 encode(last_change, bl);
1965 encode(snap_seq, bl);
1966 encode(snap_epoch, bl);
1967 encode(snaps, bl, features);
1968 encode(removed_snaps, bl);
1969 encode(auid, bl);
1970 if (v >= 27) {
1971 encode(flags, bl);
1972 } else {
1973 auto tmp = flags;
1974 tmp &= ~(FLAG_SELFMANAGED_SNAPS | FLAG_POOL_SNAPS | FLAG_CREATING);
1975 encode(tmp, bl);
1976 }
1977 encode((uint32_t)0, bl); // crash_replay_interval
1978 encode(min_size, bl);
1979 encode(quota_max_bytes, bl);
1980 encode(quota_max_objects, bl);
1981 encode(tiers, bl);
1982 encode(tier_of, bl);
7c673cae 1983 __u8 c = cache_mode;
11fdf7f2
TL
1984 encode(c, bl);
1985 encode(read_tier, bl);
1986 encode(write_tier, bl);
1987 encode(properties, bl);
1988 encode(hit_set_params, bl);
1989 encode(hit_set_period, bl);
1990 encode(hit_set_count, bl);
1991 encode(stripe_width, bl);
1992 encode(target_max_bytes, bl);
1993 encode(target_max_objects, bl);
1994 encode(cache_target_dirty_ratio_micro, bl);
1995 encode(cache_target_full_ratio_micro, bl);
1996 encode(cache_min_flush_age, bl);
1997 encode(cache_min_evict_age, bl);
1998 encode(erasure_code_profile, bl);
1999 encode(last_force_op_resend_preluminous, bl);
2000 encode(min_read_recency_for_promote, bl);
2001 encode(expected_num_objects, bl);
7c673cae 2002 if (v >= 19) {
11fdf7f2 2003 encode(cache_target_dirty_high_ratio_micro, bl);
7c673cae
FG
2004 }
2005 if (v >= 20) {
11fdf7f2 2006 encode(min_write_recency_for_promote, bl);
7c673cae
FG
2007 }
2008 if (v >= 21) {
11fdf7f2 2009 encode(use_gmt_hitset, bl);
7c673cae
FG
2010 }
2011 if (v >= 22) {
11fdf7f2 2012 encode(fast_read, bl);
7c673cae
FG
2013 }
2014 if (v >= 23) {
11fdf7f2
TL
2015 encode(hit_set_grade_decay_rate, bl);
2016 encode(hit_set_search_last_n, bl);
7c673cae
FG
2017 }
2018 if (v >= 24) {
11fdf7f2 2019 encode(opts, bl, features);
7c673cae
FG
2020 }
2021 if (v >= 25) {
11fdf7f2 2022 encode(last_force_op_resend_prenautilus, bl);
7c673cae 2023 }
c07f9fc5 2024 if (v >= 26) {
11fdf7f2
TL
2025 encode(application_metadata, bl);
2026 }
2027 if (v >= 27) {
2028 encode(create_time, bl);
2029 }
2030 if (v >= 28) {
2031 encode(pg_num_target, bl);
2032 encode(pgp_num_target, bl);
2033 encode(pg_num_pending, bl);
2034 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_started from 14.1.[01]
2035 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_clean from 14.1.[01]
2036 encode(last_force_op_resend, bl);
2037 encode(pg_autoscale_mode, bl);
2038 }
2039 if (v >= 29) {
2040 encode(last_pg_merge_meta, bl);
c07f9fc5 2041 }
f67539c2
TL
2042 if (v >= 30) {
2043 encode(peering_crush_bucket_count, bl);
2044 encode(peering_crush_bucket_target, bl);
2045 encode(peering_crush_bucket_barrier, bl);
2046 encode(peering_crush_mandatory_member, bl);
2047 }
7c673cae
FG
2048 ENCODE_FINISH(bl);
2049}
2050
9f95a23c 2051void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 2052{
f67539c2 2053 DECODE_START_LEGACY_COMPAT_LEN(30, 5, 5, bl);
11fdf7f2
TL
2054 decode(type, bl);
2055 decode(size, bl);
2056 decode(crush_rule, bl);
2057 decode(object_hash, bl);
2058 decode(pg_num, bl);
2059 decode(pgp_num, bl);
7c673cae
FG
2060 {
2061 __u32 lpg_num, lpgp_num;
11fdf7f2
TL
2062 decode(lpg_num, bl);
2063 decode(lpgp_num, bl);
7c673cae 2064 }
11fdf7f2
TL
2065 decode(last_change, bl);
2066 decode(snap_seq, bl);
2067 decode(snap_epoch, bl);
7c673cae
FG
2068
2069 if (struct_v >= 3) {
11fdf7f2
TL
2070 decode(snaps, bl);
2071 decode(removed_snaps, bl);
2072 decode(auid, bl);
7c673cae
FG
2073 } else {
2074 __u32 n, m;
11fdf7f2
TL
2075 decode(n, bl);
2076 decode(m, bl);
2077 decode(auid, bl);
2078 decode_nohead(n, snaps, bl);
2079 decode_nohead(m, removed_snaps, bl);
7c673cae
FG
2080 }
2081
2082 if (struct_v >= 4) {
11fdf7f2
TL
2083 decode(flags, bl);
2084 uint32_t crash_replay_interval;
2085 decode(crash_replay_interval, bl);
7c673cae
FG
2086 } else {
2087 flags = 0;
11fdf7f2
TL
2088 }
2089 // upgrade path for selfmanaged vs pool snaps
2090 if (snap_seq > 0 && (flags & (FLAG_SELFMANAGED_SNAPS|FLAG_POOL_SNAPS)) == 0) {
2091 if (!removed_snaps.empty()) {
2092 flags |= FLAG_SELFMANAGED_SNAPS;
2093 } else {
2094 flags |= FLAG_POOL_SNAPS;
2095 }
7c673cae
FG
2096 }
2097 if (struct_v >= 7) {
11fdf7f2 2098 decode(min_size, bl);
7c673cae
FG
2099 } else {
2100 min_size = size - size/2;
2101 }
2102 if (struct_v >= 8) {
11fdf7f2
TL
2103 decode(quota_max_bytes, bl);
2104 decode(quota_max_objects, bl);
7c673cae
FG
2105 }
2106 if (struct_v >= 9) {
11fdf7f2
TL
2107 decode(tiers, bl);
2108 decode(tier_of, bl);
7c673cae 2109 __u8 v;
11fdf7f2 2110 decode(v, bl);
7c673cae 2111 cache_mode = (cache_mode_t)v;
11fdf7f2
TL
2112 decode(read_tier, bl);
2113 decode(write_tier, bl);
7c673cae
FG
2114 }
2115 if (struct_v >= 10) {
11fdf7f2 2116 decode(properties, bl);
7c673cae
FG
2117 }
2118 if (struct_v >= 11) {
11fdf7f2
TL
2119 decode(hit_set_params, bl);
2120 decode(hit_set_period, bl);
2121 decode(hit_set_count, bl);
7c673cae
FG
2122 } else {
2123 pg_pool_t def;
2124 hit_set_period = def.hit_set_period;
2125 hit_set_count = def.hit_set_count;
2126 }
2127 if (struct_v >= 12) {
11fdf7f2 2128 decode(stripe_width, bl);
7c673cae
FG
2129 } else {
2130 set_stripe_width(0);
2131 }
2132 if (struct_v >= 13) {
11fdf7f2
TL
2133 decode(target_max_bytes, bl);
2134 decode(target_max_objects, bl);
2135 decode(cache_target_dirty_ratio_micro, bl);
2136 decode(cache_target_full_ratio_micro, bl);
2137 decode(cache_min_flush_age, bl);
2138 decode(cache_min_evict_age, bl);
7c673cae
FG
2139 } else {
2140 target_max_bytes = 0;
2141 target_max_objects = 0;
2142 cache_target_dirty_ratio_micro = 0;
2143 cache_target_full_ratio_micro = 0;
2144 cache_min_flush_age = 0;
2145 cache_min_evict_age = 0;
2146 }
2147 if (struct_v >= 14) {
11fdf7f2 2148 decode(erasure_code_profile, bl);
7c673cae
FG
2149 }
2150 if (struct_v >= 15) {
11fdf7f2 2151 decode(last_force_op_resend_preluminous, bl);
7c673cae
FG
2152 } else {
2153 last_force_op_resend_preluminous = 0;
2154 }
2155 if (struct_v >= 16) {
11fdf7f2 2156 decode(min_read_recency_for_promote, bl);
7c673cae
FG
2157 } else {
2158 min_read_recency_for_promote = 1;
2159 }
2160 if (struct_v >= 17) {
11fdf7f2 2161 decode(expected_num_objects, bl);
7c673cae
FG
2162 } else {
2163 expected_num_objects = 0;
2164 }
2165 if (struct_v >= 19) {
11fdf7f2 2166 decode(cache_target_dirty_high_ratio_micro, bl);
7c673cae
FG
2167 } else {
2168 cache_target_dirty_high_ratio_micro = cache_target_dirty_ratio_micro;
2169 }
2170 if (struct_v >= 20) {
11fdf7f2 2171 decode(min_write_recency_for_promote, bl);
7c673cae
FG
2172 } else {
2173 min_write_recency_for_promote = 1;
2174 }
2175 if (struct_v >= 21) {
11fdf7f2 2176 decode(use_gmt_hitset, bl);
7c673cae
FG
2177 } else {
2178 use_gmt_hitset = false;
2179 }
2180 if (struct_v >= 22) {
11fdf7f2 2181 decode(fast_read, bl);
7c673cae
FG
2182 } else {
2183 fast_read = false;
2184 }
2185 if (struct_v >= 23) {
11fdf7f2
TL
2186 decode(hit_set_grade_decay_rate, bl);
2187 decode(hit_set_search_last_n, bl);
7c673cae
FG
2188 } else {
2189 hit_set_grade_decay_rate = 0;
2190 hit_set_search_last_n = 1;
2191 }
2192 if (struct_v >= 24) {
11fdf7f2 2193 decode(opts, bl);
7c673cae
FG
2194 }
2195 if (struct_v >= 25) {
11fdf7f2 2196 decode(last_force_op_resend_prenautilus, bl);
7c673cae 2197 } else {
11fdf7f2 2198 last_force_op_resend_prenautilus = last_force_op_resend_preluminous;
7c673cae 2199 }
c07f9fc5 2200 if (struct_v >= 26) {
11fdf7f2
TL
2201 decode(application_metadata, bl);
2202 }
2203 if (struct_v >= 27) {
2204 decode(create_time, bl);
2205 }
2206 if (struct_v >= 28) {
2207 decode(pg_num_target, bl);
2208 decode(pgp_num_target, bl);
2209 decode(pg_num_pending, bl);
2210 epoch_t old_merge_last_epoch_clean, old_merge_last_epoch_started;
2211 decode(old_merge_last_epoch_started, bl);
2212 decode(old_merge_last_epoch_clean, bl);
2213 decode(last_force_op_resend, bl);
2214 decode(pg_autoscale_mode, bl);
2215 if (struct_v >= 29) {
2216 decode(last_pg_merge_meta, bl);
2217 } else {
2218 last_pg_merge_meta.last_epoch_clean = old_merge_last_epoch_clean;
2219 last_pg_merge_meta.last_epoch_started = old_merge_last_epoch_started;
2220 }
2221 } else {
2222 pg_num_target = pg_num;
2223 pgp_num_target = pgp_num;
2224 pg_num_pending = pg_num;
2225 last_force_op_resend = last_force_op_resend_prenautilus;
9f95a23c 2226 pg_autoscale_mode = pg_autoscale_mode_t::WARN; // default to warn on upgrade
c07f9fc5 2227 }
f67539c2
TL
2228 if (struct_v >= 30) {
2229 decode(peering_crush_bucket_count, bl);
2230 decode(peering_crush_bucket_target, bl);
2231 decode(peering_crush_bucket_barrier, bl);
2232 decode(peering_crush_mandatory_member, bl);
2233 }
7c673cae
FG
2234 DECODE_FINISH(bl);
2235 calc_pg_masks();
2236 calc_grade_table();
2237}
2238
f67539c2
TL
2239bool pg_pool_t::stretch_set_can_peer(const set<int>& want, const OSDMap& osdmap,
2240 std::ostream * out) const
2241{
2242 if (!is_stretch_pool()) return true;
2243 const uint32_t barrier_id = peering_crush_bucket_barrier;
2244 const uint32_t barrier_count = peering_crush_bucket_count;
2245 set<int> ancestors;
2246 const shared_ptr<CrushWrapper>& crush = osdmap.crush;
2247 for (int osdid : want) {
2248 int ancestor = crush->get_parent_of_type(osdid, barrier_id,
2249 crush_rule);
2250 ancestors.insert(ancestor);
2251 }
2252 if (ancestors.size() < barrier_count) {
2253 if (out) {
2254 *out << __func__ << ": not enough crush buckets with OSDs in want set "
2255 << want;
2256 }
2257 return false;
2258 } else if (peering_crush_mandatory_member != CRUSH_ITEM_NONE &&
2259 !ancestors.count(peering_crush_mandatory_member)) {
2260 if (out) {
2261 *out << __func__ << ": missing mandatory crush bucket member "
2262 << peering_crush_mandatory_member;
2263 }
2264 return false;
2265 }
2266 return true;
2267}
2268
7c673cae
FG
2269void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
2270{
2271 pg_pool_t a;
2272 o.push_back(new pg_pool_t(a));
2273
11fdf7f2 2274 a.create_time = utime_t(4,5);
7c673cae
FG
2275 a.type = TYPE_REPLICATED;
2276 a.size = 2;
31f18b77 2277 a.crush_rule = 3;
7c673cae
FG
2278 a.object_hash = 4;
2279 a.pg_num = 6;
11fdf7f2
TL
2280 a.pgp_num = 4;
2281 a.pgp_num_target = 4;
2282 a.pg_num_target = 5;
2283 a.pg_num_pending = 5;
2284 a.last_pg_merge_meta.last_epoch_started = 2;
2285 a.last_pg_merge_meta.last_epoch_clean = 2;
7c673cae
FG
2286 a.last_change = 9;
2287 a.last_force_op_resend = 123823;
2288 a.last_force_op_resend_preluminous = 123824;
2289 a.snap_seq = 10;
2290 a.snap_epoch = 11;
11fdf7f2 2291 a.flags = FLAG_POOL_SNAPS;
7c673cae 2292 a.auid = 12;
7c673cae
FG
2293 a.quota_max_bytes = 473;
2294 a.quota_max_objects = 474;
2295 o.push_back(new pg_pool_t(a));
2296
2297 a.snaps[3].name = "asdf";
2298 a.snaps[3].snapid = 3;
2299 a.snaps[3].stamp = utime_t(123, 4);
2300 a.snaps[6].name = "qwer";
2301 a.snaps[6].snapid = 6;
2302 a.snaps[6].stamp = utime_t(23423, 4);
2303 o.push_back(new pg_pool_t(a));
2304
11fdf7f2
TL
2305 a.flags = FLAG_SELFMANAGED_SNAPS;
2306 a.snaps.clear();
2307 a.removed_snaps.insert(2);
7c673cae
FG
2308 a.quota_max_bytes = 2473;
2309 a.quota_max_objects = 4374;
2310 a.tiers.insert(0);
2311 a.tiers.insert(1);
2312 a.tier_of = 2;
2313 a.cache_mode = CACHEMODE_WRITEBACK;
2314 a.read_tier = 1;
2315 a.write_tier = 1;
2316 a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
2317 a.hit_set_period = 3600;
2318 a.hit_set_count = 8;
2319 a.min_read_recency_for_promote = 1;
2320 a.min_write_recency_for_promote = 1;
2321 a.hit_set_grade_decay_rate = 50;
2322 a.hit_set_search_last_n = 1;
2323 a.calc_grade_table();
2324 a.set_stripe_width(12345);
2325 a.target_max_bytes = 1238132132;
2326 a.target_max_objects = 1232132;
2327 a.cache_target_dirty_ratio_micro = 187232;
2328 a.cache_target_dirty_high_ratio_micro = 309856;
2329 a.cache_target_full_ratio_micro = 987222;
2330 a.cache_min_flush_age = 231;
2331 a.cache_min_evict_age = 2321;
2332 a.erasure_code_profile = "profile in osdmap";
2333 a.expected_num_objects = 123456;
2334 a.fast_read = false;
c07f9fc5 2335 a.application_metadata = {{"rbd", {{"key", "value"}}}};
7c673cae
FG
2336 o.push_back(new pg_pool_t(a));
2337}
2338
2339ostream& operator<<(ostream& out, const pg_pool_t& p)
2340{
9f95a23c
TL
2341 out << p.get_type_name();
2342 if (p.get_type_name() == "erasure") {
2343 out << " profile " << p.erasure_code_profile;
2344 }
2345 out << " size " << p.get_size()
7c673cae 2346 << " min_size " << p.get_min_size()
31f18b77 2347 << " crush_rule " << p.get_crush_rule()
7c673cae
FG
2348 << " object_hash " << p.get_object_hash_name()
2349 << " pg_num " << p.get_pg_num()
11fdf7f2
TL
2350 << " pgp_num " << p.get_pgp_num();
2351 if (p.get_pg_num_target() != p.get_pg_num()) {
2352 out << " pg_num_target " << p.get_pg_num_target();
2353 }
2354 if (p.get_pgp_num_target() != p.get_pgp_num()) {
2355 out << " pgp_num_target " << p.get_pgp_num_target();
2356 }
2357 if (p.get_pg_num_pending() != p.get_pg_num()) {
2358 out << " pg_num_pending " << p.get_pg_num_pending();
2359 }
9f95a23c 2360 if (p.pg_autoscale_mode != pg_pool_t::pg_autoscale_mode_t::UNKNOWN) {
11fdf7f2
TL
2361 out << " autoscale_mode " << p.get_pg_autoscale_mode_name(p.pg_autoscale_mode);
2362 }
2363 out << " last_change " << p.get_last_change();
7c673cae 2364 if (p.get_last_force_op_resend() ||
11fdf7f2 2365 p.get_last_force_op_resend_prenautilus() ||
7c673cae
FG
2366 p.get_last_force_op_resend_preluminous())
2367 out << " lfor " << p.get_last_force_op_resend() << "/"
11fdf7f2 2368 << p.get_last_force_op_resend_prenautilus() << "/"
7c673cae
FG
2369 << p.get_last_force_op_resend_preluminous();
2370 if (p.get_auid())
2371 out << " owner " << p.get_auid();
2372 if (p.flags)
2373 out << " flags " << p.get_flags_string();
7c673cae
FG
2374 if (p.quota_max_bytes)
2375 out << " max_bytes " << p.quota_max_bytes;
2376 if (p.quota_max_objects)
2377 out << " max_objects " << p.quota_max_objects;
2378 if (!p.tiers.empty())
2379 out << " tiers " << p.tiers;
2380 if (p.is_tier())
2381 out << " tier_of " << p.tier_of;
2382 if (p.has_read_tier())
2383 out << " read_tier " << p.read_tier;
2384 if (p.has_write_tier())
2385 out << " write_tier " << p.write_tier;
2386 if (p.cache_mode)
2387 out << " cache_mode " << p.get_cache_mode_name();
2388 if (p.target_max_bytes)
2389 out << " target_bytes " << p.target_max_bytes;
2390 if (p.target_max_objects)
2391 out << " target_objects " << p.target_max_objects;
2392 if (p.hit_set_params.get_type() != HitSet::TYPE_NONE) {
2393 out << " hit_set " << p.hit_set_params
2394 << " " << p.hit_set_period << "s"
2395 << " x" << p.hit_set_count << " decay_rate "
2396 << p.hit_set_grade_decay_rate
2397 << " search_last_n " << p.hit_set_search_last_n;
2398 }
2399 if (p.min_read_recency_for_promote)
2400 out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
2401 if (p.min_write_recency_for_promote)
2402 out << " min_write_recency_for_promote " << p.min_write_recency_for_promote;
2403 out << " stripe_width " << p.get_stripe_width();
2404 if (p.expected_num_objects)
2405 out << " expected_num_objects " << p.expected_num_objects;
2406 if (p.fast_read)
2407 out << " fast_read " << p.fast_read;
2408 out << p.opts;
c07f9fc5
FG
2409 if (!p.application_metadata.empty()) {
2410 out << " application ";
2411 for (auto it = p.application_metadata.begin();
2412 it != p.application_metadata.end(); ++it) {
2413 if (it != p.application_metadata.begin())
2414 out << ",";
2415 out << it->first;
2416 }
2417 }
7c673cae
FG
2418 return out;
2419}
2420
2421
2422// -- object_stat_sum_t --
2423
2424void object_stat_sum_t::dump(Formatter *f) const
2425{
2426 f->dump_int("num_bytes", num_bytes);
2427 f->dump_int("num_objects", num_objects);
2428 f->dump_int("num_object_clones", num_object_clones);
2429 f->dump_int("num_object_copies", num_object_copies);
2430 f->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary);
2431 f->dump_int("num_objects_missing", num_objects_missing);
2432 f->dump_int("num_objects_degraded", num_objects_degraded);
2433 f->dump_int("num_objects_misplaced", num_objects_misplaced);
2434 f->dump_int("num_objects_unfound", num_objects_unfound);
2435 f->dump_int("num_objects_dirty", num_objects_dirty);
2436 f->dump_int("num_whiteouts", num_whiteouts);
2437 f->dump_int("num_read", num_rd);
2438 f->dump_int("num_read_kb", num_rd_kb);
2439 f->dump_int("num_write", num_wr);
2440 f->dump_int("num_write_kb", num_wr_kb);
2441 f->dump_int("num_scrub_errors", num_scrub_errors);
2442 f->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors);
2443 f->dump_int("num_deep_scrub_errors", num_deep_scrub_errors);
2444 f->dump_int("num_objects_recovered", num_objects_recovered);
2445 f->dump_int("num_bytes_recovered", num_bytes_recovered);
2446 f->dump_int("num_keys_recovered", num_keys_recovered);
2447 f->dump_int("num_objects_omap", num_objects_omap);
2448 f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
2449 f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
2450 f->dump_int("num_flush", num_flush);
2451 f->dump_int("num_flush_kb", num_flush_kb);
2452 f->dump_int("num_evict", num_evict);
2453 f->dump_int("num_evict_kb", num_evict_kb);
2454 f->dump_int("num_promote", num_promote);
2455 f->dump_int("num_flush_mode_high", num_flush_mode_high);
2456 f->dump_int("num_flush_mode_low", num_flush_mode_low);
2457 f->dump_int("num_evict_mode_some", num_evict_mode_some);
2458 f->dump_int("num_evict_mode_full", num_evict_mode_full);
2459 f->dump_int("num_objects_pinned", num_objects_pinned);
2460 f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
28e407b8 2461 f->dump_int("num_large_omap_objects", num_large_omap_objects);
11fdf7f2
TL
2462 f->dump_int("num_objects_manifest", num_objects_manifest);
2463 f->dump_int("num_omap_bytes", num_omap_bytes);
2464 f->dump_int("num_omap_keys", num_omap_keys);
2465 f->dump_int("num_objects_repaired", num_objects_repaired);
7c673cae
FG
2466}
2467
9f95a23c 2468void object_stat_sum_t::encode(ceph::buffer::list& bl) const
7c673cae 2469{
11fdf7f2 2470 ENCODE_START(20, 14, bl);
7c673cae
FG
2471#if defined(CEPH_LITTLE_ENDIAN)
2472 bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
2473#else
11fdf7f2
TL
2474 encode(num_bytes, bl);
2475 encode(num_objects, bl);
2476 encode(num_object_clones, bl);
2477 encode(num_object_copies, bl);
2478 encode(num_objects_missing_on_primary, bl);
2479 encode(num_objects_degraded, bl);
2480 encode(num_objects_unfound, bl);
2481 encode(num_rd, bl);
2482 encode(num_rd_kb, bl);
2483 encode(num_wr, bl);
2484 encode(num_wr_kb, bl);
2485 encode(num_scrub_errors, bl);
2486 encode(num_objects_recovered, bl);
2487 encode(num_bytes_recovered, bl);
2488 encode(num_keys_recovered, bl);
2489 encode(num_shallow_scrub_errors, bl);
2490 encode(num_deep_scrub_errors, bl);
2491 encode(num_objects_dirty, bl);
2492 encode(num_whiteouts, bl);
2493 encode(num_objects_omap, bl);
2494 encode(num_objects_hit_set_archive, bl);
2495 encode(num_objects_misplaced, bl);
2496 encode(num_bytes_hit_set_archive, bl);
2497 encode(num_flush, bl);
2498 encode(num_flush_kb, bl);
2499 encode(num_evict, bl);
2500 encode(num_evict_kb, bl);
2501 encode(num_promote, bl);
2502 encode(num_flush_mode_high, bl);
2503 encode(num_flush_mode_low, bl);
2504 encode(num_evict_mode_some, bl);
2505 encode(num_evict_mode_full, bl);
2506 encode(num_objects_pinned, bl);
2507 encode(num_objects_missing, bl);
2508 encode(num_legacy_snapsets, bl);
2509 encode(num_large_omap_objects, bl);
2510 encode(num_objects_manifest, bl);
2511 encode(num_omap_bytes, bl);
2512 encode(num_omap_keys, bl);
2513 encode(num_objects_repaired, bl);
7c673cae
FG
2514#endif
2515 ENCODE_FINISH(bl);
2516}
2517
9f95a23c 2518void object_stat_sum_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
2519{
2520 bool decode_finish = false;
11fdf7f2
TL
2521 static const int STAT_SUM_DECODE_VERSION = 20;
2522 DECODE_START(STAT_SUM_DECODE_VERSION, bl);
7c673cae 2523#if defined(CEPH_LITTLE_ENDIAN)
11fdf7f2 2524 if (struct_v == STAT_SUM_DECODE_VERSION) {
7c673cae
FG
2525 bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
2526 decode_finish = true;
2527 }
2528#endif
2529 if (!decode_finish) {
11fdf7f2
TL
2530 decode(num_bytes, bl);
2531 decode(num_objects, bl);
2532 decode(num_object_clones, bl);
2533 decode(num_object_copies, bl);
2534 decode(num_objects_missing_on_primary, bl);
2535 decode(num_objects_degraded, bl);
2536 decode(num_objects_unfound, bl);
2537 decode(num_rd, bl);
2538 decode(num_rd_kb, bl);
2539 decode(num_wr, bl);
2540 decode(num_wr_kb, bl);
2541 decode(num_scrub_errors, bl);
2542 decode(num_objects_recovered, bl);
2543 decode(num_bytes_recovered, bl);
2544 decode(num_keys_recovered, bl);
2545 decode(num_shallow_scrub_errors, bl);
2546 decode(num_deep_scrub_errors, bl);
2547 decode(num_objects_dirty, bl);
2548 decode(num_whiteouts, bl);
2549 decode(num_objects_omap, bl);
2550 decode(num_objects_hit_set_archive, bl);
2551 decode(num_objects_misplaced, bl);
2552 decode(num_bytes_hit_set_archive, bl);
2553 decode(num_flush, bl);
2554 decode(num_flush_kb, bl);
2555 decode(num_evict, bl);
2556 decode(num_evict_kb, bl);
2557 decode(num_promote, bl);
2558 decode(num_flush_mode_high, bl);
2559 decode(num_flush_mode_low, bl);
2560 decode(num_evict_mode_some, bl);
2561 decode(num_evict_mode_full, bl);
2562 decode(num_objects_pinned, bl);
2563 decode(num_objects_missing, bl);
7c673cae 2564 if (struct_v >= 16) {
11fdf7f2 2565 decode(num_legacy_snapsets, bl);
7c673cae
FG
2566 } else {
2567 num_legacy_snapsets = num_object_clones; // upper bound
2568 }
28e407b8 2569 if (struct_v >= 17) {
11fdf7f2
TL
2570 decode(num_large_omap_objects, bl);
2571 }
2572 if (struct_v >= 18) {
2573 decode(num_objects_manifest, bl);
2574 }
2575 if (struct_v >= 19) {
2576 decode(num_omap_bytes, bl);
2577 decode(num_omap_keys, bl);
2578 }
2579 if (struct_v >= 20) {
2580 decode(num_objects_repaired, bl);
28e407b8 2581 }
7c673cae
FG
2582 }
2583 DECODE_FINISH(bl);
2584}
2585
2586void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
2587{
2588 object_stat_sum_t a;
2589
2590 a.num_bytes = 1;
2591 a.num_objects = 3;
2592 a.num_object_clones = 4;
2593 a.num_object_copies = 5;
2594 a.num_objects_missing_on_primary = 6;
2595 a.num_objects_missing = 123;
2596 a.num_objects_degraded = 7;
2597 a.num_objects_unfound = 8;
2598 a.num_rd = 9; a.num_rd_kb = 10;
2599 a.num_wr = 11; a.num_wr_kb = 12;
2600 a.num_objects_recovered = 14;
2601 a.num_bytes_recovered = 15;
2602 a.num_keys_recovered = 16;
2603 a.num_deep_scrub_errors = 17;
2604 a.num_shallow_scrub_errors = 18;
2605 a.num_scrub_errors = a.num_deep_scrub_errors + a.num_shallow_scrub_errors;
2606 a.num_objects_dirty = 21;
2607 a.num_whiteouts = 22;
2608 a.num_objects_misplaced = 1232;
2609 a.num_objects_hit_set_archive = 2;
2610 a.num_bytes_hit_set_archive = 27;
2611 a.num_flush = 5;
2612 a.num_flush_kb = 6;
2613 a.num_evict = 7;
2614 a.num_evict_kb = 8;
2615 a.num_promote = 9;
2616 a.num_flush_mode_high = 0;
2617 a.num_flush_mode_low = 1;
2618 a.num_evict_mode_some = 1;
2619 a.num_evict_mode_full = 0;
2620 a.num_objects_pinned = 20;
28e407b8 2621 a.num_large_omap_objects = 5;
11fdf7f2
TL
2622 a.num_objects_manifest = 2;
2623 a.num_omap_bytes = 20000;
2624 a.num_omap_keys = 200;
2625 a.num_objects_repaired = 300;
7c673cae
FG
2626 o.push_back(new object_stat_sum_t(a));
2627}
2628
2629void object_stat_sum_t::add(const object_stat_sum_t& o)
2630{
2631 num_bytes += o.num_bytes;
2632 num_objects += o.num_objects;
2633 num_object_clones += o.num_object_clones;
2634 num_object_copies += o.num_object_copies;
2635 num_objects_missing_on_primary += o.num_objects_missing_on_primary;
2636 num_objects_missing += o.num_objects_missing;
2637 num_objects_degraded += o.num_objects_degraded;
2638 num_objects_misplaced += o.num_objects_misplaced;
2639 num_rd += o.num_rd;
2640 num_rd_kb += o.num_rd_kb;
2641 num_wr += o.num_wr;
2642 num_wr_kb += o.num_wr_kb;
2643 num_objects_unfound += o.num_objects_unfound;
2644 num_scrub_errors += o.num_scrub_errors;
2645 num_shallow_scrub_errors += o.num_shallow_scrub_errors;
2646 num_deep_scrub_errors += o.num_deep_scrub_errors;
2647 num_objects_recovered += o.num_objects_recovered;
2648 num_bytes_recovered += o.num_bytes_recovered;
2649 num_keys_recovered += o.num_keys_recovered;
2650 num_objects_dirty += o.num_objects_dirty;
2651 num_whiteouts += o.num_whiteouts;
2652 num_objects_omap += o.num_objects_omap;
2653 num_objects_hit_set_archive += o.num_objects_hit_set_archive;
2654 num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
2655 num_flush += o.num_flush;
2656 num_flush_kb += o.num_flush_kb;
2657 num_evict += o.num_evict;
2658 num_evict_kb += o.num_evict_kb;
2659 num_promote += o.num_promote;
2660 num_flush_mode_high += o.num_flush_mode_high;
2661 num_flush_mode_low += o.num_flush_mode_low;
2662 num_evict_mode_some += o.num_evict_mode_some;
2663 num_evict_mode_full += o.num_evict_mode_full;
2664 num_objects_pinned += o.num_objects_pinned;
2665 num_legacy_snapsets += o.num_legacy_snapsets;
28e407b8 2666 num_large_omap_objects += o.num_large_omap_objects;
11fdf7f2
TL
2667 num_objects_manifest += o.num_objects_manifest;
2668 num_omap_bytes += o.num_omap_bytes;
2669 num_omap_keys += o.num_omap_keys;
2670 num_objects_repaired += o.num_objects_repaired;
7c673cae
FG
2671}
2672
2673void object_stat_sum_t::sub(const object_stat_sum_t& o)
2674{
2675 num_bytes -= o.num_bytes;
2676 num_objects -= o.num_objects;
2677 num_object_clones -= o.num_object_clones;
2678 num_object_copies -= o.num_object_copies;
2679 num_objects_missing_on_primary -= o.num_objects_missing_on_primary;
2680 num_objects_missing -= o.num_objects_missing;
2681 num_objects_degraded -= o.num_objects_degraded;
2682 num_objects_misplaced -= o.num_objects_misplaced;
2683 num_rd -= o.num_rd;
2684 num_rd_kb -= o.num_rd_kb;
2685 num_wr -= o.num_wr;
2686 num_wr_kb -= o.num_wr_kb;
2687 num_objects_unfound -= o.num_objects_unfound;
2688 num_scrub_errors -= o.num_scrub_errors;
2689 num_shallow_scrub_errors -= o.num_shallow_scrub_errors;
2690 num_deep_scrub_errors -= o.num_deep_scrub_errors;
2691 num_objects_recovered -= o.num_objects_recovered;
2692 num_bytes_recovered -= o.num_bytes_recovered;
2693 num_keys_recovered -= o.num_keys_recovered;
2694 num_objects_dirty -= o.num_objects_dirty;
2695 num_whiteouts -= o.num_whiteouts;
2696 num_objects_omap -= o.num_objects_omap;
2697 num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
2698 num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
2699 num_flush -= o.num_flush;
2700 num_flush_kb -= o.num_flush_kb;
2701 num_evict -= o.num_evict;
2702 num_evict_kb -= o.num_evict_kb;
2703 num_promote -= o.num_promote;
2704 num_flush_mode_high -= o.num_flush_mode_high;
2705 num_flush_mode_low -= o.num_flush_mode_low;
2706 num_evict_mode_some -= o.num_evict_mode_some;
2707 num_evict_mode_full -= o.num_evict_mode_full;
2708 num_objects_pinned -= o.num_objects_pinned;
2709 num_legacy_snapsets -= o.num_legacy_snapsets;
28e407b8 2710 num_large_omap_objects -= o.num_large_omap_objects;
11fdf7f2
TL
2711 num_objects_manifest -= o.num_objects_manifest;
2712 num_omap_bytes -= o.num_omap_bytes;
2713 num_omap_keys -= o.num_omap_keys;
2714 num_objects_repaired -= o.num_objects_repaired;
7c673cae
FG
2715}
2716
2717bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
2718{
2719 return
2720 l.num_bytes == r.num_bytes &&
2721 l.num_objects == r.num_objects &&
2722 l.num_object_clones == r.num_object_clones &&
2723 l.num_object_copies == r.num_object_copies &&
2724 l.num_objects_missing_on_primary == r.num_objects_missing_on_primary &&
2725 l.num_objects_missing == r.num_objects_missing &&
2726 l.num_objects_degraded == r.num_objects_degraded &&
2727 l.num_objects_misplaced == r.num_objects_misplaced &&
2728 l.num_objects_unfound == r.num_objects_unfound &&
2729 l.num_rd == r.num_rd &&
2730 l.num_rd_kb == r.num_rd_kb &&
2731 l.num_wr == r.num_wr &&
2732 l.num_wr_kb == r.num_wr_kb &&
2733 l.num_scrub_errors == r.num_scrub_errors &&
2734 l.num_shallow_scrub_errors == r.num_shallow_scrub_errors &&
2735 l.num_deep_scrub_errors == r.num_deep_scrub_errors &&
2736 l.num_objects_recovered == r.num_objects_recovered &&
2737 l.num_bytes_recovered == r.num_bytes_recovered &&
2738 l.num_keys_recovered == r.num_keys_recovered &&
2739 l.num_objects_dirty == r.num_objects_dirty &&
2740 l.num_whiteouts == r.num_whiteouts &&
2741 l.num_objects_omap == r.num_objects_omap &&
2742 l.num_objects_hit_set_archive == r.num_objects_hit_set_archive &&
2743 l.num_bytes_hit_set_archive == r.num_bytes_hit_set_archive &&
2744 l.num_flush == r.num_flush &&
2745 l.num_flush_kb == r.num_flush_kb &&
2746 l.num_evict == r.num_evict &&
2747 l.num_evict_kb == r.num_evict_kb &&
2748 l.num_promote == r.num_promote &&
2749 l.num_flush_mode_high == r.num_flush_mode_high &&
2750 l.num_flush_mode_low == r.num_flush_mode_low &&
2751 l.num_evict_mode_some == r.num_evict_mode_some &&
2752 l.num_evict_mode_full == r.num_evict_mode_full &&
2753 l.num_objects_pinned == r.num_objects_pinned &&
28e407b8 2754 l.num_legacy_snapsets == r.num_legacy_snapsets &&
11fdf7f2
TL
2755 l.num_large_omap_objects == r.num_large_omap_objects &&
2756 l.num_objects_manifest == r.num_objects_manifest &&
2757 l.num_omap_bytes == r.num_omap_bytes &&
2758 l.num_omap_keys == r.num_omap_keys &&
2759 l.num_objects_repaired == r.num_objects_repaired;
7c673cae
FG
2760}
2761
2762// -- object_stat_collection_t --
2763
2764void object_stat_collection_t::dump(Formatter *f) const
2765{
2766 f->open_object_section("stat_sum");
2767 sum.dump(f);
2768 f->close_section();
2769}
2770
9f95a23c 2771void object_stat_collection_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
2772{
2773 ENCODE_START(2, 2, bl);
11fdf7f2
TL
2774 encode(sum, bl);
2775 encode((__u32)0, bl);
7c673cae
FG
2776 ENCODE_FINISH(bl);
2777}
2778
9f95a23c 2779void object_stat_collection_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
2780{
2781 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2 2782 decode(sum, bl);
7c673cae
FG
2783 {
2784 map<string,object_stat_sum_t> cat_sum;
11fdf7f2 2785 decode(cat_sum, bl);
7c673cae
FG
2786 }
2787 DECODE_FINISH(bl);
2788}
2789
2790void object_stat_collection_t::generate_test_instances(list<object_stat_collection_t*>& o)
2791{
2792 object_stat_collection_t a;
2793 o.push_back(new object_stat_collection_t(a));
2794 list<object_stat_sum_t*> l;
2795 object_stat_sum_t::generate_test_instances(l);
9f95a23c 2796 for (auto p = l.begin(); p != l.end(); ++p) {
7c673cae
FG
2797 a.add(**p);
2798 o.push_back(new object_stat_collection_t(a));
2799 }
2800}
2801
2802
2803// -- pg_stat_t --
2804
2805bool pg_stat_t::is_acting_osd(int32_t osd, bool primary) const
2806{
2807 if (primary && osd == acting_primary) {
2808 return true;
2809 } else if (!primary) {
9f95a23c 2810 for(auto it = acting.cbegin(); it != acting.cend(); ++it)
7c673cae
FG
2811 {
2812 if (*it == osd)
2813 return true;
2814 }
2815 }
2816 return false;
2817}
2818
2819void pg_stat_t::dump(Formatter *f) const
2820{
2821 f->dump_stream("version") << version;
2822 f->dump_stream("reported_seq") << reported_seq;
2823 f->dump_stream("reported_epoch") << reported_epoch;
2824 f->dump_string("state", pg_state_string(state));
2825 f->dump_stream("last_fresh") << last_fresh;
2826 f->dump_stream("last_change") << last_change;
2827 f->dump_stream("last_active") << last_active;
2828 f->dump_stream("last_peered") << last_peered;
2829 f->dump_stream("last_clean") << last_clean;
2830 f->dump_stream("last_became_active") << last_became_active;
2831 f->dump_stream("last_became_peered") << last_became_peered;
2832 f->dump_stream("last_unstale") << last_unstale;
2833 f->dump_stream("last_undegraded") << last_undegraded;
2834 f->dump_stream("last_fullsized") << last_fullsized;
2835 f->dump_unsigned("mapping_epoch", mapping_epoch);
2836 f->dump_stream("log_start") << log_start;
2837 f->dump_stream("ondisk_log_start") << ondisk_log_start;
2838 f->dump_unsigned("created", created);
2839 f->dump_unsigned("last_epoch_clean", last_epoch_clean);
2840 f->dump_stream("parent") << parent;
2841 f->dump_unsigned("parent_split_bits", parent_split_bits);
2842 f->dump_stream("last_scrub") << last_scrub;
2843 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2844 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2845 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2846 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2847 f->dump_int("log_size", log_size);
2848 f->dump_int("ondisk_log_size", ondisk_log_size);
2849 f->dump_bool("stats_invalid", stats_invalid);
2850 f->dump_bool("dirty_stats_invalid", dirty_stats_invalid);
2851 f->dump_bool("omap_stats_invalid", omap_stats_invalid);
2852 f->dump_bool("hitset_stats_invalid", hitset_stats_invalid);
2853 f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid);
2854 f->dump_bool("pin_stats_invalid", pin_stats_invalid);
11fdf7f2 2855 f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
b32b8144 2856 f->dump_unsigned("snaptrimq_len", snaptrimq_len);
7c673cae
FG
2857 stats.dump(f);
2858 f->open_array_section("up");
9f95a23c 2859 for (auto p = up.cbegin(); p != up.cend(); ++p)
7c673cae
FG
2860 f->dump_int("osd", *p);
2861 f->close_section();
2862 f->open_array_section("acting");
9f95a23c 2863 for (auto p = acting.cbegin(); p != acting.cend(); ++p)
7c673cae
FG
2864 f->dump_int("osd", *p);
2865 f->close_section();
81eedcae
TL
2866 f->open_array_section("avail_no_missing");
2867 for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p)
2868 f->dump_stream("shard") << *p;
2869 f->close_section();
2870 f->open_array_section("object_location_counts");
2871 for (auto p = object_location_counts.cbegin(); p != object_location_counts.cend(); ++p) {
2872 f->open_object_section("entry");
2873 f->dump_stream("shards") << p->first;
2874 f->dump_int("objects", p->second);
2875 f->close_section();
2876 }
2877 f->close_section();
7c673cae 2878 f->open_array_section("blocked_by");
9f95a23c 2879 for (auto p = blocked_by.cbegin(); p != blocked_by.cend(); ++p)
7c673cae
FG
2880 f->dump_int("osd", *p);
2881 f->close_section();
2882 f->dump_int("up_primary", up_primary);
2883 f->dump_int("acting_primary", acting_primary);
11fdf7f2 2884 f->open_array_section("purged_snaps");
9f95a23c 2885 for (auto i = purged_snaps.begin(); i != purged_snaps.end(); ++i) {
11fdf7f2
TL
2886 f->open_object_section("interval");
2887 f->dump_stream("start") << i.get_start();
2888 f->dump_stream("length") << i.get_len();
2889 f->close_section();
2890 }
2891 f->close_section();
7c673cae
FG
2892}
2893
2894void pg_stat_t::dump_brief(Formatter *f) const
2895{
2896 f->dump_string("state", pg_state_string(state));
2897 f->open_array_section("up");
9f95a23c 2898 for (auto p = up.cbegin(); p != up.cend(); ++p)
7c673cae
FG
2899 f->dump_int("osd", *p);
2900 f->close_section();
2901 f->open_array_section("acting");
9f95a23c 2902 for (auto p = acting.cbegin(); p != acting.cend(); ++p)
7c673cae
FG
2903 f->dump_int("osd", *p);
2904 f->close_section();
2905 f->dump_int("up_primary", up_primary);
2906 f->dump_int("acting_primary", acting_primary);
2907}
2908
9f95a23c 2909void pg_stat_t::encode(ceph::buffer::list &bl) const
7c673cae 2910{
81eedcae 2911 ENCODE_START(26, 22, bl);
11fdf7f2
TL
2912 encode(version, bl);
2913 encode(reported_seq, bl);
2914 encode(reported_epoch, bl);
2915 encode((__u32)state, bl); // for older peers
2916 encode(log_start, bl);
2917 encode(ondisk_log_start, bl);
2918 encode(created, bl);
2919 encode(last_epoch_clean, bl);
2920 encode(parent, bl);
2921 encode(parent_split_bits, bl);
2922 encode(last_scrub, bl);
2923 encode(last_scrub_stamp, bl);
2924 encode(stats, bl);
2925 encode(log_size, bl);
2926 encode(ondisk_log_size, bl);
2927 encode(up, bl);
2928 encode(acting, bl);
2929 encode(last_fresh, bl);
2930 encode(last_change, bl);
2931 encode(last_active, bl);
2932 encode(last_clean, bl);
2933 encode(last_unstale, bl);
2934 encode(mapping_epoch, bl);
2935 encode(last_deep_scrub, bl);
2936 encode(last_deep_scrub_stamp, bl);
2937 encode(stats_invalid, bl);
2938 encode(last_clean_scrub_stamp, bl);
2939 encode(last_became_active, bl);
2940 encode(dirty_stats_invalid, bl);
2941 encode(up_primary, bl);
2942 encode(acting_primary, bl);
2943 encode(omap_stats_invalid, bl);
2944 encode(hitset_stats_invalid, bl);
2945 encode(blocked_by, bl);
2946 encode(last_undegraded, bl);
2947 encode(last_fullsized, bl);
2948 encode(hitset_bytes_stats_invalid, bl);
2949 encode(last_peered, bl);
2950 encode(last_became_peered, bl);
2951 encode(pin_stats_invalid, bl);
2952 encode(snaptrimq_len, bl);
2953 __u32 top_state = (state >> 32);
2954 encode(top_state, bl);
2955 encode(purged_snaps, bl);
2956 encode(manifest_stats_invalid, bl);
81eedcae
TL
2957 encode(avail_no_missing, bl);
2958 encode(object_location_counts, bl);
7c673cae
FG
2959 ENCODE_FINISH(bl);
2960}
2961
9f95a23c 2962void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
2963{
2964 bool tmp;
11fdf7f2 2965 uint32_t old_state;
81eedcae 2966 DECODE_START(26, bl);
11fdf7f2
TL
2967 decode(version, bl);
2968 decode(reported_seq, bl);
2969 decode(reported_epoch, bl);
2970 decode(old_state, bl);
2971 decode(log_start, bl);
2972 decode(ondisk_log_start, bl);
2973 decode(created, bl);
2974 decode(last_epoch_clean, bl);
2975 decode(parent, bl);
2976 decode(parent_split_bits, bl);
2977 decode(last_scrub, bl);
2978 decode(last_scrub_stamp, bl);
2979 decode(stats, bl);
2980 decode(log_size, bl);
2981 decode(ondisk_log_size, bl);
2982 decode(up, bl);
2983 decode(acting, bl);
2984 decode(last_fresh, bl);
2985 decode(last_change, bl);
2986 decode(last_active, bl);
2987 decode(last_clean, bl);
2988 decode(last_unstale, bl);
2989 decode(mapping_epoch, bl);
2990 decode(last_deep_scrub, bl);
2991 decode(last_deep_scrub_stamp, bl);
2992 decode(tmp, bl);
7c673cae 2993 stats_invalid = tmp;
11fdf7f2
TL
2994 decode(last_clean_scrub_stamp, bl);
2995 decode(last_became_active, bl);
2996 decode(tmp, bl);
7c673cae 2997 dirty_stats_invalid = tmp;
11fdf7f2
TL
2998 decode(up_primary, bl);
2999 decode(acting_primary, bl);
3000 decode(tmp, bl);
7c673cae 3001 omap_stats_invalid = tmp;
11fdf7f2 3002 decode(tmp, bl);
7c673cae 3003 hitset_stats_invalid = tmp;
11fdf7f2
TL
3004 decode(blocked_by, bl);
3005 decode(last_undegraded, bl);
3006 decode(last_fullsized, bl);
3007 decode(tmp, bl);
7c673cae 3008 hitset_bytes_stats_invalid = tmp;
11fdf7f2
TL
3009 decode(last_peered, bl);
3010 decode(last_became_peered, bl);
3011 decode(tmp, bl);
7c673cae 3012 pin_stats_invalid = tmp;
b32b8144 3013 if (struct_v >= 23) {
11fdf7f2
TL
3014 decode(snaptrimq_len, bl);
3015 if (struct_v >= 24) {
3016 __u32 top_state;
3017 decode(top_state, bl);
3018 state = (uint64_t)old_state | ((uint64_t)top_state << 32);
3019 decode(purged_snaps, bl);
3020 } else {
3021 state = old_state;
3022 }
3023 if (struct_v >= 25) {
3024 decode(tmp, bl);
3025 manifest_stats_invalid = tmp;
3026 } else {
3027 manifest_stats_invalid = true;
3028 }
81eedcae
TL
3029 if (struct_v >= 26) {
3030 decode(avail_no_missing, bl);
3031 decode(object_location_counts, bl);
3032 }
b32b8144 3033 }
7c673cae
FG
3034 DECODE_FINISH(bl);
3035}
3036
3037void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
3038{
3039 pg_stat_t a;
3040 o.push_back(new pg_stat_t(a));
3041
3042 a.version = eversion_t(1, 3);
3043 a.reported_epoch = 1;
3044 a.reported_seq = 2;
3045 a.state = 123;
3046 a.mapping_epoch = 998;
3047 a.last_fresh = utime_t(1002, 1);
3048 a.last_change = utime_t(1002, 2);
3049 a.last_active = utime_t(1002, 3);
3050 a.last_clean = utime_t(1002, 4);
3051 a.last_unstale = utime_t(1002, 5);
3052 a.last_undegraded = utime_t(1002, 7);
3053 a.last_fullsized = utime_t(1002, 8);
3054 a.log_start = eversion_t(1, 4);
3055 a.ondisk_log_start = eversion_t(1, 5);
3056 a.created = 6;
3057 a.last_epoch_clean = 7;
11fdf7f2 3058 a.parent = pg_t(1, 2);
7c673cae
FG
3059 a.parent_split_bits = 12;
3060 a.last_scrub = eversion_t(9, 10);
3061 a.last_scrub_stamp = utime_t(11, 12);
3062 a.last_deep_scrub = eversion_t(13, 14);
3063 a.last_deep_scrub_stamp = utime_t(15, 16);
3064 a.last_clean_scrub_stamp = utime_t(17, 18);
b32b8144 3065 a.snaptrimq_len = 1048576;
7c673cae
FG
3066 list<object_stat_collection_t*> l;
3067 object_stat_collection_t::generate_test_instances(l);
3068 a.stats = *l.back();
3069 a.log_size = 99;
3070 a.ondisk_log_size = 88;
3071 a.up.push_back(123);
3072 a.up_primary = 123;
3073 a.acting.push_back(456);
81eedcae
TL
3074 a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD));
3075 set<pg_shard_t> sset = { pg_shard_t(0), pg_shard_t(1) };
3076 a.object_location_counts.insert(make_pair(sset, 10));
3077 sset.insert(pg_shard_t(2));
3078 a.object_location_counts.insert(make_pair(sset, 5));
7c673cae
FG
3079 a.acting_primary = 456;
3080 o.push_back(new pg_stat_t(a));
3081
3082 a.up.push_back(124);
3083 a.up_primary = 124;
3084 a.acting.push_back(124);
3085 a.acting_primary = 124;
3086 a.blocked_by.push_back(155);
3087 a.blocked_by.push_back(156);
3088 o.push_back(new pg_stat_t(a));
3089}
3090
3091bool operator==(const pg_stat_t& l, const pg_stat_t& r)
3092{
3093 return
3094 l.version == r.version &&
3095 l.reported_seq == r.reported_seq &&
3096 l.reported_epoch == r.reported_epoch &&
3097 l.state == r.state &&
3098 l.last_fresh == r.last_fresh &&
3099 l.last_change == r.last_change &&
3100 l.last_active == r.last_active &&
3101 l.last_peered == r.last_peered &&
3102 l.last_clean == r.last_clean &&
3103 l.last_unstale == r.last_unstale &&
3104 l.last_undegraded == r.last_undegraded &&
3105 l.last_fullsized == r.last_fullsized &&
3106 l.log_start == r.log_start &&
3107 l.ondisk_log_start == r.ondisk_log_start &&
3108 l.created == r.created &&
3109 l.last_epoch_clean == r.last_epoch_clean &&
3110 l.parent == r.parent &&
3111 l.parent_split_bits == r.parent_split_bits &&
3112 l.last_scrub == r.last_scrub &&
3113 l.last_deep_scrub == r.last_deep_scrub &&
3114 l.last_scrub_stamp == r.last_scrub_stamp &&
3115 l.last_deep_scrub_stamp == r.last_deep_scrub_stamp &&
3116 l.last_clean_scrub_stamp == r.last_clean_scrub_stamp &&
3117 l.stats == r.stats &&
3118 l.stats_invalid == r.stats_invalid &&
3119 l.log_size == r.log_size &&
3120 l.ondisk_log_size == r.ondisk_log_size &&
3121 l.up == r.up &&
3122 l.acting == r.acting &&
81eedcae
TL
3123 l.avail_no_missing == r.avail_no_missing &&
3124 l.object_location_counts == r.object_location_counts &&
7c673cae
FG
3125 l.mapping_epoch == r.mapping_epoch &&
3126 l.blocked_by == r.blocked_by &&
3127 l.last_became_active == r.last_became_active &&
3128 l.last_became_peered == r.last_became_peered &&
3129 l.dirty_stats_invalid == r.dirty_stats_invalid &&
3130 l.omap_stats_invalid == r.omap_stats_invalid &&
3131 l.hitset_stats_invalid == r.hitset_stats_invalid &&
3132 l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid &&
3133 l.up_primary == r.up_primary &&
3134 l.acting_primary == r.acting_primary &&
b32b8144 3135 l.pin_stats_invalid == r.pin_stats_invalid &&
11fdf7f2
TL
3136 l.manifest_stats_invalid == r.manifest_stats_invalid &&
3137 l.purged_snaps == r.purged_snaps &&
b32b8144 3138 l.snaptrimq_len == r.snaptrimq_len;
7c673cae
FG
3139}
3140
11fdf7f2
TL
3141// -- store_statfs_t --
3142
3143bool store_statfs_t::operator==(const store_statfs_t& other) const
3144{
3145 return total == other.total
3146 && available == other.available
3147 && allocated == other.allocated
3148 && internally_reserved == other.internally_reserved
3149 && data_stored == other.data_stored
3150 && data_compressed == other.data_compressed
3151 && data_compressed_allocated == other.data_compressed_allocated
3152 && data_compressed_original == other.data_compressed_original
3153 && omap_allocated == other.omap_allocated
3154 && internal_metadata == other.internal_metadata;
3155}
3156
3157void store_statfs_t::dump(Formatter *f) const
3158{
3159 f->dump_int("total", total);
3160 f->dump_int("available", available);
3161 f->dump_int("internally_reserved", internally_reserved);
3162 f->dump_int("allocated", allocated);
3163 f->dump_int("data_stored", data_stored);
3164 f->dump_int("data_compressed", data_compressed);
3165 f->dump_int("data_compressed_allocated", data_compressed_allocated);
3166 f->dump_int("data_compressed_original", data_compressed_original);
3167 f->dump_int("omap_allocated", omap_allocated);
3168 f->dump_int("internal_metadata", internal_metadata);
3169}
3170
3171ostream& operator<<(ostream& out, const store_statfs_t &s)
3172{
3173 out << std::hex
3174 << "store_statfs(0x" << s.available
3175 << "/0x" << s.internally_reserved
3176 << "/0x" << s.total
3177 << ", data 0x" << s.data_stored
3178 << "/0x" << s.allocated
3179 << ", compress 0x" << s.data_compressed
3180 << "/0x" << s.data_compressed_allocated
3181 << "/0x" << s.data_compressed_original
3182 << ", omap 0x" << s.omap_allocated
3183 << ", meta 0x" << s.internal_metadata
3184 << std::dec
3185 << ")";
3186 return out;
3187}
3188
3189void store_statfs_t::generate_test_instances(list<store_statfs_t*>& o)
3190{
3191 store_statfs_t a;
3192 o.push_back(new store_statfs_t(a));
3193 a.total = 234;
3194 a.available = 123;
3195 a.internally_reserved = 33;
3196 a.allocated = 32;
3197 a.data_stored = 44;
3198 a.data_compressed = 21;
3199 a.data_compressed_allocated = 12;
3200 a.data_compressed_original = 13;
3201 a.omap_allocated = 14;
3202 a.internal_metadata = 15;
3203 o.push_back(new store_statfs_t(a));
3204}
3205
7c673cae
FG
3206// -- pool_stat_t --
3207
3208void pool_stat_t::dump(Formatter *f) const
3209{
3210 stats.dump(f);
11fdf7f2
TL
3211 f->open_object_section("store_stats");
3212 store_stats.dump(f);
3213 f->close_section();
7c673cae
FG
3214 f->dump_int("log_size", log_size);
3215 f->dump_int("ondisk_log_size", ondisk_log_size);
3216 f->dump_int("up", up);
3217 f->dump_int("acting", acting);
eafe8130 3218 f->dump_int("num_store_stats", num_store_stats);
7c673cae
FG
3219}
3220
9f95a23c 3221void pool_stat_t::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae 3222{
11fdf7f2 3223 using ceph::encode;
7c673cae
FG
3224 if ((features & CEPH_FEATURE_OSDENC) == 0) {
3225 __u8 v = 4;
11fdf7f2
TL
3226 encode(v, bl);
3227 encode(stats, bl);
3228 encode(log_size, bl);
3229 encode(ondisk_log_size, bl);
7c673cae
FG
3230 return;
3231 }
3232
11fdf7f2
TL
3233 ENCODE_START(7, 5, bl);
3234 encode(stats, bl);
3235 encode(log_size, bl);
3236 encode(ondisk_log_size, bl);
3237 encode(up, bl);
3238 encode(acting, bl);
3239 encode(store_stats, bl);
3240 encode(num_store_stats, bl);
7c673cae
FG
3241 ENCODE_FINISH(bl);
3242}
3243
9f95a23c 3244void pool_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 3245{
11fdf7f2 3246 DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
7c673cae 3247 if (struct_v >= 4) {
11fdf7f2
TL
3248 decode(stats, bl);
3249 decode(log_size, bl);
3250 decode(ondisk_log_size, bl);
7c673cae 3251 if (struct_v >= 6) {
11fdf7f2
TL
3252 decode(up, bl);
3253 decode(acting, bl);
7c673cae
FG
3254 } else {
3255 up = 0;
3256 acting = 0;
3257 }
11fdf7f2
TL
3258 if (struct_v >= 7) {
3259 decode(store_stats, bl);
3260 decode(num_store_stats, bl);
3261 } else {
3262 store_stats.reset();
3263 num_store_stats = 0;
3264 }
3265
7c673cae 3266 } else {
11fdf7f2 3267 decode(stats.sum.num_bytes, bl);
7c673cae 3268 uint64_t num_kb;
11fdf7f2
TL
3269 decode(num_kb, bl);
3270 decode(stats.sum.num_objects, bl);
3271 decode(stats.sum.num_object_clones, bl);
3272 decode(stats.sum.num_object_copies, bl);
3273 decode(stats.sum.num_objects_missing_on_primary, bl);
3274 decode(stats.sum.num_objects_degraded, bl);
3275 decode(log_size, bl);
3276 decode(ondisk_log_size, bl);
7c673cae 3277 if (struct_v >= 2) {
11fdf7f2
TL
3278 decode(stats.sum.num_rd, bl);
3279 decode(stats.sum.num_rd_kb, bl);
3280 decode(stats.sum.num_wr, bl);
3281 decode(stats.sum.num_wr_kb, bl);
7c673cae
FG
3282 }
3283 if (struct_v >= 3) {
11fdf7f2 3284 decode(stats.sum.num_objects_unfound, bl);
7c673cae
FG
3285 }
3286 }
3287 DECODE_FINISH(bl);
3288}
3289
3290void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
3291{
3292 pool_stat_t a;
3293 o.push_back(new pool_stat_t(a));
3294
3295 list<object_stat_collection_t*> l;
3296 object_stat_collection_t::generate_test_instances(l);
11fdf7f2
TL
3297 list<store_statfs_t*> ll;
3298 store_statfs_t::generate_test_instances(ll);
7c673cae 3299 a.stats = *l.back();
11fdf7f2 3300 a.store_stats = *ll.back();
7c673cae
FG
3301 a.log_size = 123;
3302 a.ondisk_log_size = 456;
3303 a.acting = 3;
3304 a.up = 4;
11fdf7f2 3305 a.num_store_stats = 1;
7c673cae
FG
3306 o.push_back(new pool_stat_t(a));
3307}
3308
3309
3310// -- pg_history_t --
3311
9f95a23c 3312void pg_history_t::encode(ceph::buffer::list &bl) const
7c673cae 3313{
9f95a23c 3314 ENCODE_START(10, 4, bl);
11fdf7f2
TL
3315 encode(epoch_created, bl);
3316 encode(last_epoch_started, bl);
3317 encode(last_epoch_clean, bl);
3318 encode(last_epoch_split, bl);
3319 encode(same_interval_since, bl);
3320 encode(same_up_since, bl);
3321 encode(same_primary_since, bl);
3322 encode(last_scrub, bl);
3323 encode(last_scrub_stamp, bl);
3324 encode(last_deep_scrub, bl);
3325 encode(last_deep_scrub_stamp, bl);
3326 encode(last_clean_scrub_stamp, bl);
3327 encode(last_epoch_marked_full, bl);
3328 encode(last_interval_started, bl);
3329 encode(last_interval_clean, bl);
3330 encode(epoch_pool_created, bl);
9f95a23c 3331 encode(prior_readable_until_ub, bl);
7c673cae
FG
3332 ENCODE_FINISH(bl);
3333}
3334
9f95a23c 3335void pg_history_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 3336{
9f95a23c 3337 DECODE_START_LEGACY_COMPAT_LEN(10, 4, 4, bl);
11fdf7f2
TL
3338 decode(epoch_created, bl);
3339 decode(last_epoch_started, bl);
7c673cae 3340 if (struct_v >= 3)
11fdf7f2 3341 decode(last_epoch_clean, bl);
7c673cae
FG
3342 else
3343 last_epoch_clean = last_epoch_started; // careful, it's a lie!
11fdf7f2
TL
3344 decode(last_epoch_split, bl);
3345 decode(same_interval_since, bl);
3346 decode(same_up_since, bl);
3347 decode(same_primary_since, bl);
7c673cae 3348 if (struct_v >= 2) {
11fdf7f2
TL
3349 decode(last_scrub, bl);
3350 decode(last_scrub_stamp, bl);
7c673cae
FG
3351 }
3352 if (struct_v >= 5) {
11fdf7f2
TL
3353 decode(last_deep_scrub, bl);
3354 decode(last_deep_scrub_stamp, bl);
7c673cae
FG
3355 }
3356 if (struct_v >= 6) {
11fdf7f2 3357 decode(last_clean_scrub_stamp, bl);
7c673cae
FG
3358 }
3359 if (struct_v >= 7) {
11fdf7f2 3360 decode(last_epoch_marked_full, bl);
7c673cae
FG
3361 }
3362 if (struct_v >= 8) {
11fdf7f2
TL
3363 decode(last_interval_started, bl);
3364 decode(last_interval_clean, bl);
7c673cae
FG
3365 } else {
3366 if (last_epoch_started >= same_interval_since) {
3367 last_interval_started = same_interval_since;
3368 } else {
3369 last_interval_started = last_epoch_started; // best guess
3370 }
3371 if (last_epoch_clean >= same_interval_since) {
3372 last_interval_clean = same_interval_since;
3373 } else {
3374 last_interval_clean = last_epoch_clean; // best guess
3375 }
3376 }
31f18b77 3377 if (struct_v >= 9) {
11fdf7f2 3378 decode(epoch_pool_created, bl);
31f18b77
FG
3379 } else {
3380 epoch_pool_created = epoch_created;
3381 }
9f95a23c
TL
3382 if (struct_v >= 10) {
3383 decode(prior_readable_until_ub, bl);
3384 }
7c673cae
FG
3385 DECODE_FINISH(bl);
3386}
3387
3388void pg_history_t::dump(Formatter *f) const
3389{
3390 f->dump_int("epoch_created", epoch_created);
31f18b77 3391 f->dump_int("epoch_pool_created", epoch_pool_created);
7c673cae
FG
3392 f->dump_int("last_epoch_started", last_epoch_started);
3393 f->dump_int("last_interval_started", last_interval_started);
3394 f->dump_int("last_epoch_clean", last_epoch_clean);
3395 f->dump_int("last_interval_clean", last_interval_clean);
3396 f->dump_int("last_epoch_split", last_epoch_split);
3397 f->dump_int("last_epoch_marked_full", last_epoch_marked_full);
3398 f->dump_int("same_up_since", same_up_since);
3399 f->dump_int("same_interval_since", same_interval_since);
3400 f->dump_int("same_primary_since", same_primary_since);
3401 f->dump_stream("last_scrub") << last_scrub;
3402 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
3403 f->dump_stream("last_deep_scrub") << last_deep_scrub;
3404 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
3405 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
9f95a23c
TL
3406 f->dump_float(
3407 "prior_readable_until_ub",
3408 std::chrono::duration<double>(prior_readable_until_ub).count());
7c673cae
FG
3409}
3410
3411void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
3412{
3413 o.push_back(new pg_history_t);
3414 o.push_back(new pg_history_t);
3415 o.back()->epoch_created = 1;
31f18b77 3416 o.back()->epoch_pool_created = 1;
7c673cae
FG
3417 o.back()->last_epoch_started = 2;
3418 o.back()->last_interval_started = 2;
3419 o.back()->last_epoch_clean = 3;
3420 o.back()->last_interval_clean = 2;
3421 o.back()->last_epoch_split = 4;
9f95a23c 3422 o.back()->prior_readable_until_ub = make_timespan(3.1415);
7c673cae
FG
3423 o.back()->same_up_since = 5;
3424 o.back()->same_interval_since = 6;
3425 o.back()->same_primary_since = 7;
3426 o.back()->last_scrub = eversion_t(8, 9);
3427 o.back()->last_scrub_stamp = utime_t(10, 11);
3428 o.back()->last_deep_scrub = eversion_t(12, 13);
3429 o.back()->last_deep_scrub_stamp = utime_t(14, 15);
3430 o.back()->last_clean_scrub_stamp = utime_t(16, 17);
3431 o.back()->last_epoch_marked_full = 18;
3432}
3433
3434
3435// -- pg_info_t --
3436
9f95a23c 3437void pg_info_t::encode(ceph::buffer::list &bl) const
7c673cae
FG
3438{
3439 ENCODE_START(32, 26, bl);
11fdf7f2
TL
3440 encode(pgid.pgid, bl);
3441 encode(last_update, bl);
3442 encode(last_complete, bl);
3443 encode(log_tail, bl);
9f95a23c 3444 encode(hobject_t(), bl); // old (nibblewise) last_backfill
11fdf7f2 3445 encode(stats, bl);
7c673cae 3446 history.encode(bl);
11fdf7f2
TL
3447 encode(purged_snaps, bl);
3448 encode(last_epoch_started, bl);
3449 encode(last_user_version, bl);
3450 encode(hit_set, bl);
3451 encode(pgid.shard, bl);
3452 encode(last_backfill, bl);
9f95a23c 3453 encode(true, bl); // was last_backfill_bitwise
11fdf7f2 3454 encode(last_interval_started, bl);
7c673cae
FG
3455 ENCODE_FINISH(bl);
3456}
3457
9f95a23c 3458void pg_info_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
3459{
3460 DECODE_START(32, bl);
11fdf7f2
TL
3461 decode(pgid.pgid, bl);
3462 decode(last_update, bl);
3463 decode(last_complete, bl);
3464 decode(log_tail, bl);
7c673cae
FG
3465 {
3466 hobject_t old_last_backfill;
11fdf7f2 3467 decode(old_last_backfill, bl);
7c673cae 3468 }
11fdf7f2 3469 decode(stats, bl);
7c673cae 3470 history.decode(bl);
11fdf7f2
TL
3471 decode(purged_snaps, bl);
3472 decode(last_epoch_started, bl);
3473 decode(last_user_version, bl);
3474 decode(hit_set, bl);
3475 decode(pgid.shard, bl);
3476 decode(last_backfill, bl);
9f95a23c
TL
3477 {
3478 bool last_backfill_bitwise;
3479 decode(last_backfill_bitwise, bl);
3480 // note: we may see a false value here since the default value for
3481 // the member was false, so it often didn't get set to true until
3482 // peering progressed.
3483 }
7c673cae 3484 if (struct_v >= 32) {
11fdf7f2 3485 decode(last_interval_started, bl);
7c673cae
FG
3486 } else {
3487 last_interval_started = last_epoch_started;
3488 }
3489 DECODE_FINISH(bl);
3490}
3491
3492// -- pg_info_t --
3493
3494void pg_info_t::dump(Formatter *f) const
3495{
3496 f->dump_stream("pgid") << pgid;
3497 f->dump_stream("last_update") << last_update;
3498 f->dump_stream("last_complete") << last_complete;
3499 f->dump_stream("log_tail") << log_tail;
3500 f->dump_int("last_user_version", last_user_version);
3501 f->dump_stream("last_backfill") << last_backfill;
7c673cae
FG
3502 f->open_array_section("purged_snaps");
3503 for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
3504 i != purged_snaps.end();
3505 ++i) {
3506 f->open_object_section("purged_snap_interval");
3507 f->dump_stream("start") << i.get_start();
3508 f->dump_stream("length") << i.get_len();
3509 f->close_section();
3510 }
3511 f->close_section();
3512 f->open_object_section("history");
3513 history.dump(f);
3514 f->close_section();
3515 f->open_object_section("stats");
3516 stats.dump(f);
3517 f->close_section();
3518
3519 f->dump_int("empty", is_empty());
3520 f->dump_int("dne", dne());
3521 f->dump_int("incomplete", is_incomplete());
3522 f->dump_int("last_epoch_started", last_epoch_started);
3523
3524 f->open_object_section("hit_set_history");
3525 hit_set.dump(f);
3526 f->close_section();
3527}
3528
3529void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
3530{
3531 o.push_back(new pg_info_t);
3532 o.push_back(new pg_info_t);
3533 list<pg_history_t*> h;
3534 pg_history_t::generate_test_instances(h);
3535 o.back()->history = *h.back();
11fdf7f2 3536 o.back()->pgid = spg_t(pg_t(1, 2), shard_id_t::NO_SHARD);
7c673cae
FG
3537 o.back()->last_update = eversion_t(3, 4);
3538 o.back()->last_complete = eversion_t(5, 6);
3539 o.back()->last_user_version = 2;
3540 o.back()->log_tail = eversion_t(7, 8);
3541 o.back()->last_backfill = hobject_t(object_t("objname"), "key", 123, 456, -1, "");
7c673cae
FG
3542 {
3543 list<pg_stat_t*> s;
3544 pg_stat_t::generate_test_instances(s);
3545 o.back()->stats = *s.back();
3546 }
3547 {
3548 list<pg_hit_set_history_t*> s;
3549 pg_hit_set_history_t::generate_test_instances(s);
3550 o.back()->hit_set = *s.back();
3551 }
3552}
3553
3554// -- pg_notify_t --
9f95a23c 3555void pg_notify_t::encode(ceph::buffer::list &bl) const
7c673cae 3556{
9f95a23c 3557 ENCODE_START(3, 2, bl);
11fdf7f2
TL
3558 encode(query_epoch, bl);
3559 encode(epoch_sent, bl);
3560 encode(info, bl);
3561 encode(to, bl);
3562 encode(from, bl);
9f95a23c 3563 encode(past_intervals, bl);
7c673cae
FG
3564 ENCODE_FINISH(bl);
3565}
3566
9f95a23c 3567void pg_notify_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 3568{
9f95a23c 3569 DECODE_START(3, bl);
11fdf7f2
TL
3570 decode(query_epoch, bl);
3571 decode(epoch_sent, bl);
3572 decode(info, bl);
3573 decode(to, bl);
3574 decode(from, bl);
9f95a23c
TL
3575 if (struct_v >= 3) {
3576 decode(past_intervals, bl);
3577 }
7c673cae
FG
3578 DECODE_FINISH(bl);
3579}
3580
3581void pg_notify_t::dump(Formatter *f) const
3582{
3583 f->dump_int("from", from);
3584 f->dump_int("to", to);
3585 f->dump_unsigned("query_epoch", query_epoch);
3586 f->dump_unsigned("epoch_sent", epoch_sent);
3587 {
3588 f->open_object_section("info");
3589 info.dump(f);
3590 f->close_section();
3591 }
9f95a23c 3592 f->dump_object("past_intervals", past_intervals);
7c673cae
FG
3593}
3594
3595void pg_notify_t::generate_test_instances(list<pg_notify_t*>& o)
3596{
9f95a23c
TL
3597 o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1,
3598 pg_info_t(), PastIntervals()));
3599 o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10,
3600 pg_info_t(), PastIntervals()));
7c673cae
FG
3601}
3602
3603ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
3604{
3605 lhs << "(query:" << notify.query_epoch
3606 << " sent:" << notify.epoch_sent
3607 << " " << notify.info;
3608 if (notify.from != shard_id_t::NO_SHARD ||
3609 notify.to != shard_id_t::NO_SHARD)
3610 lhs << " " << (unsigned)notify.from
3611 << "->" << (unsigned)notify.to;
9f95a23c 3612 lhs << " " << notify.past_intervals;
7c673cae
FG
3613 return lhs << ")";
3614}
3615
3616// -- pg_interval_t --
3617
9f95a23c 3618void PastIntervals::pg_interval_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
3619{
3620 ENCODE_START(4, 2, bl);
11fdf7f2
TL
3621 encode(first, bl);
3622 encode(last, bl);
3623 encode(up, bl);
3624 encode(acting, bl);
3625 encode(maybe_went_rw, bl);
3626 encode(primary, bl);
3627 encode(up_primary, bl);
7c673cae
FG
3628 ENCODE_FINISH(bl);
3629}
3630
9f95a23c 3631void PastIntervals::pg_interval_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
3632{
3633 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
11fdf7f2
TL
3634 decode(first, bl);
3635 decode(last, bl);
3636 decode(up, bl);
3637 decode(acting, bl);
3638 decode(maybe_went_rw, bl);
7c673cae 3639 if (struct_v >= 3) {
11fdf7f2 3640 decode(primary, bl);
7c673cae
FG
3641 } else {
3642 if (acting.size())
3643 primary = acting[0];
3644 }
3645 if (struct_v >= 4) {
11fdf7f2 3646 decode(up_primary, bl);
7c673cae
FG
3647 } else {
3648 if (up.size())
3649 up_primary = up[0];
3650 }
3651 DECODE_FINISH(bl);
3652}
3653
3654void PastIntervals::pg_interval_t::dump(Formatter *f) const
3655{
3656 f->dump_unsigned("first", first);
3657 f->dump_unsigned("last", last);
3658 f->dump_int("maybe_went_rw", maybe_went_rw ? 1 : 0);
3659 f->open_array_section("up");
9f95a23c 3660 for (auto p = up.cbegin(); p != up.cend(); ++p)
7c673cae
FG
3661 f->dump_int("osd", *p);
3662 f->close_section();
3663 f->open_array_section("acting");
9f95a23c 3664 for (auto p = acting.cbegin(); p != acting.cend(); ++p)
7c673cae
FG
3665 f->dump_int("osd", *p);
3666 f->close_section();
3667 f->dump_int("primary", primary);
3668 f->dump_int("up_primary", up_primary);
3669}
3670
3671void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
3672{
3673 o.push_back(new pg_interval_t);
3674 o.push_back(new pg_interval_t);
3675 o.back()->up.push_back(1);
3676 o.back()->acting.push_back(2);
3677 o.back()->acting.push_back(3);
3678 o.back()->first = 4;
3679 o.back()->last = 5;
3680 o.back()->maybe_went_rw = true;
3681}
3682
3683WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
3684
7c673cae
FG
3685
3686/**
3687 * pi_compact_rep
3688 *
3689 * PastIntervals only needs to be able to answer two questions:
3690 * 1) Where should the primary look for unfound objects?
3691 * 2) List a set of subsets of the OSDs such that contacting at least
11fdf7f2 3692 * one from each subset guarantees we speak to at least one witness
7c673cae
FG
3693 * of any completed write.
3694 *
3695 * Crucially, 2) does not require keeping *all* past intervals. Certainly,
3696 * we don't need to keep any where maybe_went_rw would be false. We also
3697 * needn't keep two intervals where the actingset in one is a subset
3698 * of the other (only need to keep the smaller of the two sets). In order
3699 * to accurately trim the set of intervals as last_epoch_started changes
3700 * without rebuilding the set from scratch, we'll retain the larger set
3701 * if it is in an older interval.
3702 */
3703struct compact_interval_t {
3704 epoch_t first;
3705 epoch_t last;
3706 set<pg_shard_t> acting;
3707 bool supersedes(const compact_interval_t &other) {
3708 for (auto &&i: acting) {
3709 if (!other.acting.count(i))
3710 return false;
3711 }
3712 return true;
3713 }
3714 void dump(Formatter *f) const {
3715 f->open_object_section("compact_interval_t");
3716 f->dump_stream("first") << first;
3717 f->dump_stream("last") << last;
3718 f->dump_stream("acting") << acting;
3719 f->close_section();
3720 }
9f95a23c 3721 void encode(ceph::buffer::list &bl) const {
7c673cae 3722 ENCODE_START(1, 1, bl);
11fdf7f2
TL
3723 encode(first, bl);
3724 encode(last, bl);
3725 encode(acting, bl);
7c673cae
FG
3726 ENCODE_FINISH(bl);
3727 }
9f95a23c 3728 void decode(ceph::buffer::list::const_iterator &bl) {
7c673cae 3729 DECODE_START(1, bl);
11fdf7f2
TL
3730 decode(first, bl);
3731 decode(last, bl);
3732 decode(acting, bl);
7c673cae
FG
3733 DECODE_FINISH(bl);
3734 }
3735 static void generate_test_instances(list<compact_interval_t*> & o) {
3736 /* Not going to be used, we'll generate pi_compact_rep directly */
3737 }
3738};
3739ostream &operator<<(ostream &o, const compact_interval_t &rhs)
3740{
3741 return o << "([" << rhs.first << "," << rhs.last
3742 << "] acting " << rhs.acting << ")";
3743}
3744WRITE_CLASS_ENCODER(compact_interval_t)
3745
3746class pi_compact_rep : public PastIntervals::interval_rep {
3747 epoch_t first = 0;
3748 epoch_t last = 0; // inclusive
3749 set<pg_shard_t> all_participants;
3750 list<compact_interval_t> intervals;
3751 pi_compact_rep(
3752 bool ec_pool,
3753 std::list<PastIntervals::pg_interval_t> &&intervals) {
3754 for (auto &&i: intervals)
3755 add_interval(ec_pool, i);
3756 }
3757public:
3758 pi_compact_rep() = default;
3759 pi_compact_rep(const pi_compact_rep &) = default;
3760 pi_compact_rep(pi_compact_rep &&) = default;
3761 pi_compact_rep &operator=(const pi_compact_rep &) = default;
3762 pi_compact_rep &operator=(pi_compact_rep &&) = default;
3763
3764 size_t size() const override { return intervals.size(); }
3765 bool empty() const override {
3766 return first > last || (first == 0 && last == 0);
3767 }
3768 void clear() override {
3769 *this = pi_compact_rep();
3770 }
3771 pair<epoch_t, epoch_t> get_bounds() const override {
3772 return make_pair(first, last + 1);
3773 }
f67539c2 3774 void adjust_start_backwards(epoch_t last_epoch_clean) override {
11fdf7f2
TL
3775 first = last_epoch_clean;
3776 }
3777
7c673cae
FG
3778 set<pg_shard_t> get_all_participants(
3779 bool ec_pool) const override {
3780 return all_participants;
3781 }
3782 void add_interval(
3783 bool ec_pool, const PastIntervals::pg_interval_t &interval) override {
3784 if (first == 0)
3785 first = interval.first;
11fdf7f2 3786 ceph_assert(interval.last > last);
7c673cae
FG
3787 last = interval.last;
3788 set<pg_shard_t> acting;
3789 for (unsigned i = 0; i < interval.acting.size(); ++i) {
3790 if (interval.acting[i] == CRUSH_ITEM_NONE)
3791 continue;
3792 acting.insert(
3793 pg_shard_t(
3794 interval.acting[i],
3795 ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
3796 }
3797 all_participants.insert(acting.begin(), acting.end());
3798 if (!interval.maybe_went_rw)
3799 return;
3800 intervals.push_back(
3801 compact_interval_t{interval.first, interval.last, acting});
3802 auto plast = intervals.end();
3803 --plast;
3804 for (auto cur = intervals.begin(); cur != plast; ) {
3805 if (plast->supersedes(*cur)) {
3806 intervals.erase(cur++);
3807 } else {
3808 ++cur;
3809 }
3810 }
3811 }
3812 unique_ptr<PastIntervals::interval_rep> clone() const override {
3813 return unique_ptr<PastIntervals::interval_rep>(new pi_compact_rep(*this));
3814 }
3815 ostream &print(ostream &out) const override {
3816 return out << "([" << first << "," << last
9f95a23c
TL
3817 << "] all_participants=" << all_participants
3818 << " intervals=" << intervals << ")";
7c673cae 3819 }
9f95a23c 3820 void encode(ceph::buffer::list &bl) const override {
7c673cae 3821 ENCODE_START(1, 1, bl);
11fdf7f2
TL
3822 encode(first, bl);
3823 encode(last, bl);
3824 encode(all_participants, bl);
3825 encode(intervals, bl);
7c673cae
FG
3826 ENCODE_FINISH(bl);
3827 }
9f95a23c 3828 void decode(ceph::buffer::list::const_iterator &bl) override {
7c673cae 3829 DECODE_START(1, bl);
11fdf7f2
TL
3830 decode(first, bl);
3831 decode(last, bl);
3832 decode(all_participants, bl);
3833 decode(intervals, bl);
7c673cae
FG
3834 DECODE_FINISH(bl);
3835 }
3836 void dump(Formatter *f) const override {
3837 f->open_object_section("PastIntervals::compact_rep");
3838 f->dump_stream("first") << first;
3839 f->dump_stream("last") << last;
3840 f->open_array_section("all_participants");
3841 for (auto& i : all_participants) {
3842 f->dump_object("pg_shard", i);
3843 }
3844 f->close_section();
3845 f->open_array_section("intervals");
3846 for (auto &&i: intervals) {
3847 i.dump(f);
3848 }
3849 f->close_section();
3850 f->close_section();
3851 }
7c673cae
FG
3852 static void generate_test_instances(list<pi_compact_rep*> &o) {
3853 using ival = PastIntervals::pg_interval_t;
3854 using ivallst = std::list<ival>;
3855 o.push_back(
3856 new pi_compact_rep(
3857 true, ivallst
3858 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3859 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3860 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3861 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3862 }));
3863 o.push_back(
3864 new pi_compact_rep(
3865 false, ivallst
3866 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3867 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3868 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3869 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3870 }));
3871 o.push_back(
3872 new pi_compact_rep(
3873 true, ivallst
3874 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3875 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3876 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3877 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3878 }));
3879 }
3880 void iterate_mayberw_back_to(
7c673cae
FG
3881 epoch_t les,
3882 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3883 for (auto i = intervals.rbegin(); i != intervals.rend(); ++i) {
3884 if (i->last < les)
3885 break;
3886 f(i->first, i->acting);
3887 }
3888 }
3889 virtual ~pi_compact_rep() override {}
3890};
3891WRITE_CLASS_ENCODER(pi_compact_rep)
3892
11fdf7f2
TL
3893PastIntervals::PastIntervals()
3894{
3895 past_intervals.reset(new pi_compact_rep);
3896}
3897
7c673cae
FG
3898PastIntervals::PastIntervals(const PastIntervals &rhs)
3899 : past_intervals(rhs.past_intervals ?
3900 rhs.past_intervals->clone() :
3901 nullptr) {}
3902
3903PastIntervals &PastIntervals::operator=(const PastIntervals &rhs)
3904{
3905 PastIntervals other(rhs);
31f18b77 3906 swap(other);
7c673cae
FG
3907 return *this;
3908}
3909
3910ostream& operator<<(ostream& out, const PastIntervals &i)
3911{
3912 if (i.past_intervals) {
3913 return i.past_intervals->print(out);
3914 } else {
3915 return out << "(empty)";
3916 }
3917}
3918
3919ostream& operator<<(ostream& out, const PastIntervals::PriorSet &i)
3920{
3921 return out << "PriorSet("
3922 << "ec_pool: " << i.ec_pool
3923 << ", probe: " << i.probe
3924 << ", down: " << i.down
3925 << ", blocked_by: " << i.blocked_by
3926 << ", pg_down: " << i.pg_down
3927 << ")";
3928}
3929
9f95a23c 3930void PastIntervals::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
3931{
3932 DECODE_START(1, bl);
3933 __u8 type = 0;
11fdf7f2 3934 decode(type, bl);
7c673cae
FG
3935 switch (type) {
3936 case 0:
3937 break;
3938 case 1:
11fdf7f2 3939 ceph_abort_msg("pi_simple_rep support removed post-luminous");
7c673cae
FG
3940 break;
3941 case 2:
3942 past_intervals.reset(new pi_compact_rep);
3943 past_intervals->decode(bl);
3944 break;
3945 }
3946 DECODE_FINISH(bl);
3947}
3948
7c673cae
FG
3949void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
3950{
7c673cae
FG
3951 {
3952 list<pi_compact_rep *> compact;
3953 pi_compact_rep::generate_test_instances(compact);
3954 for (auto &&i: compact) {
3955 // takes ownership of contents
3956 o.push_back(new PastIntervals(i));
3957 }
3958 }
3959 return;
3960}
3961
7c673cae
FG
3962bool PastIntervals::is_new_interval(
3963 int old_acting_primary,
3964 int new_acting_primary,
3965 const vector<int> &old_acting,
3966 const vector<int> &new_acting,
3967 int old_up_primary,
3968 int new_up_primary,
3969 const vector<int> &old_up,
3970 const vector<int> &new_up,
3971 int old_size,
3972 int new_size,
3973 int old_min_size,
3974 int new_min_size,
3975 unsigned old_pg_num,
3976 unsigned new_pg_num,
11fdf7f2
TL
3977 unsigned old_pg_num_pending,
3978 unsigned new_pg_num_pending,
7c673cae
FG
3979 bool old_sort_bitwise,
3980 bool new_sort_bitwise,
c07f9fc5
FG
3981 bool old_recovery_deletes,
3982 bool new_recovery_deletes,
f67539c2
TL
3983 uint32_t old_crush_count,
3984 uint32_t new_crush_count,
3985 uint32_t old_crush_target,
3986 uint32_t new_crush_target,
3987 uint32_t old_crush_barrier,
3988 uint32_t new_crush_barrier,
3989 int32_t old_crush_member,
3990 int32_t new_crush_member,
7c673cae
FG
3991 pg_t pgid) {
3992 return old_acting_primary != new_acting_primary ||
3993 new_acting != old_acting ||
3994 old_up_primary != new_up_primary ||
3995 new_up != old_up ||
3996 old_min_size != new_min_size ||
3997 old_size != new_size ||
3998 pgid.is_split(old_pg_num, new_pg_num, 0) ||
11fdf7f2
TL
3999 // (is or was) pre-merge source
4000 pgid.is_merge_source(old_pg_num_pending, new_pg_num_pending, 0) ||
4001 pgid.is_merge_source(new_pg_num_pending, old_pg_num_pending, 0) ||
4002 // merge source
4003 pgid.is_merge_source(old_pg_num, new_pg_num, 0) ||
4004 // (is or was) pre-merge target
4005 pgid.is_merge_target(old_pg_num_pending, new_pg_num_pending) ||
4006 pgid.is_merge_target(new_pg_num_pending, old_pg_num_pending) ||
4007 // merge target
4008 pgid.is_merge_target(old_pg_num, new_pg_num) ||
c07f9fc5 4009 old_sort_bitwise != new_sort_bitwise ||
f67539c2
TL
4010 old_recovery_deletes != new_recovery_deletes ||
4011 old_crush_count != new_crush_count ||
4012 old_crush_target != new_crush_target ||
4013 old_crush_barrier != new_crush_barrier ||
4014 old_crush_member != new_crush_member;
7c673cae
FG
4015}
4016
4017bool PastIntervals::is_new_interval(
4018 int old_acting_primary,
4019 int new_acting_primary,
4020 const vector<int> &old_acting,
4021 const vector<int> &new_acting,
4022 int old_up_primary,
4023 int new_up_primary,
4024 const vector<int> &old_up,
4025 const vector<int> &new_up,
9f95a23c
TL
4026 const OSDMap *osdmap,
4027 const OSDMap *lastmap,
11fdf7f2
TL
4028 pg_t pgid)
4029{
4030 const pg_pool_t *plast = lastmap->get_pg_pool(pgid.pool());
4031 if (!plast) {
4032 return false; // after pool is deleted there are no more interval changes
4033 }
4034 const pg_pool_t *pi = osdmap->get_pg_pool(pgid.pool());
4035 if (!pi) {
4036 return true; // pool was deleted this epoch -> (final!) interval change
4037 }
4038 return
7c673cae
FG
4039 is_new_interval(old_acting_primary,
4040 new_acting_primary,
4041 old_acting,
4042 new_acting,
4043 old_up_primary,
4044 new_up_primary,
4045 old_up,
4046 new_up,
11fdf7f2
TL
4047 plast->size,
4048 pi->size,
4049 plast->min_size,
4050 pi->min_size,
4051 plast->get_pg_num(),
4052 pi->get_pg_num(),
4053 plast->get_pg_num_pending(),
4054 pi->get_pg_num_pending(),
7c673cae
FG
4055 lastmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
4056 osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
c07f9fc5
FG
4057 lastmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
4058 osdmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
f67539c2
TL
4059 plast->peering_crush_bucket_count, pi->peering_crush_bucket_count,
4060 plast->peering_crush_bucket_target, pi->peering_crush_bucket_target,
4061 plast->peering_crush_bucket_barrier, pi->peering_crush_bucket_barrier,
4062 plast->peering_crush_mandatory_member, pi->peering_crush_mandatory_member,
7c673cae
FG
4063 pgid);
4064}
4065
4066bool PastIntervals::check_new_interval(
4067 int old_acting_primary,
4068 int new_acting_primary,
4069 const vector<int> &old_acting,
4070 const vector<int> &new_acting,
4071 int old_up_primary,
4072 int new_up_primary,
4073 const vector<int> &old_up,
4074 const vector<int> &new_up,
4075 epoch_t same_interval_since,
4076 epoch_t last_epoch_clean,
9f95a23c
TL
4077 const OSDMap *osdmap,
4078 const OSDMap *lastmap,
7c673cae 4079 pg_t pgid,
9f95a23c 4080 const IsPGRecoverablePredicate &could_have_gone_active,
7c673cae
FG
4081 PastIntervals *past_intervals,
4082 std::ostream *out)
4083{
4084 /*
4085 * We have to be careful to gracefully deal with situations like
4086 * so. Say we have a power outage or something that takes out both
4087 * OSDs, but the monitor doesn't mark them down in the same epoch.
4088 * The history may look like
4089 *
4090 * 1: A B
4091 * 2: B
4092 * 3: let's say B dies for good, too (say, from the power spike)
4093 * 4: A
4094 *
4095 * which makes it look like B may have applied updates to the PG
4096 * that we need in order to proceed. This sucks...
4097 *
4098 * To minimize the risk of this happening, we CANNOT go active if
4099 * _any_ OSDs in the prior set are down until we send an MOSDAlive
4100 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
4101 * Then, we have something like
4102 *
4103 * 1: A B
4104 * 2: B up_thru[B]=0
4105 * 3:
4106 * 4: A
4107 *
4108 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
4109 *
4110 * or,
4111 *
4112 * 1: A B
4113 * 2: B up_thru[B]=0
4114 * 3: B up_thru[B]=2
4115 * 4:
4116 * 5: A
4117 *
4118 * -> we must wait for B, bc it was alive through 2, and could have
4119 * written to the pg.
4120 *
4121 * If B is really dead, then an administrator will need to manually
4122 * intervene by marking the OSD as "lost."
4123 */
4124
4125 // remember past interval
4126 // NOTE: a change in the up set primary triggers an interval
4127 // change, even though the interval members in the pg_interval_t
4128 // do not change.
11fdf7f2
TL
4129 ceph_assert(past_intervals);
4130 ceph_assert(past_intervals->past_intervals);
7c673cae
FG
4131 if (is_new_interval(
4132 old_acting_primary,
4133 new_acting_primary,
4134 old_acting,
4135 new_acting,
4136 old_up_primary,
4137 new_up_primary,
4138 old_up,
4139 new_up,
4140 osdmap,
4141 lastmap,
4142 pgid)) {
4143 pg_interval_t i;
4144 i.first = same_interval_since;
4145 i.last = osdmap->get_epoch() - 1;
11fdf7f2 4146 ceph_assert(i.first <= i.last);
7c673cae
FG
4147 i.acting = old_acting;
4148 i.up = old_up;
4149 i.primary = old_acting_primary;
4150 i.up_primary = old_up_primary;
4151
4152 unsigned num_acting = 0;
9f95a23c 4153 for (auto p = i.acting.cbegin(); p != i.acting.cend(); ++p)
7c673cae
FG
4154 if (*p != CRUSH_ITEM_NONE)
4155 ++num_acting;
4156
11fdf7f2 4157 ceph_assert(lastmap->get_pools().count(pgid.pool()));
7c673cae
FG
4158 const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
4159 set<pg_shard_t> old_acting_shards;
4160 old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
4161
4162 if (num_acting &&
4163 i.primary != -1 &&
4164 num_acting >= old_pg_pool.min_size &&
f67539c2
TL
4165 (!old_pg_pool.is_stretch_pool() ||
4166 old_pg_pool.stretch_set_can_peer(old_acting, *lastmap, out)) &&
9f95a23c 4167 could_have_gone_active(old_acting_shards)) {
7c673cae
FG
4168 if (out)
4169 *out << __func__ << " " << i
7c673cae
FG
4170 << " up_thru " << lastmap->get_up_thru(i.primary)
4171 << " up_from " << lastmap->get_up_from(i.primary)
11fdf7f2 4172 << " last_epoch_clean " << last_epoch_clean;
7c673cae
FG
4173 if (lastmap->get_up_thru(i.primary) >= i.first &&
4174 lastmap->get_up_from(i.primary) <= i.first) {
4175 i.maybe_went_rw = true;
4176 if (out)
11fdf7f2 4177 *out << " " << i
7c673cae
FG
4178 << " : primary up " << lastmap->get_up_from(i.primary)
4179 << "-" << lastmap->get_up_thru(i.primary)
4180 << " includes interval"
11fdf7f2 4181 << std::endl;
7c673cae
FG
4182 } else if (last_epoch_clean >= i.first &&
4183 last_epoch_clean <= i.last) {
4184 // If the last_epoch_clean is included in this interval, then
4185 // the pg must have been rw (for recovery to have completed).
4186 // This is important because we won't know the _real_
4187 // first_epoch because we stop at last_epoch_clean, and we
4188 // don't want the oldest interval to randomly have
4189 // maybe_went_rw false depending on the relative up_thru vs
4190 // last_epoch_clean timing.
4191 i.maybe_went_rw = true;
4192 if (out)
11fdf7f2 4193 *out << " " << i
7c673cae
FG
4194 << " : includes last_epoch_clean " << last_epoch_clean
4195 << " and presumed to have been rw"
4196 << std::endl;
4197 } else {
4198 i.maybe_went_rw = false;
4199 if (out)
11fdf7f2 4200 *out << " " << i
7c673cae
FG
4201 << " : primary up " << lastmap->get_up_from(i.primary)
4202 << "-" << lastmap->get_up_thru(i.primary)
4203 << " does not include interval"
11fdf7f2 4204 << std::endl;
7c673cae
FG
4205 }
4206 } else {
4207 i.maybe_went_rw = false;
4208 if (out)
4209 *out << __func__ << " " << i << " : acting set is too small" << std::endl;
4210 }
11fdf7f2 4211 past_intervals->past_intervals->add_interval(old_pg_pool.is_erasure(), i);
7c673cae
FG
4212 return true;
4213 } else {
4214 return false;
4215 }
4216}
4217
7c673cae
FG
4218// true if the given map affects the prior set
4219bool PastIntervals::PriorSet::affected_by_map(
4220 const OSDMap &osdmap,
4221 const DoutPrefixProvider *dpp) const
4222{
9f95a23c 4223 for (auto p = probe.begin(); p != probe.end(); ++p) {
7c673cae
FG
4224 int o = p->osd;
4225
4226 // did someone in the prior set go down?
4227 if (osdmap.is_down(o) && down.count(o) == 0) {
4228 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
4229 return true;
4230 }
4231
4232 // did a down osd in cur get (re)marked as lost?
9f95a23c 4233 auto r = blocked_by.find(o);
7c673cae
FG
4234 if (r != blocked_by.end()) {
4235 if (!osdmap.exists(o)) {
4236 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
4237 return true;
4238 }
4239 if (osdmap.get_info(o).lost_at != r->second) {
4240 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
4241 return true;
4242 }
4243 }
4244 }
4245
4246 // did someone in the prior down set go up?
9f95a23c 4247 for (auto p = down.cbegin(); p != down.cend(); ++p) {
7c673cae
FG
4248 int o = *p;
4249
4250 if (osdmap.is_up(o)) {
4251 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
4252 return true;
4253 }
4254
4255 // did someone in the prior set get lost or destroyed?
4256 if (!osdmap.exists(o)) {
4257 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
4258 return true;
4259 }
4260 // did a down osd in down get (re)marked as lost?
9f95a23c 4261 auto r = blocked_by.find(o);
7c673cae
FG
4262 if (r != blocked_by.end()) {
4263 if (osdmap.get_info(o).lost_at != r->second) {
4264 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
4265 return true;
4266 }
4267 }
4268 }
4269
4270 return false;
4271}
4272
4273ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
4274{
4275 out << "interval(" << i.first << "-" << i.last
4276 << " up " << i.up << "(" << i.up_primary << ")"
4277 << " acting " << i.acting << "(" << i.primary << ")";
4278 if (i.maybe_went_rw)
4279 out << " maybe_went_rw";
4280 out << ")";
4281 return out;
4282}
4283
4284
4285
4286// -- pg_query_t --
4287
9f95a23c 4288void pg_query_t::encode(ceph::buffer::list &bl, uint64_t features) const {
7c673cae 4289 ENCODE_START(3, 3, bl);
11fdf7f2
TL
4290 encode(type, bl);
4291 encode(since, bl);
7c673cae 4292 history.encode(bl);
11fdf7f2
TL
4293 encode(epoch_sent, bl);
4294 encode(to, bl);
4295 encode(from, bl);
7c673cae
FG
4296 ENCODE_FINISH(bl);
4297}
4298
9f95a23c 4299void pg_query_t::decode(ceph::buffer::list::const_iterator &bl) {
7c673cae 4300 DECODE_START(3, bl);
11fdf7f2
TL
4301 decode(type, bl);
4302 decode(since, bl);
7c673cae 4303 history.decode(bl);
11fdf7f2
TL
4304 decode(epoch_sent, bl);
4305 decode(to, bl);
4306 decode(from, bl);
7c673cae
FG
4307 DECODE_FINISH(bl);
4308}
4309
4310void pg_query_t::dump(Formatter *f) const
4311{
4312 f->dump_int("from", from);
4313 f->dump_int("to", to);
4314 f->dump_string("type", get_type_name());
4315 f->dump_stream("since") << since;
4316 f->dump_stream("epoch_sent") << epoch_sent;
4317 f->open_object_section("history");
4318 history.dump(f);
4319 f->close_section();
4320}
4321void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
4322{
4323 o.push_back(new pg_query_t());
4324 list<pg_history_t*> h;
4325 pg_history_t::generate_test_instances(h);
4326 o.push_back(new pg_query_t(pg_query_t::INFO, shard_id_t(1), shard_id_t(2), *h.back(), 4));
4327 o.push_back(new pg_query_t(pg_query_t::MISSING, shard_id_t(2), shard_id_t(3), *h.back(), 4));
4328 o.push_back(new pg_query_t(pg_query_t::LOG, shard_id_t(0), shard_id_t(0),
4329 eversion_t(4, 5), *h.back(), 4));
4330 o.push_back(new pg_query_t(pg_query_t::FULLLOG,
4331 shard_id_t::NO_SHARD, shard_id_t::NO_SHARD,
4332 *h.back(), 5));
4333}
4334
9f95a23c
TL
4335// -- pg_lease_t --
4336
4337void pg_lease_t::encode(bufferlist& bl) const
4338{
4339 ENCODE_START(1, 1, bl);
4340 encode(readable_until, bl);
4341 encode(readable_until_ub, bl);
4342 encode(interval, bl);
4343 ENCODE_FINISH(bl);
4344}
4345
4346void pg_lease_t::decode(bufferlist::const_iterator& p)
4347{
4348 DECODE_START(1, p);
4349 decode(readable_until, p);
4350 decode(readable_until_ub, p);
4351 decode(interval, p);
4352 DECODE_FINISH(p);
4353}
4354
4355void pg_lease_t::dump(Formatter *f) const
4356{
4357 f->dump_stream("readable_until") << readable_until;
4358 f->dump_stream("readable_until_ub") << readable_until_ub;
4359 f->dump_stream("interval") << interval;
4360}
4361
4362void pg_lease_t::generate_test_instances(std::list<pg_lease_t*>& o)
4363{
4364 o.push_back(new pg_lease_t());
4365 o.push_back(new pg_lease_t());
4366 o.back()->readable_until = make_timespan(1.5);
4367 o.back()->readable_until_ub = make_timespan(3.4);
4368 o.back()->interval = make_timespan(1.0);
4369}
4370
4371// -- pg_lease_ack_t --
4372
4373void pg_lease_ack_t::encode(bufferlist& bl) const
4374{
4375 ENCODE_START(1, 1, bl);
4376 encode(readable_until_ub, bl);
4377 ENCODE_FINISH(bl);
4378}
4379
4380void pg_lease_ack_t::decode(bufferlist::const_iterator& p)
4381{
4382 DECODE_START(1, p);
4383 decode(readable_until_ub, p);
4384 DECODE_FINISH(p);
4385}
4386
4387void pg_lease_ack_t::dump(Formatter *f) const
4388{
4389 f->dump_stream("readable_until_ub") << readable_until_ub;
4390}
4391
4392void pg_lease_ack_t::generate_test_instances(std::list<pg_lease_ack_t*>& o)
4393{
4394 o.push_back(new pg_lease_ack_t());
4395 o.push_back(new pg_lease_ack_t());
4396 o.back()->readable_until_ub = make_timespan(3.4);
4397}
4398
4399
7c673cae
FG
4400// -- ObjectModDesc --
4401void ObjectModDesc::visit(Visitor *visitor) const
4402{
11fdf7f2 4403 auto bp = bl.cbegin();
7c673cae
FG
4404 try {
4405 while (!bp.end()) {
4406 DECODE_START(max_required_version, bp);
4407 uint8_t code;
11fdf7f2 4408 decode(code, bp);
7c673cae
FG
4409 switch (code) {
4410 case APPEND: {
4411 uint64_t size;
11fdf7f2 4412 decode(size, bp);
7c673cae
FG
4413 visitor->append(size);
4414 break;
4415 }
4416 case SETATTRS: {
9f95a23c 4417 map<string, std::optional<ceph::buffer::list> > attrs;
11fdf7f2 4418 decode(attrs, bp);
7c673cae
FG
4419 visitor->setattrs(attrs);
4420 break;
4421 }
4422 case DELETE: {
4423 version_t old_version;
11fdf7f2 4424 decode(old_version, bp);
7c673cae
FG
4425 visitor->rmobject(old_version);
4426 break;
4427 }
4428 case CREATE: {
4429 visitor->create();
4430 break;
4431 }
4432 case UPDATE_SNAPS: {
4433 set<snapid_t> snaps;
11fdf7f2 4434 decode(snaps, bp);
7c673cae
FG
4435 visitor->update_snaps(snaps);
4436 break;
4437 }
4438 case TRY_DELETE: {
4439 version_t old_version;
11fdf7f2 4440 decode(old_version, bp);
7c673cae
FG
4441 visitor->try_rmobject(old_version);
4442 break;
4443 }
4444 case ROLLBACK_EXTENTS: {
4445 vector<pair<uint64_t, uint64_t> > extents;
4446 version_t gen;
11fdf7f2
TL
4447 decode(gen, bp);
4448 decode(extents, bp);
7c673cae
FG
4449 visitor->rollback_extents(gen,extents);
4450 break;
4451 }
4452 default:
11fdf7f2 4453 ceph_abort_msg("Invalid rollback code");
7c673cae
FG
4454 }
4455 DECODE_FINISH(bp);
4456 }
4457 } catch (...) {
11fdf7f2 4458 ceph_abort_msg("Invalid encoding");
7c673cae
FG
4459 }
4460}
4461
4462struct DumpVisitor : public ObjectModDesc::Visitor {
4463 Formatter *f;
4464 explicit DumpVisitor(Formatter *f) : f(f) {}
4465 void append(uint64_t old_size) override {
4466 f->open_object_section("op");
4467 f->dump_string("code", "APPEND");
4468 f->dump_unsigned("old_size", old_size);
4469 f->close_section();
4470 }
9f95a23c 4471 void setattrs(map<string, std::optional<ceph::buffer::list> > &attrs) override {
7c673cae
FG
4472 f->open_object_section("op");
4473 f->dump_string("code", "SETATTRS");
4474 f->open_array_section("attrs");
9f95a23c 4475 for (auto i = attrs.begin(); i != attrs.end(); ++i) {
7c673cae
FG
4476 f->dump_string("attr_name", i->first);
4477 }
4478 f->close_section();
4479 f->close_section();
4480 }
4481 void rmobject(version_t old_version) override {
4482 f->open_object_section("op");
4483 f->dump_string("code", "RMOBJECT");
4484 f->dump_unsigned("old_version", old_version);
4485 f->close_section();
4486 }
4487 void try_rmobject(version_t old_version) override {
4488 f->open_object_section("op");
4489 f->dump_string("code", "TRY_RMOBJECT");
4490 f->dump_unsigned("old_version", old_version);
4491 f->close_section();
4492 }
4493 void create() override {
4494 f->open_object_section("op");
4495 f->dump_string("code", "CREATE");
4496 f->close_section();
4497 }
4498 void update_snaps(const set<snapid_t> &snaps) override {
4499 f->open_object_section("op");
4500 f->dump_string("code", "UPDATE_SNAPS");
4501 f->dump_stream("snaps") << snaps;
4502 f->close_section();
4503 }
4504 void rollback_extents(
4505 version_t gen,
4506 const vector<pair<uint64_t, uint64_t> > &extents) override {
4507 f->open_object_section("op");
4508 f->dump_string("code", "ROLLBACK_EXTENTS");
4509 f->dump_unsigned("gen", gen);
4510 f->dump_stream("snaps") << extents;
4511 f->close_section();
4512 }
4513};
4514
4515void ObjectModDesc::dump(Formatter *f) const
4516{
4517 f->open_object_section("object_mod_desc");
4518 f->dump_bool("can_local_rollback", can_local_rollback);
4519 f->dump_bool("rollback_info_completed", rollback_info_completed);
4520 {
4521 f->open_array_section("ops");
4522 DumpVisitor vis(f);
4523 visit(&vis);
4524 f->close_section();
4525 }
4526 f->close_section();
4527}
4528
4529void ObjectModDesc::generate_test_instances(list<ObjectModDesc*>& o)
4530{
9f95a23c 4531 map<string, std::optional<ceph::buffer::list> > attrs;
7c673cae
FG
4532 attrs[OI_ATTR];
4533 attrs[SS_ATTR];
4534 attrs["asdf"];
4535 o.push_back(new ObjectModDesc());
4536 o.back()->append(100);
4537 o.back()->setattrs(attrs);
4538 o.push_back(new ObjectModDesc());
4539 o.back()->rmobject(1001);
4540 o.push_back(new ObjectModDesc());
4541 o.back()->create();
4542 o.back()->setattrs(attrs);
4543 o.push_back(new ObjectModDesc());
4544 o.back()->create();
4545 o.back()->setattrs(attrs);
4546 o.back()->mark_unrollbackable();
4547 o.back()->append(1000);
4548}
4549
9f95a23c 4550void ObjectModDesc::encode(ceph::buffer::list &_bl) const
7c673cae
FG
4551{
4552 ENCODE_START(max_required_version, max_required_version, _bl);
11fdf7f2
TL
4553 encode(can_local_rollback, _bl);
4554 encode(rollback_info_completed, _bl);
4555 encode(bl, _bl);
7c673cae
FG
4556 ENCODE_FINISH(_bl);
4557}
9f95a23c 4558void ObjectModDesc::decode(ceph::buffer::list::const_iterator &_bl)
7c673cae
FG
4559{
4560 DECODE_START(2, _bl);
4561 max_required_version = struct_v;
11fdf7f2
TL
4562 decode(can_local_rollback, _bl);
4563 decode(rollback_info_completed, _bl);
4564 decode(bl, _bl);
9f95a23c 4565 // ensure bl does not pin a larger ceph::buffer in memory
7c673cae 4566 bl.rebuild();
31f18b77 4567 bl.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4568 DECODE_FINISH(_bl);
4569}
4570
9f95a23c
TL
4571std::atomic<uint32_t> ObjectCleanRegions::max_num_intervals = {10};
4572
4573void ObjectCleanRegions::set_max_num_intervals(uint32_t num)
4574{
4575 max_num_intervals = num;
4576}
4577
4578void ObjectCleanRegions::trim()
4579{
4580 while(clean_offsets.num_intervals() > max_num_intervals) {
4581 typename interval_set<uint64_t>::iterator shortest_interval = clean_offsets.begin();
4582 if (shortest_interval == clean_offsets.end())
4583 break;
4584 for (typename interval_set<uint64_t>::iterator it = clean_offsets.begin();
4585 it != clean_offsets.end();
4586 ++it) {
4587 if (it.get_len() < shortest_interval.get_len())
4588 shortest_interval = it;
4589 }
4590 clean_offsets.erase(shortest_interval);
4591 }
4592}
4593
4594void ObjectCleanRegions::merge(const ObjectCleanRegions &other)
4595{
4596 clean_offsets.intersection_of(other.clean_offsets);
4597 clean_omap = clean_omap && other.clean_omap;
4598 trim();
4599}
4600
4601void ObjectCleanRegions::mark_data_region_dirty(uint64_t offset, uint64_t len)
4602{
4603 interval_set<uint64_t> clean_region;
4604 clean_region.insert(0, (uint64_t)-1);
4605 clean_region.erase(offset, len);
4606 clean_offsets.intersection_of(clean_region);
4607 trim();
4608}
4609
f67539c2
TL
4610bool ObjectCleanRegions::is_clean_region(uint64_t offset, uint64_t len) const
4611{
4612 return clean_offsets.contains(offset, len);
4613}
4614
9f95a23c
TL
4615void ObjectCleanRegions::mark_omap_dirty()
4616{
4617 clean_omap = false;
4618}
4619
4620void ObjectCleanRegions::mark_object_new()
4621{
4622 new_object = true;
4623}
4624
4625void ObjectCleanRegions::mark_fully_dirty()
4626{
4627 mark_data_region_dirty(0, (uint64_t)-1);
4628 mark_omap_dirty();
4629 mark_object_new();
4630}
4631
4632interval_set<uint64_t> ObjectCleanRegions::get_dirty_regions() const
4633{
4634 interval_set<uint64_t> dirty_region;
4635 dirty_region.insert(0, (uint64_t)-1);
4636 dirty_region.subtract(clean_offsets);
4637 return dirty_region;
4638}
4639
4640bool ObjectCleanRegions::omap_is_dirty() const
4641{
4642 return !clean_omap;
4643}
4644
4645bool ObjectCleanRegions::object_is_exist() const
4646{
4647 return !new_object;
4648}
4649
4650void ObjectCleanRegions::encode(bufferlist &bl) const
4651{
4652 ENCODE_START(1, 1, bl);
4653 using ceph::encode;
4654 encode(clean_offsets, bl);
4655 encode(clean_omap, bl);
4656 encode(new_object, bl);
4657 ENCODE_FINISH(bl);
4658}
4659
4660void ObjectCleanRegions::decode(bufferlist::const_iterator &bl)
4661{
4662 DECODE_START(1, bl);
4663 using ceph::decode;
4664 decode(clean_offsets, bl);
4665 decode(clean_omap, bl);
4666 decode(new_object, bl);
4667 DECODE_FINISH(bl);
4668}
4669
4670void ObjectCleanRegions::dump(Formatter *f) const
4671{
4672 f->open_object_section("object_clean_regions");
4673 f->dump_stream("clean_offsets") << clean_offsets;
4674 f->dump_bool("clean_omap", clean_omap);
4675 f->dump_bool("new_object", new_object);
4676 f->close_section();
4677}
4678
4679void ObjectCleanRegions::generate_test_instances(list<ObjectCleanRegions*>& o)
4680{
4681 o.push_back(new ObjectCleanRegions());
4682 o.push_back(new ObjectCleanRegions());
4683 o.back()->mark_data_region_dirty(4096, 40960);
4684 o.back()->mark_omap_dirty();
4685 o.back()->mark_object_new();
4686}
4687
4688ostream& operator<<(ostream& out, const ObjectCleanRegions& ocr)
4689{
4690 return out << "clean_offsets: " << ocr.clean_offsets
4691 << ", clean_omap: " << ocr.clean_omap
4692 << ", new_object: " << ocr.new_object;
4693}
4694
7c673cae
FG
4695// -- pg_log_entry_t --
4696
4697string pg_log_entry_t::get_key_name() const
4698{
4699 return version.get_key_name();
4700}
4701
9f95a23c 4702void pg_log_entry_t::encode_with_checksum(ceph::buffer::list& bl) const
7c673cae 4703{
11fdf7f2 4704 using ceph::encode;
9f95a23c 4705 ceph::buffer::list ebl(sizeof(*this)*2);
11fdf7f2 4706 this->encode(ebl);
7c673cae 4707 __u32 crc = ebl.crc32c(0);
11fdf7f2
TL
4708 encode(ebl, bl);
4709 encode(crc, bl);
7c673cae
FG
4710}
4711
9f95a23c 4712void pg_log_entry_t::decode_with_checksum(ceph::buffer::list::const_iterator& p)
7c673cae 4713{
11fdf7f2 4714 using ceph::decode;
9f95a23c 4715 ceph::buffer::list bl;
11fdf7f2 4716 decode(bl, p);
7c673cae 4717 __u32 crc;
11fdf7f2 4718 decode(crc, p);
7c673cae 4719 if (crc != bl.crc32c(0))
9f95a23c 4720 throw ceph::buffer::malformed_input("bad checksum on pg_log_entry_t");
11fdf7f2
TL
4721 auto q = bl.cbegin();
4722 this->decode(q);
7c673cae
FG
4723}
4724
9f95a23c 4725void pg_log_entry_t::encode(ceph::buffer::list &bl) const
7c673cae 4726{
9f95a23c 4727 ENCODE_START(14, 4, bl);
11fdf7f2
TL
4728 encode(op, bl);
4729 encode(soid, bl);
4730 encode(version, bl);
7c673cae
FG
4731
4732 /**
4733 * Added with reverting_to:
4734 * Previous code used prior_version to encode
4735 * what we now call reverting_to. This will
4736 * allow older code to decode reverting_to
4737 * into prior_version as expected.
4738 */
4739 if (op == LOST_REVERT)
11fdf7f2 4740 encode(reverting_to, bl);
7c673cae 4741 else
11fdf7f2 4742 encode(prior_version, bl);
7c673cae 4743
11fdf7f2
TL
4744 encode(reqid, bl);
4745 encode(mtime, bl);
7c673cae 4746 if (op == LOST_REVERT)
11fdf7f2
TL
4747 encode(prior_version, bl);
4748 encode(snaps, bl);
4749 encode(user_version, bl);
4750 encode(mod_desc, bl);
4751 encode(extra_reqids, bl);
7c673cae 4752 if (op == ERROR)
11fdf7f2
TL
4753 encode(return_code, bl);
4754 if (!extra_reqids.empty())
4755 encode(extra_reqid_return_codes, bl);
9f95a23c
TL
4756 encode(clean_regions, bl);
4757 if (op != ERROR)
4758 encode(return_code, bl);
4759 encode(op_returns, bl);
7c673cae
FG
4760 ENCODE_FINISH(bl);
4761}
4762
9f95a23c 4763void pg_log_entry_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 4764{
9f95a23c 4765 DECODE_START_LEGACY_COMPAT_LEN(14, 4, 4, bl);
11fdf7f2 4766 decode(op, bl);
7c673cae
FG
4767 if (struct_v < 2) {
4768 sobject_t old_soid;
11fdf7f2 4769 decode(old_soid, bl);
7c673cae
FG
4770 soid.oid = old_soid.oid;
4771 soid.snap = old_soid.snap;
4772 invalid_hash = true;
4773 } else {
11fdf7f2 4774 decode(soid, bl);
7c673cae
FG
4775 }
4776 if (struct_v < 3)
4777 invalid_hash = true;
11fdf7f2 4778 decode(version, bl);
7c673cae
FG
4779
4780 if (struct_v >= 6 && op == LOST_REVERT)
11fdf7f2 4781 decode(reverting_to, bl);
7c673cae 4782 else
11fdf7f2 4783 decode(prior_version, bl);
7c673cae 4784
11fdf7f2 4785 decode(reqid, bl);
7c673cae 4786
11fdf7f2 4787 decode(mtime, bl);
7c673cae
FG
4788 if (struct_v < 5)
4789 invalid_pool = true;
4790
4791 if (op == LOST_REVERT) {
4792 if (struct_v >= 6) {
11fdf7f2 4793 decode(prior_version, bl);
7c673cae
FG
4794 } else {
4795 reverting_to = prior_version;
4796 }
4797 }
4798 if (struct_v >= 7 || // for v >= 7, this is for all ops.
4799 op == CLONE) { // for v < 7, it's only present for CLONE.
11fdf7f2 4800 decode(snaps, bl);
9f95a23c 4801 // ensure snaps does not pin a larger ceph::buffer in memory
7c673cae 4802 snaps.rebuild();
31f18b77 4803 snaps.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4804 }
4805
4806 if (struct_v >= 8)
11fdf7f2 4807 decode(user_version, bl);
7c673cae
FG
4808 else
4809 user_version = version.version;
4810
4811 if (struct_v >= 9)
11fdf7f2 4812 decode(mod_desc, bl);
7c673cae
FG
4813 else
4814 mod_desc.mark_unrollbackable();
4815 if (struct_v >= 10)
11fdf7f2 4816 decode(extra_reqids, bl);
7c673cae 4817 if (struct_v >= 11 && op == ERROR)
11fdf7f2
TL
4818 decode(return_code, bl);
4819 if (struct_v >= 12 && !extra_reqids.empty())
4820 decode(extra_reqid_return_codes, bl);
9f95a23c
TL
4821 if (struct_v >= 13)
4822 decode(clean_regions, bl);
4823 else
4824 clean_regions.mark_fully_dirty();
4825 if (struct_v >= 14) {
4826 if (op != ERROR) {
4827 decode(return_code, bl);
4828 }
4829 decode(op_returns, bl);
4830 }
7c673cae
FG
4831 DECODE_FINISH(bl);
4832}
4833
4834void pg_log_entry_t::dump(Formatter *f) const
4835{
4836 f->dump_string("op", get_op_name());
4837 f->dump_stream("object") << soid;
4838 f->dump_stream("version") << version;
4839 f->dump_stream("prior_version") << prior_version;
4840 f->dump_stream("reqid") << reqid;
4841 f->open_array_section("extra_reqids");
11fdf7f2 4842 uint32_t idx = 0;
31f18b77 4843 for (auto p = extra_reqids.begin();
7c673cae 4844 p != extra_reqids.end();
11fdf7f2 4845 ++idx, ++p) {
7c673cae
FG
4846 f->open_object_section("extra_reqid");
4847 f->dump_stream("reqid") << p->first;
4848 f->dump_stream("user_version") << p->second;
11fdf7f2
TL
4849 auto it = extra_reqid_return_codes.find(idx);
4850 if (it != extra_reqid_return_codes.end()) {
4851 f->dump_int("return_code", it->second);
4852 }
7c673cae
FG
4853 f->close_section();
4854 }
4855 f->close_section();
4856 f->dump_stream("mtime") << mtime;
4857 f->dump_int("return_code", return_code);
9f95a23c
TL
4858 if (!op_returns.empty()) {
4859 f->open_array_section("op_returns");
4860 for (auto& i : op_returns) {
4861 f->dump_object("op", i);
4862 }
4863 f->close_section();
4864 }
7c673cae
FG
4865 if (snaps.length() > 0) {
4866 vector<snapid_t> v;
9f95a23c 4867 ceph::buffer::list c = snaps;
11fdf7f2 4868 auto p = c.cbegin();
7c673cae 4869 try {
11fdf7f2
TL
4870 using ceph::decode;
4871 decode(v, p);
7c673cae
FG
4872 } catch (...) {
4873 v.clear();
4874 }
4875 f->open_object_section("snaps");
9f95a23c 4876 for (auto p = v.begin(); p != v.end(); ++p)
7c673cae
FG
4877 f->dump_unsigned("snap", *p);
4878 f->close_section();
4879 }
4880 {
4881 f->open_object_section("mod_desc");
4882 mod_desc.dump(f);
4883 f->close_section();
4884 }
9f95a23c
TL
4885 {
4886 f->open_object_section("clean_regions");
4887 clean_regions.dump(f);
4888 f->close_section();
4889 }
7c673cae
FG
4890}
4891
4892void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
4893{
4894 o.push_back(new pg_log_entry_t());
4895 hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
4896 o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
4897 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4898 utime_t(8,9), 0));
4899 o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
4900 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4901 utime_t(8,9), -ENOENT));
4902}
4903
4904ostream& operator<<(ostream& out, const pg_log_entry_t& e)
4905{
4906 out << e.version << " (" << e.prior_version << ") "
4907 << std::left << std::setw(8) << e.get_op_name() << ' '
4908 << e.soid << " by " << e.reqid << " " << e.mtime
4909 << " " << e.return_code;
9f95a23c
TL
4910 if (!e.op_returns.empty()) {
4911 out << " " << e.op_returns;
4912 }
7c673cae
FG
4913 if (e.snaps.length()) {
4914 vector<snapid_t> snaps;
9f95a23c 4915 ceph::buffer::list c = e.snaps;
11fdf7f2 4916 auto p = c.cbegin();
7c673cae 4917 try {
11fdf7f2 4918 decode(snaps, p);
7c673cae
FG
4919 } catch (...) {
4920 snaps.clear();
4921 }
4922 out << " snaps " << snaps;
4923 }
9f95a23c 4924 out << " ObjectCleanRegions " << e.clean_regions;
7c673cae
FG
4925 return out;
4926}
4927
c07f9fc5
FG
4928// -- pg_log_dup_t --
4929
11fdf7f2 4930std::string pg_log_dup_t::get_key_name() const
c07f9fc5 4931{
11fdf7f2
TL
4932 static const char prefix[] = "dup_";
4933 std::string key(36, ' ');
4934 memcpy(&key[0], prefix, 4);
4935 version.get_key_name(&key[4]);
4936 key.resize(35); // remove the null terminator
4937 return key;
c07f9fc5
FG
4938}
4939
9f95a23c 4940void pg_log_dup_t::encode(ceph::buffer::list &bl) const
c07f9fc5 4941{
9f95a23c 4942 ENCODE_START(2, 1, bl);
11fdf7f2
TL
4943 encode(reqid, bl);
4944 encode(version, bl);
4945 encode(user_version, bl);
4946 encode(return_code, bl);
9f95a23c 4947 encode(op_returns, bl);
c07f9fc5
FG
4948 ENCODE_FINISH(bl);
4949}
4950
9f95a23c 4951void pg_log_dup_t::decode(ceph::buffer::list::const_iterator &bl)
c07f9fc5 4952{
9f95a23c 4953 DECODE_START(2, bl);
11fdf7f2
TL
4954 decode(reqid, bl);
4955 decode(version, bl);
4956 decode(user_version, bl);
4957 decode(return_code, bl);
9f95a23c
TL
4958 if (struct_v >= 2) {
4959 decode(op_returns, bl);
4960 }
c07f9fc5
FG
4961 DECODE_FINISH(bl);
4962}
4963
4964void pg_log_dup_t::dump(Formatter *f) const
4965{
4966 f->dump_stream("reqid") << reqid;
4967 f->dump_stream("version") << version;
4968 f->dump_stream("user_version") << user_version;
4969 f->dump_stream("return_code") << return_code;
9f95a23c
TL
4970 if (!op_returns.empty()) {
4971 f->open_array_section("op_returns");
4972 for (auto& i : op_returns) {
4973 f->dump_object("op", i);
4974 }
4975 f->close_section();
4976 }
c07f9fc5
FG
4977}
4978
4979void pg_log_dup_t::generate_test_instances(list<pg_log_dup_t*>& o)
4980{
4981 o.push_back(new pg_log_dup_t());
4982 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4983 1,
4984 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4985 0));
4986 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4987 2,
4988 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4989 -ENOENT));
4990}
4991
4992
4993std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e) {
9f95a23c 4994 out << "log_dup(reqid=" << e.reqid <<
c07f9fc5 4995 " v=" << e.version << " uv=" << e.user_version <<
9f95a23c
TL
4996 " rc=" << e.return_code;
4997 if (!e.op_returns.empty()) {
4998 out << " " << e.op_returns;
4999 }
5000 return out << ")";
c07f9fc5
FG
5001}
5002
7c673cae
FG
5003
5004// -- pg_log_t --
5005
5006// out: pg_log_t that only has entries that apply to import_pgid using curmap
5007// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
5008void pg_log_t::filter_log(spg_t import_pgid, const OSDMap &curmap,
5009 const string &hit_set_namespace, const pg_log_t &in,
5010 pg_log_t &out, pg_log_t &reject)
5011{
5012 out = in;
5013 out.log.clear();
5014 reject.log.clear();
5015
9f95a23c 5016 for (auto i = in.log.cbegin(); i != in.log.cend(); ++i) {
7c673cae
FG
5017
5018 // Reject pg log entries for temporary objects
5019 if (i->soid.is_temp()) {
5020 reject.log.push_back(*i);
5021 continue;
5022 }
5023
5024 if (i->soid.nspace != hit_set_namespace) {
5025 object_t oid = i->soid.oid;
5026 object_locator_t loc(i->soid);
5027 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
5028 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
5029
5030 if (import_pgid.pgid == pgid) {
5031 out.log.push_back(*i);
5032 } else {
5033 reject.log.push_back(*i);
5034 }
5035 } else {
5036 out.log.push_back(*i);
5037 }
5038 }
5039}
5040
9f95a23c 5041void pg_log_t::encode(ceph::buffer::list& bl) const
7c673cae 5042{
c07f9fc5 5043 ENCODE_START(7, 3, bl);
11fdf7f2
TL
5044 encode(head, bl);
5045 encode(tail, bl);
5046 encode(log, bl);
5047 encode(can_rollback_to, bl);
5048 encode(rollback_info_trimmed_to, bl);
5049 encode(dups, bl);
7c673cae
FG
5050 ENCODE_FINISH(bl);
5051}
5052
9f95a23c 5053void pg_log_t::decode(ceph::buffer::list::const_iterator &bl, int64_t pool)
7c673cae 5054{
c07f9fc5 5055 DECODE_START_LEGACY_COMPAT_LEN(7, 3, 3, bl);
11fdf7f2
TL
5056 decode(head, bl);
5057 decode(tail, bl);
7c673cae
FG
5058 if (struct_v < 2) {
5059 bool backlog;
11fdf7f2 5060 decode(backlog, bl);
7c673cae 5061 }
11fdf7f2 5062 decode(log, bl);
7c673cae 5063 if (struct_v >= 5)
11fdf7f2 5064 decode(can_rollback_to, bl);
7c673cae
FG
5065
5066 if (struct_v >= 6)
11fdf7f2 5067 decode(rollback_info_trimmed_to, bl);
7c673cae
FG
5068 else
5069 rollback_info_trimmed_to = tail;
c07f9fc5
FG
5070
5071 if (struct_v >= 7)
11fdf7f2 5072 decode(dups, bl);
c07f9fc5 5073
7c673cae
FG
5074 DECODE_FINISH(bl);
5075
5076 // handle hobject_t format change
5077 if (struct_v < 4) {
9f95a23c 5078 for (auto i = log.begin(); i != log.end(); ++i) {
7c673cae
FG
5079 if (!i->soid.is_max() && i->soid.pool == -1)
5080 i->soid.pool = pool;
5081 }
5082 }
5083}
5084
5085void pg_log_t::dump(Formatter *f) const
5086{
5087 f->dump_stream("head") << head;
5088 f->dump_stream("tail") << tail;
5089 f->open_array_section("log");
9f95a23c 5090 for (auto p = log.cbegin(); p != log.cend(); ++p) {
7c673cae
FG
5091 f->open_object_section("entry");
5092 p->dump(f);
5093 f->close_section();
5094 }
5095 f->close_section();
c07f9fc5
FG
5096 f->open_array_section("dups");
5097 for (const auto& entry : dups) {
5098 f->open_object_section("entry");
5099 entry.dump(f);
5100 f->close_section();
5101 }
5102 f->close_section();
7c673cae
FG
5103}
5104
5105void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
5106{
5107 o.push_back(new pg_log_t);
5108
5109 // this is nonsensical:
5110 o.push_back(new pg_log_t);
5111 o.back()->head = eversion_t(1,2);
5112 o.back()->tail = eversion_t(3,4);
5113 list<pg_log_entry_t*> e;
5114 pg_log_entry_t::generate_test_instances(e);
9f95a23c 5115 for (auto p = e.begin(); p != e.end(); ++p)
7c673cae
FG
5116 o.back()->log.push_back(**p);
5117}
5118
81eedcae
TL
5119static void _handle_dups(CephContext* cct, pg_log_t &target, const pg_log_t &other, unsigned maxdups)
5120{
5121 auto earliest_dup_version =
5122 target.head.version < maxdups ? 0u : target.head.version - maxdups + 1;
5123 lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl;
5124
5125 for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) {
5126 if (d->version.version >= earliest_dup_version) {
5127 lgeneric_subdout(cct, osd, 20)
5128 << "copy_up_to/copy_after copy dup version "
5129 << d->version << dendl;
5130 target.dups.push_back(pg_log_dup_t(*d));
5131 }
5132 }
5133
5134 for (auto i = other.log.cbegin(); i != other.log.cend(); ++i) {
5135 ceph_assert(i->version > other.tail);
5136 if (i->version > target.tail)
5137 break;
5138 if (i->version.version >= earliest_dup_version) {
5139 lgeneric_subdout(cct, osd, 20)
5140 << "copy_up_to/copy_after copy dup from log version "
5141 << i->version << dendl;
5142 target.dups.push_back(pg_log_dup_t(*i));
5143 }
5144 }
5145}
5146
5147
5148void pg_log_t::copy_after(CephContext* cct, const pg_log_t &other, eversion_t v)
7c673cae
FG
5149{
5150 can_rollback_to = other.can_rollback_to;
5151 head = other.head;
5152 tail = other.tail;
81eedcae 5153 lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl;
9f95a23c 5154 for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) {
11fdf7f2 5155 ceph_assert(i->version > other.tail);
7c673cae
FG
5156 if (i->version <= v) {
5157 // make tail accurate.
5158 tail = i->version;
5159 break;
5160 }
81eedcae 5161 lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
7c673cae
FG
5162 log.push_front(*i);
5163 }
81eedcae 5164 _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
7c673cae
FG
5165}
5166
81eedcae 5167void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max)
7c673cae
FG
5168{
5169 can_rollback_to = other.can_rollback_to;
5170 int n = 0;
5171 head = other.head;
5172 tail = other.tail;
81eedcae 5173 lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl;
9f95a23c 5174 for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) {
81eedcae 5175 ceph_assert(i->version > other.tail);
7c673cae
FG
5176 if (n++ >= max) {
5177 tail = i->version;
5178 break;
5179 }
81eedcae 5180 lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
7c673cae
FG
5181 log.push_front(*i);
5182 }
81eedcae 5183 _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
7c673cae
FG
5184}
5185
c07f9fc5 5186ostream& pg_log_t::print(ostream& out) const
7c673cae
FG
5187{
5188 out << *this << std::endl;
9f95a23c 5189 for (auto p = log.cbegin(); p != log.cend(); ++p)
7c673cae 5190 out << *p << std::endl;
c07f9fc5
FG
5191 for (const auto& entry : dups) {
5192 out << " dup entry: " << entry << std::endl;
5193 }
7c673cae
FG
5194 return out;
5195}
5196
5197// -- pg_missing_t --
5198
5199ostream& operator<<(ostream& out, const pg_missing_item& i)
5200{
5201 out << i.need;
5202 if (i.have != eversion_t())
5203 out << "(" << i.have << ")";
9f95a23c
TL
5204 out << " flags = " << i.flag_str()
5205 << " " << i.clean_regions;
7c673cae
FG
5206 return out;
5207}
5208
5209// -- object_copy_cursor_t --
5210
9f95a23c 5211void object_copy_cursor_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
5212{
5213 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5214 encode(attr_complete, bl);
5215 encode(data_offset, bl);
5216 encode(data_complete, bl);
5217 encode(omap_offset, bl);
5218 encode(omap_complete, bl);
7c673cae
FG
5219 ENCODE_FINISH(bl);
5220}
5221
9f95a23c 5222void object_copy_cursor_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
5223{
5224 DECODE_START(1, bl);
11fdf7f2
TL
5225 decode(attr_complete, bl);
5226 decode(data_offset, bl);
5227 decode(data_complete, bl);
5228 decode(omap_offset, bl);
5229 decode(omap_complete, bl);
7c673cae
FG
5230 DECODE_FINISH(bl);
5231}
5232
5233void object_copy_cursor_t::dump(Formatter *f) const
5234{
5235 f->dump_unsigned("attr_complete", (int)attr_complete);
5236 f->dump_unsigned("data_offset", data_offset);
5237 f->dump_unsigned("data_complete", (int)data_complete);
5238 f->dump_string("omap_offset", omap_offset);
5239 f->dump_unsigned("omap_complete", (int)omap_complete);
5240}
5241
5242void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
5243{
5244 o.push_back(new object_copy_cursor_t);
5245 o.push_back(new object_copy_cursor_t);
5246 o.back()->attr_complete = true;
5247 o.back()->data_offset = 123;
5248 o.push_back(new object_copy_cursor_t);
5249 o.back()->attr_complete = true;
5250 o.back()->data_complete = true;
5251 o.back()->omap_offset = "foo";
5252 o.push_back(new object_copy_cursor_t);
5253 o.back()->attr_complete = true;
5254 o.back()->data_complete = true;
5255 o.back()->omap_complete = true;
5256}
5257
5258// -- object_copy_data_t --
5259
9f95a23c 5260void object_copy_data_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 5261{
11fdf7f2
TL
5262 ENCODE_START(8, 5, bl);
5263 encode(size, bl);
5264 encode(mtime, bl);
5265 encode(attrs, bl);
5266 encode(data, bl);
5267 encode(omap_data, bl);
5268 encode(cursor, bl);
5269 encode(omap_header, bl);
5270 encode(snaps, bl);
5271 encode(snap_seq, bl);
5272 encode(flags, bl);
5273 encode(data_digest, bl);
5274 encode(omap_digest, bl);
5275 encode(reqids, bl);
5276 encode(truncate_seq, bl);
5277 encode(truncate_size, bl);
5278 encode(reqid_return_codes, bl);
7c673cae
FG
5279 ENCODE_FINISH(bl);
5280}
5281
9f95a23c 5282void object_copy_data_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 5283{
9f95a23c 5284 DECODE_START(8, bl);
7c673cae
FG
5285 if (struct_v < 5) {
5286 // old
11fdf7f2
TL
5287 decode(size, bl);
5288 decode(mtime, bl);
7c673cae
FG
5289 {
5290 string category;
11fdf7f2 5291 decode(category, bl); // no longer used
7c673cae 5292 }
11fdf7f2
TL
5293 decode(attrs, bl);
5294 decode(data, bl);
7c673cae 5295 {
9f95a23c 5296 map<string,ceph::buffer::list> omap;
11fdf7f2 5297 decode(omap, bl);
7c673cae 5298 omap_data.clear();
11fdf7f2
TL
5299 if (!omap.empty()) {
5300 using ceph::encode;
5301 encode(omap, omap_data);
5302 }
7c673cae 5303 }
11fdf7f2 5304 decode(cursor, bl);
7c673cae 5305 if (struct_v >= 2)
11fdf7f2 5306 decode(omap_header, bl);
7c673cae 5307 if (struct_v >= 3) {
11fdf7f2
TL
5308 decode(snaps, bl);
5309 decode(snap_seq, bl);
7c673cae
FG
5310 } else {
5311 snaps.clear();
5312 snap_seq = 0;
5313 }
5314 if (struct_v >= 4) {
11fdf7f2
TL
5315 decode(flags, bl);
5316 decode(data_digest, bl);
5317 decode(omap_digest, bl);
7c673cae
FG
5318 }
5319 } else {
5320 // current
11fdf7f2
TL
5321 decode(size, bl);
5322 decode(mtime, bl);
5323 decode(attrs, bl);
5324 decode(data, bl);
5325 decode(omap_data, bl);
5326 decode(cursor, bl);
5327 decode(omap_header, bl);
5328 decode(snaps, bl);
5329 decode(snap_seq, bl);
7c673cae 5330 if (struct_v >= 4) {
11fdf7f2
TL
5331 decode(flags, bl);
5332 decode(data_digest, bl);
5333 decode(omap_digest, bl);
7c673cae
FG
5334 }
5335 if (struct_v >= 6) {
11fdf7f2 5336 decode(reqids, bl);
7c673cae
FG
5337 }
5338 if (struct_v >= 7) {
11fdf7f2
TL
5339 decode(truncate_seq, bl);
5340 decode(truncate_size, bl);
5341 }
5342 if (struct_v >= 8) {
5343 decode(reqid_return_codes, bl);
7c673cae
FG
5344 }
5345 }
5346 DECODE_FINISH(bl);
5347}
5348
5349void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o)
5350{
5351 o.push_back(new object_copy_data_t());
5352
5353 list<object_copy_cursor_t*> cursors;
5354 object_copy_cursor_t::generate_test_instances(cursors);
9f95a23c 5355 auto ci = cursors.begin();
7c673cae
FG
5356 o.back()->cursor = **(ci++);
5357
5358 o.push_back(new object_copy_data_t());
5359 o.back()->cursor = **(ci++);
5360
5361 o.push_back(new object_copy_data_t());
5362 o.back()->size = 1234;
5363 o.back()->mtime.set_from_double(1234);
9f95a23c
TL
5364 ceph::buffer::ptr bp("there", 5);
5365 ceph::buffer::list bl;
7c673cae
FG
5366 bl.push_back(bp);
5367 o.back()->attrs["hello"] = bl;
9f95a23c
TL
5368 ceph::buffer::ptr bp2("not", 3);
5369 ceph::buffer::list bl2;
7c673cae 5370 bl2.push_back(bp2);
9f95a23c 5371 map<string,ceph::buffer::list> omap;
7c673cae 5372 omap["why"] = bl2;
11fdf7f2
TL
5373 using ceph::encode;
5374 encode(omap, o.back()->omap_data);
9f95a23c 5375 ceph::buffer::ptr databp("iamsomedatatocontain", 20);
7c673cae
FG
5376 o.back()->data.push_back(databp);
5377 o.back()->omap_header.append("this is an omap header");
5378 o.back()->snaps.push_back(123);
5379 o.back()->reqids.push_back(make_pair(osd_reqid_t(), version_t()));
5380}
5381
5382void object_copy_data_t::dump(Formatter *f) const
5383{
5384 f->open_object_section("cursor");
5385 cursor.dump(f);
5386 f->close_section(); // cursor
5387 f->dump_int("size", size);
5388 f->dump_stream("mtime") << mtime;
9f95a23c 5389 /* we should really print out the attrs here, but ceph::buffer::list
7c673cae
FG
5390 const-correctness prevents that */
5391 f->dump_int("attrs_size", attrs.size());
5392 f->dump_int("flags", flags);
5393 f->dump_unsigned("data_digest", data_digest);
5394 f->dump_unsigned("omap_digest", omap_digest);
5395 f->dump_int("omap_data_length", omap_data.length());
5396 f->dump_int("omap_header_length", omap_header.length());
5397 f->dump_int("data_length", data.length());
5398 f->open_array_section("snaps");
9f95a23c 5399 for (auto p = snaps.cbegin(); p != snaps.cend(); ++p)
7c673cae
FG
5400 f->dump_unsigned("snap", *p);
5401 f->close_section();
5402 f->open_array_section("reqids");
11fdf7f2 5403 uint32_t idx = 0;
31f18b77 5404 for (auto p = reqids.begin();
7c673cae 5405 p != reqids.end();
11fdf7f2 5406 ++idx, ++p) {
7c673cae
FG
5407 f->open_object_section("extra_reqid");
5408 f->dump_stream("reqid") << p->first;
5409 f->dump_stream("user_version") << p->second;
11fdf7f2
TL
5410 auto it = reqid_return_codes.find(idx);
5411 if (it != reqid_return_codes.end()) {
5412 f->dump_int("return_code", it->second);
5413 }
7c673cae
FG
5414 f->close_section();
5415 }
5416 f->close_section();
5417}
5418
5419// -- pg_create_t --
5420
9f95a23c 5421void pg_create_t::encode(ceph::buffer::list &bl) const
7c673cae
FG
5422{
5423 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5424 encode(created, bl);
5425 encode(parent, bl);
5426 encode(split_bits, bl);
7c673cae
FG
5427 ENCODE_FINISH(bl);
5428}
5429
9f95a23c 5430void pg_create_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
5431{
5432 DECODE_START(1, bl);
11fdf7f2
TL
5433 decode(created, bl);
5434 decode(parent, bl);
5435 decode(split_bits, bl);
7c673cae
FG
5436 DECODE_FINISH(bl);
5437}
5438
5439void pg_create_t::dump(Formatter *f) const
5440{
5441 f->dump_unsigned("created", created);
5442 f->dump_stream("parent") << parent;
5443 f->dump_int("split_bits", split_bits);
5444}
5445
5446void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
5447{
5448 o.push_back(new pg_create_t);
11fdf7f2 5449 o.push_back(new pg_create_t(1, pg_t(3, 4), 2));
7c673cae
FG
5450}
5451
5452
5453// -- pg_hit_set_info_t --
5454
9f95a23c 5455void pg_hit_set_info_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
5456{
5457 ENCODE_START(2, 1, bl);
11fdf7f2
TL
5458 encode(begin, bl);
5459 encode(end, bl);
5460 encode(version, bl);
5461 encode(using_gmt, bl);
7c673cae
FG
5462 ENCODE_FINISH(bl);
5463}
5464
9f95a23c 5465void pg_hit_set_info_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
5466{
5467 DECODE_START(2, p);
11fdf7f2
TL
5468 decode(begin, p);
5469 decode(end, p);
5470 decode(version, p);
7c673cae 5471 if (struct_v >= 2) {
11fdf7f2 5472 decode(using_gmt, p);
7c673cae
FG
5473 } else {
5474 using_gmt = false;
5475 }
5476 DECODE_FINISH(p);
5477}
5478
5479void pg_hit_set_info_t::dump(Formatter *f) const
5480{
5481 f->dump_stream("begin") << begin;
5482 f->dump_stream("end") << end;
5483 f->dump_stream("version") << version;
5484 f->dump_stream("using_gmt") << using_gmt;
5485}
5486
5487void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
5488{
5489 ls.push_back(new pg_hit_set_info_t);
5490 ls.push_back(new pg_hit_set_info_t);
5491 ls.back()->begin = utime_t(1, 2);
5492 ls.back()->end = utime_t(3, 4);
5493}
5494
5495
5496// -- pg_hit_set_history_t --
5497
9f95a23c 5498void pg_hit_set_history_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
5499{
5500 ENCODE_START(1, 1, bl);
11fdf7f2 5501 encode(current_last_update, bl);
7c673cae
FG
5502 {
5503 utime_t dummy_stamp;
11fdf7f2 5504 encode(dummy_stamp, bl);
7c673cae
FG
5505 }
5506 {
5507 pg_hit_set_info_t dummy_info;
11fdf7f2 5508 encode(dummy_info, bl);
7c673cae 5509 }
11fdf7f2 5510 encode(history, bl);
7c673cae
FG
5511 ENCODE_FINISH(bl);
5512}
5513
9f95a23c 5514void pg_hit_set_history_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
5515{
5516 DECODE_START(1, p);
11fdf7f2 5517 decode(current_last_update, p);
7c673cae
FG
5518 {
5519 utime_t dummy_stamp;
11fdf7f2 5520 decode(dummy_stamp, p);
7c673cae
FG
5521 }
5522 {
5523 pg_hit_set_info_t dummy_info;
11fdf7f2 5524 decode(dummy_info, p);
7c673cae 5525 }
11fdf7f2 5526 decode(history, p);
7c673cae
FG
5527 DECODE_FINISH(p);
5528}
5529
5530void pg_hit_set_history_t::dump(Formatter *f) const
5531{
5532 f->dump_stream("current_last_update") << current_last_update;
5533 f->open_array_section("history");
9f95a23c 5534 for (auto p = history.cbegin(); p != history.cend(); ++p) {
7c673cae
FG
5535 f->open_object_section("info");
5536 p->dump(f);
5537 f->close_section();
5538 }
5539 f->close_section();
5540}
5541
5542void pg_hit_set_history_t::generate_test_instances(list<pg_hit_set_history_t*>& ls)
5543{
5544 ls.push_back(new pg_hit_set_history_t);
5545 ls.push_back(new pg_hit_set_history_t);
5546 ls.back()->current_last_update = eversion_t(1, 2);
5547 ls.back()->history.push_back(pg_hit_set_info_t());
5548}
5549
7c673cae
FG
5550// -- OSDSuperblock --
5551
9f95a23c 5552void OSDSuperblock::encode(ceph::buffer::list &bl) const
7c673cae 5553{
9f95a23c 5554 ENCODE_START(9, 5, bl);
11fdf7f2
TL
5555 encode(cluster_fsid, bl);
5556 encode(whoami, bl);
5557 encode(current_epoch, bl);
5558 encode(oldest_map, bl);
5559 encode(newest_map, bl);
5560 encode(weight, bl);
7c673cae 5561 compat_features.encode(bl);
11fdf7f2
TL
5562 encode(clean_thru, bl);
5563 encode(mounted, bl);
5564 encode(osd_fsid, bl);
5565 encode((epoch_t)0, bl); // epoch_t last_epoch_marked_full
5566 encode((uint32_t)0, bl); // map<int64_t,epoch_t> pool_last_epoch_marked_full
9f95a23c
TL
5567 encode(purged_snaps_last, bl);
5568 encode(last_purged_snaps_scrub, bl);
7c673cae
FG
5569 ENCODE_FINISH(bl);
5570}
5571
9f95a23c 5572void OSDSuperblock::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 5573{
9f95a23c 5574 DECODE_START_LEGACY_COMPAT_LEN(9, 5, 5, bl);
7c673cae
FG
5575 if (struct_v < 3) {
5576 string magic;
11fdf7f2
TL
5577 decode(magic, bl);
5578 }
5579 decode(cluster_fsid, bl);
5580 decode(whoami, bl);
5581 decode(current_epoch, bl);
5582 decode(oldest_map, bl);
5583 decode(newest_map, bl);
5584 decode(weight, bl);
7c673cae
FG
5585 if (struct_v >= 2) {
5586 compat_features.decode(bl);
5587 } else { //upgrade it!
5588 compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
5589 }
11fdf7f2
TL
5590 decode(clean_thru, bl);
5591 decode(mounted, bl);
7c673cae 5592 if (struct_v >= 4)
11fdf7f2 5593 decode(osd_fsid, bl);
7c673cae
FG
5594 if (struct_v >= 6) {
5595 epoch_t last_map_marked_full;
11fdf7f2 5596 decode(last_map_marked_full, bl);
7c673cae
FG
5597 }
5598 if (struct_v >= 7) {
5599 map<int64_t,epoch_t> pool_last_map_marked_full;
11fdf7f2 5600 decode(pool_last_map_marked_full, bl);
7c673cae 5601 }
9f95a23c
TL
5602 if (struct_v >= 9) {
5603 decode(purged_snaps_last, bl);
5604 decode(last_purged_snaps_scrub, bl);
5605 } else {
5606 purged_snaps_last = 0;
5607 }
7c673cae
FG
5608 DECODE_FINISH(bl);
5609}
5610
5611void OSDSuperblock::dump(Formatter *f) const
5612{
5613 f->dump_stream("cluster_fsid") << cluster_fsid;
5614 f->dump_stream("osd_fsid") << osd_fsid;
5615 f->dump_int("whoami", whoami);
5616 f->dump_int("current_epoch", current_epoch);
5617 f->dump_int("oldest_map", oldest_map);
5618 f->dump_int("newest_map", newest_map);
5619 f->dump_float("weight", weight);
5620 f->open_object_section("compat");
5621 compat_features.dump(f);
5622 f->close_section();
5623 f->dump_int("clean_thru", clean_thru);
5624 f->dump_int("last_epoch_mounted", mounted);
9f95a23c
TL
5625 f->dump_unsigned("purged_snaps_last", purged_snaps_last);
5626 f->dump_stream("last_purged_snaps_scrub") << last_purged_snaps_scrub;
7c673cae
FG
5627}
5628
5629void OSDSuperblock::generate_test_instances(list<OSDSuperblock*>& o)
5630{
5631 OSDSuperblock z;
5632 o.push_back(new OSDSuperblock(z));
11fdf7f2
TL
5633 z.cluster_fsid.parse("01010101-0101-0101-0101-010101010101");
5634 z.osd_fsid.parse("02020202-0202-0202-0202-020202020202");
7c673cae
FG
5635 z.whoami = 3;
5636 z.current_epoch = 4;
5637 z.oldest_map = 5;
5638 z.newest_map = 9;
5639 z.mounted = 8;
5640 z.clean_thru = 7;
5641 o.push_back(new OSDSuperblock(z));
5642 o.push_back(new OSDSuperblock(z));
5643}
5644
5645// -- SnapSet --
5646
9f95a23c 5647void SnapSet::encode(ceph::buffer::list& bl) const
7c673cae
FG
5648{
5649 ENCODE_START(3, 2, bl);
11fdf7f2
TL
5650 encode(seq, bl);
5651 encode(true, bl); // head_exists
5652 encode(snaps, bl);
5653 encode(clones, bl);
5654 encode(clone_overlap, bl);
5655 encode(clone_size, bl);
5656 encode(clone_snaps, bl);
7c673cae
FG
5657 ENCODE_FINISH(bl);
5658}
5659
9f95a23c 5660void SnapSet::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
5661{
5662 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
11fdf7f2 5663 decode(seq, bl);
9f95a23c 5664 bl += 1u; // skip legacy head_exists (always true)
11fdf7f2
TL
5665 decode(snaps, bl);
5666 decode(clones, bl);
5667 decode(clone_overlap, bl);
5668 decode(clone_size, bl);
7c673cae 5669 if (struct_v >= 3) {
11fdf7f2 5670 decode(clone_snaps, bl);
7c673cae
FG
5671 } else {
5672 clone_snaps.clear();
5673 }
5674 DECODE_FINISH(bl);
5675}
5676
5677void SnapSet::dump(Formatter *f) const
5678{
9f95a23c 5679 f->dump_unsigned("seq", seq);
7c673cae 5680 f->open_array_section("clones");
9f95a23c 5681 for (auto p = clones.cbegin(); p != clones.cend(); ++p) {
7c673cae
FG
5682 f->open_object_section("clone");
5683 f->dump_unsigned("snap", *p);
94b18763
FG
5684 auto cs = clone_size.find(*p);
5685 if (cs != clone_size.end())
5686 f->dump_unsigned("size", cs->second);
5687 else
5688 f->dump_string("size", "????");
5689 auto co = clone_overlap.find(*p);
5690 if (co != clone_overlap.end())
5691 f->dump_stream("overlap") << co->second;
5692 else
5693 f->dump_stream("overlap") << "????";
7c673cae
FG
5694 auto q = clone_snaps.find(*p);
5695 if (q != clone_snaps.end()) {
5696 f->open_array_section("snaps");
5697 for (auto s : q->second) {
5698 f->dump_unsigned("snap", s);
5699 }
5700 f->close_section();
5701 }
5702 f->close_section();
5703 }
5704 f->close_section();
5705}
5706
5707void SnapSet::generate_test_instances(list<SnapSet*>& o)
5708{
5709 o.push_back(new SnapSet);
5710 o.push_back(new SnapSet);
7c673cae
FG
5711 o.back()->seq = 123;
5712 o.back()->snaps.push_back(123);
5713 o.back()->snaps.push_back(12);
5714 o.push_back(new SnapSet);
7c673cae
FG
5715 o.back()->seq = 123;
5716 o.back()->snaps.push_back(123);
5717 o.back()->snaps.push_back(12);
5718 o.back()->clones.push_back(12);
5719 o.back()->clone_size[12] = 12345;
5720 o.back()->clone_overlap[12];
5721 o.back()->clone_snaps[12] = {12, 10, 8};
5722}
5723
5724ostream& operator<<(ostream& out, const SnapSet& cs)
5725{
11fdf7f2
TL
5726 return out << cs.seq << "=" << cs.snaps << ":"
5727 << cs.clone_snaps;
7c673cae
FG
5728}
5729
5730void SnapSet::from_snap_set(const librados::snap_set_t& ss, bool legacy)
5731{
5732 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
5733 // correct: it will not include snaps that still logically exist
5734 // but for which there was no clone that is defined. For all
5735 // practical purposes this doesn't matter, since we only use that
5736 // information to clone on the OSD, and we have already moved
5737 // forward past that part of the object history.
5738
5739 seq = ss.seq;
5740 set<snapid_t> _snaps;
5741 set<snapid_t> _clones;
9f95a23c 5742 for (auto p = ss.clones.cbegin(); p != ss.clones.cend(); ++p) {
11fdf7f2 5743 if (p->cloneid != librados::SNAP_HEAD) {
7c673cae
FG
5744 _clones.insert(p->cloneid);
5745 _snaps.insert(p->snaps.begin(), p->snaps.end());
5746 clone_size[p->cloneid] = p->size;
5747 clone_overlap[p->cloneid]; // the entry must exist, even if it's empty.
9f95a23c 5748 for (auto q = p->overlap.cbegin(); q != p->overlap.cend(); ++q)
7c673cae
FG
5749 clone_overlap[p->cloneid].insert(q->first, q->second);
5750 if (!legacy) {
5751 // p->snaps is ascending; clone_snaps is descending
5752 vector<snapid_t>& v = clone_snaps[p->cloneid];
5753 for (auto q = p->snaps.rbegin(); q != p->snaps.rend(); ++q) {
5754 v.push_back(*q);
5755 }
5756 }
5757 }
5758 }
5759
5760 // ascending
5761 clones.clear();
5762 clones.reserve(_clones.size());
9f95a23c 5763 for (auto p = _clones.begin(); p != _clones.end(); ++p)
7c673cae
FG
5764 clones.push_back(*p);
5765
5766 // descending
5767 snaps.clear();
5768 snaps.reserve(_snaps.size());
9f95a23c 5769 for (auto p = _snaps.rbegin();
7c673cae
FG
5770 p != _snaps.rend(); ++p)
5771 snaps.push_back(*p);
5772}
5773
5774uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
5775{
11fdf7f2 5776 ceph_assert(clone_size.count(clone));
7c673cae 5777 uint64_t size = clone_size.find(clone)->second;
11fdf7f2 5778 ceph_assert(clone_overlap.count(clone));
7c673cae 5779 const interval_set<uint64_t> &overlap = clone_overlap.find(clone)->second;
11fdf7f2
TL
5780 ceph_assert(size >= (uint64_t)overlap.size());
5781 return size - overlap.size();
7c673cae
FG
5782}
5783
5784void SnapSet::filter(const pg_pool_t &pinfo)
5785{
5786 vector<snapid_t> oldsnaps;
5787 oldsnaps.swap(snaps);
9f95a23c 5788 for (auto i = oldsnaps.cbegin(); i != oldsnaps.cend(); ++i) {
7c673cae
FG
5789 if (!pinfo.is_removed_snap(*i))
5790 snaps.push_back(*i);
5791 }
5792}
5793
5794SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
5795{
5796 SnapSet ss = *this;
5797 ss.filter(pinfo);
5798 return ss;
5799}
5800
5801// -- watch_info_t --
5802
9f95a23c 5803void watch_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae
FG
5804{
5805 ENCODE_START(4, 3, bl);
11fdf7f2
TL
5806 encode(cookie, bl);
5807 encode(timeout_seconds, bl);
5808 encode(addr, bl, features);
7c673cae
FG
5809 ENCODE_FINISH(bl);
5810}
5811
9f95a23c 5812void watch_info_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
5813{
5814 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
11fdf7f2 5815 decode(cookie, bl);
7c673cae
FG
5816 if (struct_v < 2) {
5817 uint64_t ver;
11fdf7f2 5818 decode(ver, bl);
7c673cae 5819 }
11fdf7f2 5820 decode(timeout_seconds, bl);
7c673cae 5821 if (struct_v >= 4) {
11fdf7f2 5822 decode(addr, bl);
7c673cae
FG
5823 }
5824 DECODE_FINISH(bl);
5825}
5826
5827void watch_info_t::dump(Formatter *f) const
5828{
5829 f->dump_unsigned("cookie", cookie);
5830 f->dump_unsigned("timeout_seconds", timeout_seconds);
5831 f->open_object_section("addr");
5832 addr.dump(f);
5833 f->close_section();
5834}
5835
5836void watch_info_t::generate_test_instances(list<watch_info_t*>& o)
5837{
5838 o.push_back(new watch_info_t);
5839 o.push_back(new watch_info_t);
5840 o.back()->cookie = 123;
5841 o.back()->timeout_seconds = 99;
5842 entity_addr_t ea;
5843 ea.set_type(entity_addr_t::TYPE_LEGACY);
5844 ea.set_nonce(1);
5845 ea.set_family(AF_INET);
5846 ea.set_in4_quad(0, 127);
5847 ea.set_in4_quad(1, 0);
5848 ea.set_in4_quad(2, 1);
5849 ea.set_in4_quad(3, 2);
5850 ea.set_port(2);
5851 o.back()->addr = ea;
5852}
5853
11fdf7f2
TL
5854// -- chunk_info_t --
5855
9f95a23c 5856void chunk_info_t::encode(ceph::buffer::list& bl) const
11fdf7f2
TL
5857{
5858 ENCODE_START(1, 1, bl);
5859 encode(offset, bl);
5860 encode(length, bl);
5861 encode(oid, bl);
5862 __u32 _flags = flags;
5863 encode(_flags, bl);
5864 ENCODE_FINISH(bl);
5865}
5866
9f95a23c 5867void chunk_info_t::decode(ceph::buffer::list::const_iterator& bl)
11fdf7f2
TL
5868{
5869 DECODE_START(1, bl);
5870 decode(offset, bl);
5871 decode(length, bl);
5872 decode(oid, bl);
5873 __u32 _flags;
5874 decode(_flags, bl);
5875 flags = (cflag_t)_flags;
5876 DECODE_FINISH(bl);
5877}
5878
5879void chunk_info_t::dump(Formatter *f) const
5880{
5881 f->dump_unsigned("length", length);
5882 f->open_object_section("oid");
5883 oid.dump(f);
5884 f->close_section();
5885 f->dump_unsigned("flags", flags);
5886}
5887
f67539c2
TL
5888
5889bool chunk_info_t::operator==(const chunk_info_t& cit) const
5890{
5891 if (has_fingerprint()) {
5892 if (oid.oid.name == cit.oid.oid.name) {
5893 return true;
5894 }
5895 } else {
5896 if (offset == cit.offset && length == cit.length &&
5897 oid.oid.name == cit.oid.oid.name) {
5898 return true;
5899 }
5900
5901 }
5902 return false;
5903}
5904
5905bool operator==(const std::pair<const long unsigned int, chunk_info_t> & l,
5906 const std::pair<const long unsigned int, chunk_info_t> & r)
5907{
5908 return l.first == r.first &&
5909 l.second == r.second;
5910}
5911
11fdf7f2
TL
5912ostream& operator<<(ostream& out, const chunk_info_t& ci)
5913{
5914 return out << "(len: " << ci.length << " oid: " << ci.oid
5915 << " offset: " << ci.offset
5916 << " flags: " << ci.get_flag_string(ci.flags) << ")";
5917}
5918
31f18b77
FG
5919// -- object_manifest_t --
5920
f67539c2
TL
5921std::ostream& operator<<(std::ostream& out, const object_ref_delta_t & ci)
5922{
5923 return out << ci.ref_delta << std::endl;
5924}
5925
5926void object_manifest_t::calc_refs_to_inc_on_set(
5927 const object_manifest_t* _g,
5928 const object_manifest_t* _l,
5929 object_ref_delta_t &refs) const
5930{
5931 /* avoid to increment the same reference on adjacent clones */
5932 auto iter = chunk_map.begin();
5933 auto find_chunk = [](decltype(iter) &i, const object_manifest_t* cur)
5934 -> bool {
5935 if (cur) {
5936 auto c = cur->chunk_map.find(i->first);
5937 if (c != cur->chunk_map.end() && c->second == i->second) {
5938 return true;
5939
5940 }
5941 }
5942 return false;
5943 };
5944
5945 /* If at least a same chunk exists on either _g or _l, do not increment
5946 * the reference
5947 *
5948 * head: [0, 2) ccc, [6, 2) bbb, [8, 2) ccc
5949 * 20: [0, 2) aaa, <- set_chunk
5950 * 30: [0, 2) abc, [6, 2) bbb, [8, 2) ccc
5951 * --> incremnt the reference
5952 *
5953 * head: [0, 2) ccc, [6, 2) bbb, [8, 2) ccc
5954 * 20: [0, 2) ccc, <- set_chunk
5955 * 30: [0, 2) abc, [6, 2) bbb, [8, 2) ccc
5956 * --> do not need to increment
5957 *
5958 * head: [0, 2) ccc, [6, 2) bbb, [8, 2) ccc
5959 * 20: [0, 2) ccc, <- set_chunk
5960 * 30: [0, 2) ccc, [6, 2) bbb, [8, 2) ccc
5961 * --> decrement the reference of ccc
5962 *
5963 */
5964 for (; iter != chunk_map.end(); ++iter) {
5965 auto found_g = find_chunk(iter, _g);
5966 auto found_l = find_chunk(iter, _l);
5967 if (!found_g && !found_l) {
5968 refs.inc_ref(iter->second.oid);
5969 } else if (found_g && found_l) {
5970 refs.dec_ref(iter->second.oid);
5971 }
5972 }
5973}
5974
5975void object_manifest_t::calc_refs_to_drop_on_modify(
5976 const object_manifest_t* _l,
5977 const ObjectCleanRegions& clean_regions,
5978 object_ref_delta_t &refs) const
5979{
5980 for (auto &p : chunk_map) {
5981 if (!clean_regions.is_clean_region(p.first, p.second.length)) {
5982 // has previous snapshot
5983 if (_l) {
5984 /*
5985 * Let's assume that there is a manifest snapshotted object which has three chunks
5986 * head: [0, 2) aaa, [6, 2) bbb, [8, 2) ccc
5987 * 20: [0, 2) aaa, [6, 2) bbb, [8, 2) ccc
5988 *
5989 * If we modify [6, 2) at head, we shouldn't decrement bbb's refcount because
5990 * 20 has the reference for bbb. Therefore, we only drop the reference if two chunks
5991 * (head: [6, 2) and 20: [6, 2)) are different.
5992 *
5993 */
5994 auto c = _l->chunk_map.find(p.first);
5995 if (c != _l->chunk_map.end()) {
5996 if (p.second == c->second) {
5997 continue;
5998 }
5999 }
6000 refs.dec_ref(p.second.oid);
6001 } else {
6002 // decrement the reference of the updated chunks if the manifest object has no snapshot
6003 refs.dec_ref(p.second.oid);
6004 }
6005 }
6006 }
6007}
6008
6009void object_manifest_t::calc_refs_to_drop_on_removal(
6010 const object_manifest_t* _g,
6011 const object_manifest_t* _l,
6012 object_ref_delta_t &refs) const
6013{
6014 /* At a high level, the rule is that consecutive clones with the same reference
6015 * at the same offset share a reference. As such, removing *this may result
6016 * in removing references in two cases:
6017 * 1) *this has a reference which it shares with neither _g nor _l
6018 * 2) _g and _l have a reference which they share with each other but not
6019 * *this.
6020 *
6021 * For a particular offset, both 1 and 2 can happen.
6022 *
6023 * Notably, this means that to evaluate the reference change from removing
6024 * the object with *this, we only need to look at the two adjacent clones.
6025 */
6026
6027 // Paper over possibly missing _g or _l -- nullopt is semantically the same
6028 // as an empty chunk_map
6029 static const object_manifest_t empty;
6030 const object_manifest_t &g = _g ? *_g : empty;
6031 const object_manifest_t &l = _l ? *_l : empty;
6032
6033 auto giter = g.chunk_map.begin();
6034 auto iter = chunk_map.begin();
6035 auto liter = l.chunk_map.begin();
6036
6037 // Translate iter, map pair to the current offset, end() -> max
6038 auto get_offset = [](decltype(iter) &i, const object_manifest_t &manifest)
6039 -> uint64_t {
6040 return i == manifest.chunk_map.end() ?
6041 std::numeric_limits<uint64_t>::max() : i->first;
6042 };
6043
6044 /* If current matches the offset at iter, returns the chunk at *iter
6045 * and increments iter. Otherwise, returns nullptr.
6046 *
6047 * current will always be derived from the min of *giter, *iter, and
6048 * *liter on each cycle, so the result will be that each loop iteration
6049 * will pick up all chunks at the offest being considered, each offset
6050 * will be considered once, and all offsets will be considered.
6051 */
6052 auto get_chunk = [](
6053 uint64_t current, decltype(iter) &i, const object_manifest_t &manifest)
6054 -> const chunk_info_t * {
6055 if (i == manifest.chunk_map.end() || current != i->first) {
6056 return nullptr;
6057 } else {
6058 return &(i++)->second;
6059 }
6060 };
6061
6062 while (giter != g.chunk_map.end() ||
6063 iter != chunk_map.end() ||
6064 liter != l.chunk_map.end()) {
6065 auto current = std::min(
6066 std::min(get_offset(giter, g), get_offset(iter, *this)),
6067 get_offset(liter, l));
6068
6069 auto gchunk = get_chunk(current, giter, g);
6070 auto chunk = get_chunk(current, iter, *this);
6071 auto lchunk = get_chunk(current, liter, l);
6072
6073 if (gchunk && lchunk && *gchunk == *lchunk &&
6074 (!chunk || *gchunk != *chunk)) {
6075 // case 1 from above: l and g match, chunk does not
6076 refs.dec_ref(gchunk->oid);
6077 }
6078
6079 if (chunk &&
6080 (!gchunk || chunk->oid != gchunk->oid) &&
6081 (!lchunk || chunk->oid != lchunk->oid)) {
6082 // case 2 from above: *this matches neither
6083 refs.dec_ref(chunk->oid);
6084 }
6085 }
6086}
6087
9f95a23c 6088void object_manifest_t::encode(ceph::buffer::list& bl) const
31f18b77
FG
6089{
6090 ENCODE_START(1, 1, bl);
11fdf7f2 6091 encode(type, bl);
31f18b77
FG
6092 switch (type) {
6093 case TYPE_NONE: break;
6094 case TYPE_REDIRECT:
11fdf7f2
TL
6095 encode(redirect_target, bl);
6096 break;
6097 case TYPE_CHUNKED:
9f95a23c 6098 encode(chunk_map, bl);
31f18b77
FG
6099 break;
6100 default:
6101 ceph_abort();
6102 }
6103 ENCODE_FINISH(bl);
6104}
6105
9f95a23c 6106void object_manifest_t::decode(ceph::buffer::list::const_iterator& bl)
31f18b77
FG
6107{
6108 DECODE_START(1, bl);
11fdf7f2 6109 decode(type, bl);
31f18b77
FG
6110 switch (type) {
6111 case TYPE_NONE: break;
6112 case TYPE_REDIRECT:
11fdf7f2
TL
6113 decode(redirect_target, bl);
6114 break;
6115 case TYPE_CHUNKED:
6116 decode(chunk_map, bl);
31f18b77
FG
6117 break;
6118 default:
6119 ceph_abort();
6120 }
6121 DECODE_FINISH(bl);
6122}
6123
6124void object_manifest_t::dump(Formatter *f) const
6125{
6126 f->dump_unsigned("type", type);
11fdf7f2
TL
6127 if (type == TYPE_REDIRECT) {
6128 f->open_object_section("redirect_target");
6129 redirect_target.dump(f);
6130 f->close_section();
6131 } else if (type == TYPE_CHUNKED) {
6132 f->open_array_section("chunk_map");
6133 for (auto& p : chunk_map) {
6134 f->open_object_section("chunk");
6135 f->dump_unsigned("offset", p.first);
6136 p.second.dump(f);
6137 f->close_section();
6138 }
6139 f->close_section();
6140 }
31f18b77
FG
6141}
6142
6143void object_manifest_t::generate_test_instances(list<object_manifest_t*>& o)
6144{
6145 o.push_back(new object_manifest_t());
6146 o.back()->type = TYPE_REDIRECT;
6147}
6148
6149ostream& operator<<(ostream& out, const object_manifest_t& om)
6150{
11fdf7f2
TL
6151 out << "manifest(" << om.get_type_name();
6152 if (om.is_redirect()) {
6153 out << " " << om.redirect_target;
6154 } else if (om.is_chunked()) {
6155 out << " " << om.chunk_map;
6156 }
6157 out << ")";
6158 return out;
31f18b77 6159}
7c673cae
FG
6160
6161// -- object_info_t --
6162
6163void object_info_t::copy_user_bits(const object_info_t& other)
6164{
6165 // these bits are copied from head->clone.
6166 size = other.size;
6167 mtime = other.mtime;
6168 local_mtime = other.local_mtime;
6169 last_reqid = other.last_reqid;
6170 truncate_seq = other.truncate_seq;
6171 truncate_size = other.truncate_size;
6172 flags = other.flags;
6173 user_version = other.user_version;
6174 data_digest = other.data_digest;
6175 omap_digest = other.omap_digest;
6176}
6177
9f95a23c 6178void object_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae
FG
6179{
6180 object_locator_t myoloc(soid);
6181 map<entity_name_t, watch_info_t> old_watchers;
9f95a23c 6182 for (auto i = watchers.cbegin(); i != watchers.cend(); ++i) {
7c673cae
FG
6183 old_watchers.insert(make_pair(i->first.second, i->second));
6184 }
31f18b77 6185 ENCODE_START(17, 8, bl);
11fdf7f2
TL
6186 encode(soid, bl);
6187 encode(myoloc, bl); //Retained for compatibility
6188 encode((__u32)0, bl); // was category, no longer used
6189 encode(version, bl);
6190 encode(prior_version, bl);
6191 encode(last_reqid, bl);
6192 encode(size, bl);
6193 encode(mtime, bl);
7c673cae 6194 if (soid.snap == CEPH_NOSNAP)
11fdf7f2 6195 encode(osd_reqid_t(), bl); // used to be wrlock_by
7c673cae 6196 else
11fdf7f2
TL
6197 encode((uint32_t)0, bl); // was legacy_snaps
6198 encode(truncate_seq, bl);
6199 encode(truncate_size, bl);
6200 encode(is_lost(), bl);
6201 encode(old_watchers, bl, features);
7c673cae
FG
6202 /* shenanigans to avoid breaking backwards compatibility in the disk format.
6203 * When we can, switch this out for simply putting the version_t on disk. */
6204 eversion_t user_eversion(0, user_version);
11fdf7f2
TL
6205 encode(user_eversion, bl);
6206 encode(test_flag(FLAG_USES_TMAP), bl);
6207 encode(watchers, bl, features);
7c673cae 6208 __u32 _flags = flags;
11fdf7f2
TL
6209 encode(_flags, bl);
6210 encode(local_mtime, bl);
6211 encode(data_digest, bl);
6212 encode(omap_digest, bl);
6213 encode(expected_object_size, bl);
6214 encode(expected_write_size, bl);
6215 encode(alloc_hint_flags, bl);
31f18b77 6216 if (has_manifest()) {
11fdf7f2 6217 encode(manifest, bl);
31f18b77 6218 }
7c673cae
FG
6219 ENCODE_FINISH(bl);
6220}
6221
9f95a23c 6222void object_info_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
6223{
6224 object_locator_t myoloc;
31f18b77 6225 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl);
7c673cae 6226 map<entity_name_t, watch_info_t> old_watchers;
11fdf7f2
TL
6227 decode(soid, bl);
6228 decode(myoloc, bl);
7c673cae
FG
6229 {
6230 string category;
11fdf7f2 6231 decode(category, bl); // no longer used
7c673cae 6232 }
11fdf7f2
TL
6233 decode(version, bl);
6234 decode(prior_version, bl);
6235 decode(last_reqid, bl);
6236 decode(size, bl);
6237 decode(mtime, bl);
7c673cae
FG
6238 if (soid.snap == CEPH_NOSNAP) {
6239 osd_reqid_t wrlock_by;
11fdf7f2 6240 decode(wrlock_by, bl);
7c673cae 6241 } else {
11fdf7f2
TL
6242 vector<snapid_t> legacy_snaps;
6243 decode(legacy_snaps, bl);
7c673cae 6244 }
11fdf7f2
TL
6245 decode(truncate_seq, bl);
6246 decode(truncate_size, bl);
7c673cae
FG
6247
6248 // if this is struct_v >= 13, we will overwrite this
6249 // below since this field is just here for backwards
6250 // compatibility
6251 __u8 lo;
11fdf7f2 6252 decode(lo, bl);
7c673cae
FG
6253 flags = (flag_t)lo;
6254
11fdf7f2 6255 decode(old_watchers, bl);
7c673cae 6256 eversion_t user_eversion;
11fdf7f2 6257 decode(user_eversion, bl);
7c673cae
FG
6258 user_version = user_eversion.version;
6259
6260 if (struct_v >= 9) {
6261 bool uses_tmap = false;
11fdf7f2 6262 decode(uses_tmap, bl);
7c673cae
FG
6263 if (uses_tmap)
6264 set_flag(FLAG_USES_TMAP);
6265 } else {
6266 set_flag(FLAG_USES_TMAP);
6267 }
6268 if (struct_v < 10)
6269 soid.pool = myoloc.pool;
6270 if (struct_v >= 11) {
11fdf7f2 6271 decode(watchers, bl);
7c673cae 6272 } else {
9f95a23c 6273 for (auto i = old_watchers.begin(); i != old_watchers.end(); ++i) {
7c673cae
FG
6274 watchers.insert(
6275 make_pair(
6276 make_pair(i->second.cookie, i->first), i->second));
6277 }
6278 }
6279 if (struct_v >= 13) {
6280 __u32 _flags;
11fdf7f2 6281 decode(_flags, bl);
7c673cae
FG
6282 flags = (flag_t)_flags;
6283 }
6284 if (struct_v >= 14) {
11fdf7f2 6285 decode(local_mtime, bl);
7c673cae
FG
6286 } else {
6287 local_mtime = utime_t();
6288 }
6289 if (struct_v >= 15) {
11fdf7f2
TL
6290 decode(data_digest, bl);
6291 decode(omap_digest, bl);
7c673cae
FG
6292 } else {
6293 data_digest = omap_digest = -1;
6294 clear_flag(FLAG_DATA_DIGEST);
6295 clear_flag(FLAG_OMAP_DIGEST);
6296 }
6297 if (struct_v >= 16) {
11fdf7f2
TL
6298 decode(expected_object_size, bl);
6299 decode(expected_write_size, bl);
6300 decode(alloc_hint_flags, bl);
7c673cae
FG
6301 } else {
6302 expected_object_size = 0;
6303 expected_write_size = 0;
6304 alloc_hint_flags = 0;
6305 }
31f18b77
FG
6306 if (struct_v >= 17) {
6307 if (has_manifest()) {
11fdf7f2 6308 decode(manifest, bl);
31f18b77
FG
6309 }
6310 }
7c673cae
FG
6311 DECODE_FINISH(bl);
6312}
6313
6314void object_info_t::dump(Formatter *f) const
6315{
6316 f->open_object_section("oid");
6317 soid.dump(f);
6318 f->close_section();
6319 f->dump_stream("version") << version;
6320 f->dump_stream("prior_version") << prior_version;
6321 f->dump_stream("last_reqid") << last_reqid;
6322 f->dump_unsigned("user_version", user_version);
6323 f->dump_unsigned("size", size);
6324 f->dump_stream("mtime") << mtime;
6325 f->dump_stream("local_mtime") << local_mtime;
6326 f->dump_unsigned("lost", (int)is_lost());
94b18763
FG
6327 vector<string> sv = get_flag_vector(flags);
6328 f->open_array_section("flags");
6329 for (auto str: sv)
6330 f->dump_string("flags", str);
6331 f->close_section();
7c673cae
FG
6332 f->dump_unsigned("truncate_seq", truncate_seq);
6333 f->dump_unsigned("truncate_size", truncate_size);
94b18763
FG
6334 f->dump_format("data_digest", "0x%08x", data_digest);
6335 f->dump_format("omap_digest", "0x%08x", omap_digest);
7c673cae
FG
6336 f->dump_unsigned("expected_object_size", expected_object_size);
6337 f->dump_unsigned("expected_write_size", expected_write_size);
6338 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
31f18b77 6339 f->dump_object("manifest", manifest);
7c673cae 6340 f->open_object_section("watchers");
9f95a23c 6341 for (auto p = watchers.cbegin(); p != watchers.cend(); ++p) {
f67539c2
TL
6342 CachedStackStringStream css;
6343 *css << p->first.second;
6344 f->open_object_section(css->strv());
7c673cae
FG
6345 p->second.dump(f);
6346 f->close_section();
6347 }
6348 f->close_section();
6349}
6350
6351void object_info_t::generate_test_instances(list<object_info_t*>& o)
6352{
6353 o.push_back(new object_info_t());
6354
6355 // fixme
6356}
6357
6358
6359ostream& operator<<(ostream& out, const object_info_t& oi)
6360{
6361 out << oi.soid << "(" << oi.version
6362 << " " << oi.last_reqid;
7c673cae
FG
6363 if (oi.flags)
6364 out << " " << oi.get_flag_string();
6365 out << " s " << oi.size;
6366 out << " uv " << oi.user_version;
6367 if (oi.is_data_digest())
6368 out << " dd " << std::hex << oi.data_digest << std::dec;
6369 if (oi.is_omap_digest())
6370 out << " od " << std::hex << oi.omap_digest << std::dec;
6371 out << " alloc_hint [" << oi.expected_object_size
6372 << " " << oi.expected_write_size
6373 << " " << oi.alloc_hint_flags << "]";
31f18b77
FG
6374 if (oi.has_manifest())
6375 out << " " << oi.manifest;
7c673cae
FG
6376 out << ")";
6377 return out;
6378}
6379
6380// -- ObjectRecovery --
9f95a23c 6381void ObjectRecoveryProgress::encode(ceph::buffer::list &bl) const
7c673cae
FG
6382{
6383 ENCODE_START(1, 1, bl);
11fdf7f2
TL
6384 encode(first, bl);
6385 encode(data_complete, bl);
6386 encode(data_recovered_to, bl);
6387 encode(omap_recovered_to, bl);
6388 encode(omap_complete, bl);
7c673cae
FG
6389 ENCODE_FINISH(bl);
6390}
6391
9f95a23c 6392void ObjectRecoveryProgress::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6393{
6394 DECODE_START(1, bl);
11fdf7f2
TL
6395 decode(first, bl);
6396 decode(data_complete, bl);
6397 decode(data_recovered_to, bl);
6398 decode(omap_recovered_to, bl);
6399 decode(omap_complete, bl);
7c673cae
FG
6400 DECODE_FINISH(bl);
6401}
6402
6403ostream &operator<<(ostream &out, const ObjectRecoveryProgress &prog)
6404{
6405 return prog.print(out);
6406}
6407
6408void ObjectRecoveryProgress::generate_test_instances(
6409 list<ObjectRecoveryProgress*>& o)
6410{
6411 o.push_back(new ObjectRecoveryProgress);
6412 o.back()->first = false;
6413 o.back()->data_complete = true;
6414 o.back()->omap_complete = true;
6415 o.back()->data_recovered_to = 100;
6416
6417 o.push_back(new ObjectRecoveryProgress);
6418 o.back()->first = true;
6419 o.back()->data_complete = false;
6420 o.back()->omap_complete = false;
6421 o.back()->data_recovered_to = 0;
6422}
6423
6424ostream &ObjectRecoveryProgress::print(ostream &out) const
6425{
6426 return out << "ObjectRecoveryProgress("
6427 << ( first ? "" : "!" ) << "first, "
6428 << "data_recovered_to:" << data_recovered_to
6429 << ", data_complete:" << ( data_complete ? "true" : "false" )
6430 << ", omap_recovered_to:" << omap_recovered_to
6431 << ", omap_complete:" << ( omap_complete ? "true" : "false" )
224ce89b 6432 << ", error:" << ( error ? "true" : "false" )
7c673cae
FG
6433 << ")";
6434}
6435
6436void ObjectRecoveryProgress::dump(Formatter *f) const
6437{
6438 f->dump_int("first?", first);
6439 f->dump_int("data_complete?", data_complete);
6440 f->dump_unsigned("data_recovered_to", data_recovered_to);
6441 f->dump_int("omap_complete?", omap_complete);
6442 f->dump_string("omap_recovered_to", omap_recovered_to);
6443}
6444
9f95a23c 6445void ObjectRecoveryInfo::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae 6446{
9f95a23c 6447 ENCODE_START(3, 1, bl);
11fdf7f2
TL
6448 encode(soid, bl);
6449 encode(version, bl);
6450 encode(size, bl);
6451 encode(oi, bl, features);
6452 encode(ss, bl);
6453 encode(copy_subset, bl);
6454 encode(clone_subset, bl);
9f95a23c 6455 encode(object_exist, bl);
7c673cae
FG
6456 ENCODE_FINISH(bl);
6457}
6458
9f95a23c 6459void ObjectRecoveryInfo::decode(ceph::buffer::list::const_iterator &bl,
7c673cae
FG
6460 int64_t pool)
6461{
9f95a23c 6462 DECODE_START(3, bl);
11fdf7f2
TL
6463 decode(soid, bl);
6464 decode(version, bl);
6465 decode(size, bl);
6466 decode(oi, bl);
6467 decode(ss, bl);
6468 decode(copy_subset, bl);
6469 decode(clone_subset, bl);
9f95a23c
TL
6470 if (struct_v > 2)
6471 decode(object_exist, bl);
6472 else
6473 object_exist = false;
7c673cae 6474 DECODE_FINISH(bl);
7c673cae
FG
6475 if (struct_v < 2) {
6476 if (!soid.is_max() && soid.pool == -1)
6477 soid.pool = pool;
6478 map<hobject_t, interval_set<uint64_t>> tmp;
6479 tmp.swap(clone_subset);
9f95a23c 6480 for (auto i = tmp.begin(); i != tmp.end(); ++i) {
7c673cae
FG
6481 hobject_t first(i->first);
6482 if (!first.is_max() && first.pool == -1)
6483 first.pool = pool;
6484 clone_subset[first].swap(i->second);
6485 }
6486 }
6487}
6488
6489void ObjectRecoveryInfo::generate_test_instances(
6490 list<ObjectRecoveryInfo*>& o)
6491{
6492 o.push_back(new ObjectRecoveryInfo);
6493 o.back()->soid = hobject_t(sobject_t("key", CEPH_NOSNAP));
6494 o.back()->version = eversion_t(0,0);
6495 o.back()->size = 100;
9f95a23c 6496 o.back()->object_exist = false;
7c673cae
FG
6497}
6498
6499
6500void ObjectRecoveryInfo::dump(Formatter *f) const
6501{
6502 f->dump_stream("object") << soid;
6503 f->dump_stream("at_version") << version;
6504 f->dump_stream("size") << size;
6505 {
6506 f->open_object_section("object_info");
6507 oi.dump(f);
6508 f->close_section();
6509 }
6510 {
6511 f->open_object_section("snapset");
6512 ss.dump(f);
6513 f->close_section();
6514 }
6515 f->dump_stream("copy_subset") << copy_subset;
6516 f->dump_stream("clone_subset") << clone_subset;
9f95a23c 6517 f->dump_stream("object_exist") << object_exist;
7c673cae
FG
6518}
6519
6520ostream& operator<<(ostream& out, const ObjectRecoveryInfo &inf)
6521{
6522 return inf.print(out);
6523}
6524
6525ostream &ObjectRecoveryInfo::print(ostream &out) const
6526{
6527 return out << "ObjectRecoveryInfo("
6528 << soid << "@" << version
6529 << ", size: " << size
6530 << ", copy_subset: " << copy_subset
6531 << ", clone_subset: " << clone_subset
6532 << ", snapset: " << ss
9f95a23c 6533 << ", object_exist: " << object_exist
7c673cae
FG
6534 << ")";
6535}
6536
6537// -- PushReplyOp --
6538void PushReplyOp::generate_test_instances(list<PushReplyOp*> &o)
6539{
6540 o.push_back(new PushReplyOp);
6541 o.push_back(new PushReplyOp);
6542 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6543 o.push_back(new PushReplyOp);
6544 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6545}
6546
9f95a23c 6547void PushReplyOp::encode(ceph::buffer::list &bl) const
7c673cae
FG
6548{
6549 ENCODE_START(1, 1, bl);
11fdf7f2 6550 encode(soid, bl);
7c673cae
FG
6551 ENCODE_FINISH(bl);
6552}
6553
9f95a23c 6554void PushReplyOp::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6555{
6556 DECODE_START(1, bl);
11fdf7f2 6557 decode(soid, bl);
7c673cae
FG
6558 DECODE_FINISH(bl);
6559}
6560
6561void PushReplyOp::dump(Formatter *f) const
6562{
6563 f->dump_stream("soid") << soid;
6564}
6565
6566ostream &PushReplyOp::print(ostream &out) const
6567{
6568 return out
6569 << "PushReplyOp(" << soid
6570 << ")";
6571}
6572
6573ostream& operator<<(ostream& out, const PushReplyOp &op)
6574{
6575 return op.print(out);
6576}
6577
6578uint64_t PushReplyOp::cost(CephContext *cct) const
6579{
6580
6581 return cct->_conf->osd_push_per_object_cost +
6582 cct->_conf->osd_recovery_max_chunk;
6583}
6584
6585// -- PullOp --
6586void PullOp::generate_test_instances(list<PullOp*> &o)
6587{
6588 o.push_back(new PullOp);
6589 o.push_back(new PullOp);
6590 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6591 o.back()->recovery_info.version = eversion_t(3, 10);
6592 o.push_back(new PullOp);
6593 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6594 o.back()->recovery_info.version = eversion_t(0, 0);
6595}
6596
9f95a23c 6597void PullOp::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae
FG
6598{
6599 ENCODE_START(1, 1, bl);
11fdf7f2
TL
6600 encode(soid, bl);
6601 encode(recovery_info, bl, features);
6602 encode(recovery_progress, bl);
7c673cae
FG
6603 ENCODE_FINISH(bl);
6604}
6605
9f95a23c 6606void PullOp::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6607{
6608 DECODE_START(1, bl);
11fdf7f2
TL
6609 decode(soid, bl);
6610 decode(recovery_info, bl);
6611 decode(recovery_progress, bl);
7c673cae
FG
6612 DECODE_FINISH(bl);
6613}
6614
6615void PullOp::dump(Formatter *f) const
6616{
6617 f->dump_stream("soid") << soid;
6618 {
6619 f->open_object_section("recovery_info");
6620 recovery_info.dump(f);
6621 f->close_section();
6622 }
6623 {
6624 f->open_object_section("recovery_progress");
6625 recovery_progress.dump(f);
6626 f->close_section();
6627 }
6628}
6629
6630ostream &PullOp::print(ostream &out) const
6631{
6632 return out
6633 << "PullOp(" << soid
6634 << ", recovery_info: " << recovery_info
6635 << ", recovery_progress: " << recovery_progress
6636 << ")";
6637}
6638
6639ostream& operator<<(ostream& out, const PullOp &op)
6640{
6641 return op.print(out);
6642}
6643
6644uint64_t PullOp::cost(CephContext *cct) const
6645{
6646 return cct->_conf->osd_push_per_object_cost +
6647 cct->_conf->osd_recovery_max_chunk;
6648}
6649
6650// -- PushOp --
6651void PushOp::generate_test_instances(list<PushOp*> &o)
6652{
6653 o.push_back(new PushOp);
6654 o.push_back(new PushOp);
6655 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6656 o.back()->version = eversion_t(3, 10);
6657 o.push_back(new PushOp);
6658 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6659 o.back()->version = eversion_t(0, 0);
6660}
6661
9f95a23c 6662void PushOp::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae
FG
6663{
6664 ENCODE_START(1, 1, bl);
11fdf7f2
TL
6665 encode(soid, bl);
6666 encode(version, bl);
6667 encode(data, bl);
6668 encode(data_included, bl);
6669 encode(omap_header, bl);
6670 encode(omap_entries, bl);
6671 encode(attrset, bl);
6672 encode(recovery_info, bl, features);
6673 encode(after_progress, bl);
6674 encode(before_progress, bl);
7c673cae
FG
6675 ENCODE_FINISH(bl);
6676}
6677
9f95a23c 6678void PushOp::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6679{
6680 DECODE_START(1, bl);
11fdf7f2
TL
6681 decode(soid, bl);
6682 decode(version, bl);
6683 decode(data, bl);
6684 decode(data_included, bl);
6685 decode(omap_header, bl);
6686 decode(omap_entries, bl);
6687 decode(attrset, bl);
6688 decode(recovery_info, bl);
6689 decode(after_progress, bl);
6690 decode(before_progress, bl);
7c673cae
FG
6691 DECODE_FINISH(bl);
6692}
6693
6694void PushOp::dump(Formatter *f) const
6695{
6696 f->dump_stream("soid") << soid;
6697 f->dump_stream("version") << version;
6698 f->dump_int("data_len", data.length());
6699 f->dump_stream("data_included") << data_included;
6700 f->dump_int("omap_header_len", omap_header.length());
6701 f->dump_int("omap_entries_len", omap_entries.size());
6702 f->dump_int("attrset_len", attrset.size());
6703 {
6704 f->open_object_section("recovery_info");
6705 recovery_info.dump(f);
6706 f->close_section();
6707 }
6708 {
6709 f->open_object_section("after_progress");
6710 after_progress.dump(f);
6711 f->close_section();
6712 }
6713 {
6714 f->open_object_section("before_progress");
6715 before_progress.dump(f);
6716 f->close_section();
6717 }
6718}
6719
6720ostream &PushOp::print(ostream &out) const
6721{
6722 return out
6723 << "PushOp(" << soid
6724 << ", version: " << version
6725 << ", data_included: " << data_included
6726 << ", data_size: " << data.length()
6727 << ", omap_header_size: " << omap_header.length()
6728 << ", omap_entries_size: " << omap_entries.size()
6729 << ", attrset_size: " << attrset.size()
6730 << ", recovery_info: " << recovery_info
6731 << ", after_progress: " << after_progress
6732 << ", before_progress: " << before_progress
6733 << ")";
6734}
6735
6736ostream& operator<<(ostream& out, const PushOp &op)
6737{
6738 return op.print(out);
6739}
6740
6741uint64_t PushOp::cost(CephContext *cct) const
6742{
6743 uint64_t cost = data_included.size();
9f95a23c 6744 for (auto i = omap_entries.cbegin(); i != omap_entries.cend(); ++i) {
7c673cae
FG
6745 cost += i->second.length();
6746 }
6747 cost += cct->_conf->osd_push_per_object_cost;
6748 return cost;
6749}
6750
6751// -- ScrubMap --
6752
6753void ScrubMap::merge_incr(const ScrubMap &l)
6754{
11fdf7f2 6755 ceph_assert(valid_through == l.incr_since);
7c673cae
FG
6756 valid_through = l.valid_through;
6757
9f95a23c 6758 for (auto p = l.objects.cbegin(); p != l.objects.cend(); ++p){
7c673cae 6759 if (p->second.negative) {
9f95a23c 6760 auto q = objects.find(p->first);
7c673cae
FG
6761 if (q != objects.end()) {
6762 objects.erase(q);
6763 }
6764 } else {
6765 objects[p->first] = p->second;
6766 }
6767 }
6768}
6769
9f95a23c 6770void ScrubMap::encode(ceph::buffer::list& bl) const
7c673cae
FG
6771{
6772 ENCODE_START(3, 2, bl);
11fdf7f2
TL
6773 encode(objects, bl);
6774 encode((__u32)0, bl); // used to be attrs; now deprecated
9f95a23c 6775 ceph::buffer::list old_logbl; // not used
11fdf7f2
TL
6776 encode(old_logbl, bl);
6777 encode(valid_through, bl);
6778 encode(incr_since, bl);
7c673cae
FG
6779 ENCODE_FINISH(bl);
6780}
6781
9f95a23c 6782void ScrubMap::decode(ceph::buffer::list::const_iterator& bl, int64_t pool)
7c673cae
FG
6783{
6784 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
11fdf7f2 6785 decode(objects, bl);
7c673cae
FG
6786 {
6787 map<string,string> attrs; // deprecated
11fdf7f2 6788 decode(attrs, bl);
7c673cae 6789 }
9f95a23c 6790 ceph::buffer::list old_logbl; // not used
11fdf7f2
TL
6791 decode(old_logbl, bl);
6792 decode(valid_through, bl);
6793 decode(incr_since, bl);
7c673cae
FG
6794 DECODE_FINISH(bl);
6795
6796 // handle hobject_t upgrade
6797 if (struct_v < 3) {
6798 map<hobject_t, object> tmp;
6799 tmp.swap(objects);
9f95a23c 6800 for (auto i = tmp.begin(); i != tmp.end(); ++i) {
7c673cae
FG
6801 hobject_t first(i->first);
6802 if (!first.is_max() && first.pool == -1)
6803 first.pool = pool;
6804 objects[first] = i->second;
6805 }
6806 }
6807}
6808
6809void ScrubMap::dump(Formatter *f) const
6810{
6811 f->dump_stream("valid_through") << valid_through;
6812 f->dump_stream("incremental_since") << incr_since;
6813 f->open_array_section("objects");
9f95a23c 6814 for (auto p = objects.cbegin(); p != objects.cend(); ++p) {
7c673cae
FG
6815 f->open_object_section("object");
6816 f->dump_string("name", p->first.oid.name);
6817 f->dump_unsigned("hash", p->first.get_hash());
6818 f->dump_string("key", p->first.get_key());
6819 f->dump_int("snapid", p->first.snap);
6820 p->second.dump(f);
6821 f->close_section();
6822 }
6823 f->close_section();
6824}
6825
6826void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
6827{
6828 o.push_back(new ScrubMap);
6829 o.push_back(new ScrubMap);
6830 o.back()->valid_through = eversion_t(1, 2);
6831 o.back()->incr_since = eversion_t(3, 4);
6832 list<object*> obj;
6833 object::generate_test_instances(obj);
6834 o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
6835 obj.pop_back();
6836 o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
6837}
6838
6839// -- ScrubMap::object --
6840
9f95a23c 6841void ScrubMap::object::encode(ceph::buffer::list& bl) const
7c673cae
FG
6842{
6843 bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
11fdf7f2
TL
6844 ENCODE_START(10, 7, bl);
6845 encode(size, bl);
6846 encode(negative, bl);
6847 encode(attrs, bl);
6848 encode(digest, bl);
6849 encode(digest_present, bl);
6850 encode((uint32_t)0, bl); // obsolete nlinks
6851 encode((uint32_t)0, bl); // snapcolls
6852 encode(omap_digest, bl);
6853 encode(omap_digest_present, bl);
6854 encode(compat_read_error, bl);
6855 encode(stat_error, bl);
6856 encode(read_error, bl);
6857 encode(ec_hash_mismatch, bl);
6858 encode(ec_size_mismatch, bl);
6859 encode(large_omap_object_found, bl);
6860 encode(large_omap_object_key_count, bl);
6861 encode(large_omap_object_value_size, bl);
6862 encode(object_omap_bytes, bl);
6863 encode(object_omap_keys, bl);
7c673cae
FG
6864 ENCODE_FINISH(bl);
6865}
6866
9f95a23c 6867void ScrubMap::object::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 6868{
11fdf7f2
TL
6869 DECODE_START(10, bl);
6870 decode(size, bl);
7c673cae 6871 bool tmp, compat_read_error = false;
11fdf7f2 6872 decode(tmp, bl);
7c673cae 6873 negative = tmp;
11fdf7f2
TL
6874 decode(attrs, bl);
6875 decode(digest, bl);
6876 decode(tmp, bl);
7c673cae
FG
6877 digest_present = tmp;
6878 {
6879 uint32_t nlinks;
11fdf7f2 6880 decode(nlinks, bl);
7c673cae 6881 set<snapid_t> snapcolls;
11fdf7f2 6882 decode(snapcolls, bl);
7c673cae 6883 }
11fdf7f2
TL
6884 decode(omap_digest, bl);
6885 decode(tmp, bl);
7c673cae 6886 omap_digest_present = tmp;
11fdf7f2
TL
6887 decode(compat_read_error, bl);
6888 decode(tmp, bl);
7c673cae
FG
6889 stat_error = tmp;
6890 if (struct_v >= 8) {
11fdf7f2 6891 decode(tmp, bl);
7c673cae 6892 read_error = tmp;
11fdf7f2 6893 decode(tmp, bl);
7c673cae 6894 ec_hash_mismatch = tmp;
11fdf7f2 6895 decode(tmp, bl);
7c673cae
FG
6896 ec_size_mismatch = tmp;
6897 }
6898 // If older encoder found a read_error, set read_error
6899 if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
6900 read_error = true;
28e407b8 6901 if (struct_v >= 9) {
11fdf7f2 6902 decode(tmp, bl);
28e407b8 6903 large_omap_object_found = tmp;
11fdf7f2
TL
6904 decode(large_omap_object_key_count, bl);
6905 decode(large_omap_object_value_size, bl);
6906 }
6907 if (struct_v >= 10) {
6908 decode(object_omap_bytes, bl);
6909 decode(object_omap_keys, bl);
28e407b8 6910 }
7c673cae
FG
6911 DECODE_FINISH(bl);
6912}
6913
6914void ScrubMap::object::dump(Formatter *f) const
6915{
6916 f->dump_int("size", size);
6917 f->dump_int("negative", negative);
6918 f->open_array_section("attrs");
9f95a23c 6919 for (auto p = attrs.cbegin(); p != attrs.cend(); ++p) {
7c673cae
FG
6920 f->open_object_section("attr");
6921 f->dump_string("name", p->first);
6922 f->dump_int("length", p->second.length());
6923 f->close_section();
6924 }
6925 f->close_section();
6926}
6927
6928void ScrubMap::object::generate_test_instances(list<object*>& o)
6929{
6930 o.push_back(new object);
6931 o.push_back(new object);
6932 o.back()->negative = true;
6933 o.push_back(new object);
6934 o.back()->size = 123;
9f95a23c
TL
6935 o.back()->attrs["foo"] = ceph::buffer::copy("foo", 3);
6936 o.back()->attrs["bar"] = ceph::buffer::copy("barval", 6);
7c673cae
FG
6937}
6938
6939// -- OSDOp --
6940
6941ostream& operator<<(ostream& out, const OSDOp& op)
6942{
6943 out << ceph_osd_op_name(op.op.op);
6944 if (ceph_osd_op_type_data(op.op.op)) {
6945 // data extent
6946 switch (op.op.op) {
6947 case CEPH_OSD_OP_ASSERT_VER:
6948 out << " v" << op.op.assert_ver.ver;
6949 break;
6950 case CEPH_OSD_OP_TRUNCATE:
6951 out << " " << op.op.extent.offset;
6952 break;
6953 case CEPH_OSD_OP_MASKTRUNC:
6954 case CEPH_OSD_OP_TRIMTRUNC:
6955 out << " " << op.op.extent.truncate_seq << "@"
6956 << (int64_t)op.op.extent.truncate_size;
6957 break;
6958 case CEPH_OSD_OP_ROLLBACK:
6959 out << " " << snapid_t(op.op.snap.snapid);
6960 break;
6961 case CEPH_OSD_OP_WATCH:
6962 out << " " << ceph_osd_watch_op_name(op.op.watch.op)
6963 << " cookie " << op.op.watch.cookie;
6964 if (op.op.watch.gen)
6965 out << " gen " << op.op.watch.gen;
6966 break;
6967 case CEPH_OSD_OP_NOTIFY:
7c673cae
FG
6968 out << " cookie " << op.op.notify.cookie;
6969 break;
6970 case CEPH_OSD_OP_COPY_GET:
6971 out << " max " << op.op.copy_get.max;
6972 break;
6973 case CEPH_OSD_OP_COPY_FROM:
6974 out << " ver " << op.op.copy_from.src_version;
6975 break;
6976 case CEPH_OSD_OP_SETALLOCHINT:
6977 out << " object_size " << op.op.alloc_hint.expected_object_size
6978 << " write_size " << op.op.alloc_hint.expected_write_size;
6979 break;
6980 case CEPH_OSD_OP_READ:
6981 case CEPH_OSD_OP_SPARSE_READ:
6982 case CEPH_OSD_OP_SYNC_READ:
6983 case CEPH_OSD_OP_WRITE:
6984 case CEPH_OSD_OP_WRITEFULL:
6985 case CEPH_OSD_OP_ZERO:
6986 case CEPH_OSD_OP_APPEND:
6987 case CEPH_OSD_OP_MAPEXT:
11fdf7f2 6988 case CEPH_OSD_OP_CMPEXT:
7c673cae
FG
6989 out << " " << op.op.extent.offset << "~" << op.op.extent.length;
6990 if (op.op.extent.truncate_seq)
6991 out << " [" << op.op.extent.truncate_seq << "@"
6992 << (int64_t)op.op.extent.truncate_size << "]";
6993 if (op.op.flags)
6994 out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
6995 default:
6996 // don't show any arg info
6997 break;
6998 }
6999 } else if (ceph_osd_op_type_attr(op.op.op)) {
7000 // xattr name
7001 if (op.op.xattr.name_len && op.indata.length()) {
7002 out << " ";
7003 op.indata.write(0, op.op.xattr.name_len, out);
7004 }
7005 if (op.op.xattr.value_len)
7006 out << " (" << op.op.xattr.value_len << ")";
7007 if (op.op.op == CEPH_OSD_OP_CMPXATTR)
7008 out << " op " << (int)op.op.xattr.cmp_op
7009 << " mode " << (int)op.op.xattr.cmp_mode;
7010 } else if (ceph_osd_op_type_exec(op.op.op)) {
7011 // class.method
7012 if (op.op.cls.class_len && op.indata.length()) {
7013 out << " ";
7014 op.indata.write(0, op.op.cls.class_len, out);
7015 out << ".";
7016 op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
7017 }
7018 } else if (ceph_osd_op_type_pg(op.op.op)) {
7019 switch (op.op.op) {
7020 case CEPH_OSD_OP_PGLS:
7021 case CEPH_OSD_OP_PGLS_FILTER:
7022 case CEPH_OSD_OP_PGNLS:
7023 case CEPH_OSD_OP_PGNLS_FILTER:
7024 out << " start_epoch " << op.op.pgls.start_epoch;
7025 break;
7026 case CEPH_OSD_OP_PG_HITSET_LS:
7027 break;
7028 case CEPH_OSD_OP_PG_HITSET_GET:
7029 out << " " << utime_t(op.op.hit_set_get.stamp);
7030 break;
7031 case CEPH_OSD_OP_SCRUBLS:
7032 break;
7033 }
7034 }
9f95a23c
TL
7035 if (op.indata.length()) {
7036 out << " in=" << op.indata.length() << "b";
7037 }
7038 if (op.outdata.length()) {
7039 out << " out=" << op.outdata.length() << "b";
7040 }
7c673cae
FG
7041 return out;
7042}
7043
7044
9f95a23c 7045void OSDOp::split_osd_op_vector_out_data(vector<OSDOp>& ops, ceph::buffer::list& in)
7c673cae 7046{
9f95a23c 7047 auto datap = in.begin();
7c673cae
FG
7048 for (unsigned i = 0; i < ops.size(); i++) {
7049 if (ops[i].op.payload_len) {
7050 datap.copy(ops[i].op.payload_len, ops[i].outdata);
7051 }
7052 }
7053}
7054
9f95a23c 7055void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, ceph::buffer::list& out)
7c673cae
FG
7056{
7057 for (unsigned i = 0; i < ops.size(); i++) {
9f95a23c 7058 ops[i].op.payload_len = ops[i].outdata.length();
7c673cae 7059 if (ops[i].outdata.length()) {
7c673cae
FG
7060 out.append(ops[i].outdata);
7061 }
7062 }
7063}
7064
9f95a23c
TL
7065int prepare_info_keymap(
7066 CephContext* cct,
7067 map<string,bufferlist> *km,
7068 string *key_to_remove,
7069 epoch_t epoch,
7070 pg_info_t &info,
7071 pg_info_t &last_written_info,
7072 PastIntervals &past_intervals,
7073 bool dirty_big_info,
7074 bool dirty_epoch,
7075 bool try_fast_info,
7076 PerfCounters *logger,
7077 DoutPrefixProvider *dpp)
7078{
7079 if (dirty_epoch) {
7080 encode(epoch, (*km)[string(epoch_key)]);
7081 }
7082
7083 if (logger)
7084 logger->inc(l_osd_pg_info);
7085
7086 // try to do info efficiently?
7087 if (!dirty_big_info && try_fast_info &&
7088 info.last_update > last_written_info.last_update) {
7089 pg_fast_info_t fast;
7090 fast.populate_from(info);
7091 bool did = fast.try_apply_to(&last_written_info);
7092 ceph_assert(did); // we verified last_update increased above
7093 if (info == last_written_info) {
7094 encode(fast, (*km)[string(fastinfo_key)]);
7095 if (logger)
7096 logger->inc(l_osd_pg_fastinfo);
7097 return 0;
7098 }
7099 if (dpp) {
7100 ldpp_dout(dpp, 30) << __func__ << " fastinfo failed, info:\n";
7101 {
7102 JSONFormatter jf(true);
7103 jf.dump_object("info", info);
7104 jf.flush(*_dout);
7105 }
7106 {
7107 *_dout << "\nlast_written_info:\n";
7108 JSONFormatter jf(true);
7109 jf.dump_object("last_written_info", last_written_info);
7110 jf.flush(*_dout);
7111 }
7112 *_dout << dendl;
7113 }
7114 } else if (info.last_update <= last_written_info.last_update) {
7115 // clean up any potentially stale fastinfo key resulting from last_update
7116 // not moving forwards (e.g., a backwards jump during peering)
7117 *key_to_remove = fastinfo_key;
7118 }
7119
7120 last_written_info = info;
7121
7122 // info. store purged_snaps separately.
7123 interval_set<snapid_t> purged_snaps;
7124 purged_snaps.swap(info.purged_snaps);
7125 encode(info, (*km)[string(info_key)]);
7126 purged_snaps.swap(info.purged_snaps);
7127
7128 if (dirty_big_info) {
7129 // potentially big stuff
7130 bufferlist& bigbl = (*km)[string(biginfo_key)];
7131 encode(past_intervals, bigbl);
7132 encode(info.purged_snaps, bigbl);
7133 //dout(20) << "write_info bigbl " << bigbl.length() << dendl;
7134 if (logger)
7135 logger->inc(l_osd_pg_biginfo);
7136 }
7137
7138 return 0;
7139}
7140
7141void create_pg_collection(
7142 ceph::os::Transaction& t, spg_t pgid, int bits)
7143{
7144 coll_t coll(pgid);
7145 t.create_collection(coll, bits);
7146}
7147
7148void init_pg_ondisk(
7149 ceph::os::Transaction& t,
7150 spg_t pgid,
7151 const pg_pool_t *pool)
7152{
7153 coll_t coll(pgid);
7154 if (pool) {
7155 // Give a hint to the PG collection
7156 bufferlist hint;
7157 uint32_t pg_num = pool->get_pg_num();
7158 uint64_t expected_num_objects_pg = pool->expected_num_objects / pg_num;
7159 encode(pg_num, hint);
7160 encode(expected_num_objects_pg, hint);
7161 uint32_t hint_type = ceph::os::Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS;
7162 t.collection_hint(coll, hint_type, hint);
7163 }
7164
7165 ghobject_t pgmeta_oid(pgid.make_pgmeta_oid());
7166 t.touch(coll, pgmeta_oid);
7167 map<string,bufferlist> values;
7168 __u8 struct_v = pg_latest_struct_v;
7169 encode(struct_v, values[string(infover_key)]);
7170 t.omap_setkeys(coll, pgmeta_oid, values);
7171}
7172
7173PGLSFilter::PGLSFilter() : cct(nullptr)
7174{
7175}
7176
7177PGLSFilter::~PGLSFilter()
7178{
7179}
7180
7181int PGLSPlainFilter::init(ceph::bufferlist::const_iterator &params)
7182{
7183 try {
7184 decode(xattr, params);
7185 decode(val, params);
f67539c2 7186 } catch (ceph::buffer::error &e) {
9f95a23c
TL
7187 return -EINVAL;
7188 }
7189 return 0;
7190}
7191
7192bool PGLSPlainFilter::filter(const hobject_t& obj,
7193 const ceph::bufferlist& xattr_data) const
7194{
7195 return xattr_data.contents_equal(val.c_str(), val.size());
7196}