]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/osd_types.cc
import ceph 15.2.14
[ceph.git] / ceph / src / osd / osd_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
9f95a23c
TL
18#include <list>
19#include <map>
20#include <ostream>
21#include <sstream>
22#include <set>
23#include <string>
24#include <utility>
25#include <vector>
26
27
7c673cae
FG
28#include <boost/assign/list_of.hpp>
29
7c673cae 30#include "include/ceph_features.h"
9f95a23c 31#include "include/encoding.h"
11fdf7f2 32#include "include/stringify.h"
7c673cae
FG
33extern "C" {
34#include "crush/hash.h"
35}
9f95a23c
TL
36
37#include "common/Formatter.h"
7c673cae 38#include "OSDMap.h"
9f95a23c
TL
39#include "osd_types.h"
40#include "os/Transaction.h"
41
42using std::list;
43using std::make_pair;
44using std::map;
45using std::ostream;
46using std::ostringstream;
47using std::pair;
48using std::set;
49using std::string;
50using std::stringstream;
51using std::unique_ptr;
52using std::vector;
53
54using ceph::decode;
55using ceph::decode_nohead;
56using ceph::encode;
57using ceph::encode_nohead;
58using ceph::Formatter;
59
60using namespace std::literals;
7c673cae
FG
61
62const char *ceph_osd_flag_name(unsigned flag)
63{
64 switch (flag) {
65 case CEPH_OSD_FLAG_ACK: return "ack";
66 case CEPH_OSD_FLAG_ONNVRAM: return "onnvram";
67 case CEPH_OSD_FLAG_ONDISK: return "ondisk";
68 case CEPH_OSD_FLAG_RETRY: return "retry";
69 case CEPH_OSD_FLAG_READ: return "read";
70 case CEPH_OSD_FLAG_WRITE: return "write";
71 case CEPH_OSD_FLAG_ORDERSNAP: return "ordersnap";
72 case CEPH_OSD_FLAG_PEERSTAT_OLD: return "peerstat_old";
73 case CEPH_OSD_FLAG_BALANCE_READS: return "balance_reads";
74 case CEPH_OSD_FLAG_PARALLELEXEC: return "parallelexec";
75 case CEPH_OSD_FLAG_PGOP: return "pgop";
76 case CEPH_OSD_FLAG_EXEC: return "exec";
77 case CEPH_OSD_FLAG_EXEC_PUBLIC: return "exec_public";
78 case CEPH_OSD_FLAG_LOCALIZE_READS: return "localize_reads";
79 case CEPH_OSD_FLAG_RWORDERED: return "rwordered";
80 case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache";
81 case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks";
82 case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay";
83 case CEPH_OSD_FLAG_FLUSH: return "flush";
84 case CEPH_OSD_FLAG_MAP_SNAP_CLONE: return "map_snap_clone";
85 case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
86 case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
87 case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
88 case CEPH_OSD_FLAG_FULL_TRY: return "full_try";
89 case CEPH_OSD_FLAG_FULL_FORCE: return "full_force";
224ce89b 90 case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect";
9f95a23c 91 case CEPH_OSD_FLAG_RETURNVEC: return "returnvec";
7c673cae
FG
92 default: return "???";
93 }
94}
95
96string ceph_osd_flag_string(unsigned flags)
97{
98 string s;
99 for (unsigned i=0; i<32; ++i) {
100 if (flags & (1u<<i)) {
101 if (s.length())
102 s += "+";
103 s += ceph_osd_flag_name(1u << i);
104 }
105 }
106 if (s.length())
107 return s;
108 return string("-");
109}
110
111const char * ceph_osd_op_flag_name(unsigned flag)
112{
113 const char *name;
114
115 switch(flag) {
116 case CEPH_OSD_OP_FLAG_EXCL:
117 name = "excl";
118 break;
119 case CEPH_OSD_OP_FLAG_FAILOK:
120 name = "failok";
121 break;
122 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM:
123 name = "fadvise_random";
124 break;
125 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL:
126 name = "fadvise_sequential";
127 break;
128 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED:
129 name = "favise_willneed";
130 break;
131 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED:
132 name = "fadvise_dontneed";
133 break;
134 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
135 name = "fadvise_nocache";
136 break;
11fdf7f2
TL
137 case CEPH_OSD_OP_FLAG_WITH_REFERENCE:
138 name = "with_reference";
139 break;
91327a77
AA
140 case CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE:
141 name = "bypass_clean_cache";
142 break;
7c673cae
FG
143 default:
144 name = "???";
145 };
146
147 return name;
148}
149
150string ceph_osd_op_flag_string(unsigned flags)
151{
152 string s;
153 for (unsigned i=0; i<32; ++i) {
154 if (flags & (1u<<i)) {
155 if (s.length())
156 s += "+";
157 s += ceph_osd_op_flag_name(1u << i);
158 }
159 }
160 if (s.length())
161 return s;
162 return string("-");
163}
164
165string ceph_osd_alloc_hint_flag_string(unsigned flags)
166{
167 string s;
168 for (unsigned i=0; i<32; ++i) {
169 if (flags & (1u<<i)) {
170 if (s.length())
171 s += "+";
172 s += ceph_osd_alloc_hint_flag_name(1u << i);
173 }
174 }
175 if (s.length())
176 return s;
177 return string("-");
178}
179
9f95a23c 180void pg_shard_t::encode(ceph::buffer::list &bl) const
7c673cae
FG
181{
182 ENCODE_START(1, 1, bl);
11fdf7f2
TL
183 encode(osd, bl);
184 encode(shard, bl);
7c673cae
FG
185 ENCODE_FINISH(bl);
186}
9f95a23c 187void pg_shard_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
188{
189 DECODE_START(1, bl);
11fdf7f2
TL
190 decode(osd, bl);
191 decode(shard, bl);
7c673cae
FG
192 DECODE_FINISH(bl);
193}
194
195ostream &operator<<(ostream &lhs, const pg_shard_t &rhs)
196{
197 if (rhs.is_undefined())
198 return lhs << "?";
199 if (rhs.shard == shard_id_t::NO_SHARD)
b32b8144
FG
200 return lhs << rhs.get_osd();
201 return lhs << rhs.get_osd() << '(' << (unsigned)(rhs.shard) << ')';
7c673cae
FG
202}
203
11fdf7f2
TL
204void dump(Formatter* f, const osd_alerts_t& alerts)
205{
206 for (auto& a : alerts) {
207 string s0 = " osd: ";
208 s0 += stringify(a.first);
209 string s;
210 for (auto& aa : a.second) {
211 s = s0;
212 s += " ";
213 s += aa.first;
214 s += ":";
215 s += aa.second;
216 f->dump_string("alert", s);
217 }
218 }
219}
220
7c673cae
FG
221// -- osd_reqid_t --
222void osd_reqid_t::dump(Formatter *f) const
223{
224 f->dump_stream("name") << name;
225 f->dump_int("inc", inc);
226 f->dump_unsigned("tid", tid);
227}
228
229void osd_reqid_t::generate_test_instances(list<osd_reqid_t*>& o)
230{
231 o.push_back(new osd_reqid_t);
232 o.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
233}
234
235// -- object_locator_t --
236
9f95a23c 237void object_locator_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
238{
239 // verify that nobody's corrupted the locator
11fdf7f2 240 ceph_assert(hash == -1 || key.empty());
7c673cae
FG
241 __u8 encode_compat = 3;
242 ENCODE_START(6, encode_compat, bl);
11fdf7f2 243 encode(pool, bl);
7c673cae 244 int32_t preferred = -1; // tell old code there is no preferred osd (-1).
11fdf7f2
TL
245 encode(preferred, bl);
246 encode(key, bl);
247 encode(nspace, bl);
248 encode(hash, bl);
7c673cae 249 if (hash != -1)
11fdf7f2 250 encode_compat = std::max<std::uint8_t>(encode_compat, 6); // need to interpret the hash
7c673cae
FG
251 ENCODE_FINISH_NEW_COMPAT(bl, encode_compat);
252}
253
9f95a23c 254void object_locator_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
255{
256 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p);
257 if (struct_v < 2) {
258 int32_t op;
11fdf7f2 259 decode(op, p);
7c673cae
FG
260 pool = op;
261 int16_t pref;
11fdf7f2 262 decode(pref, p);
7c673cae 263 } else {
11fdf7f2 264 decode(pool, p);
7c673cae 265 int32_t preferred;
11fdf7f2 266 decode(preferred, p);
7c673cae 267 }
11fdf7f2 268 decode(key, p);
7c673cae 269 if (struct_v >= 5)
11fdf7f2 270 decode(nspace, p);
7c673cae 271 if (struct_v >= 6)
11fdf7f2 272 decode(hash, p);
7c673cae
FG
273 else
274 hash = -1;
275 DECODE_FINISH(p);
276 // verify that nobody's corrupted the locator
11fdf7f2 277 ceph_assert(hash == -1 || key.empty());
7c673cae
FG
278}
279
280void object_locator_t::dump(Formatter *f) const
281{
282 f->dump_int("pool", pool);
283 f->dump_string("key", key);
284 f->dump_string("namespace", nspace);
285 f->dump_int("hash", hash);
286}
287
288void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
289{
290 o.push_back(new object_locator_t);
291 o.push_back(new object_locator_t(123));
292 o.push_back(new object_locator_t(123, 876));
293 o.push_back(new object_locator_t(1, "n2"));
294 o.push_back(new object_locator_t(1234, "", "key"));
295 o.push_back(new object_locator_t(12, "n1", "key2"));
296}
297
298// -- request_redirect_t --
9f95a23c 299void request_redirect_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
300{
301 ENCODE_START(1, 1, bl);
11fdf7f2
TL
302 encode(redirect_locator, bl);
303 encode(redirect_object, bl);
304 // legacy of the removed osd_instructions member
305 encode((uint32_t)0, bl);
7c673cae
FG
306 ENCODE_FINISH(bl);
307}
308
9f95a23c 309void request_redirect_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
310{
311 DECODE_START(1, bl);
11fdf7f2
TL
312 uint32_t legacy_osd_instructions_len;
313 decode(redirect_locator, bl);
314 decode(redirect_object, bl);
315 decode(legacy_osd_instructions_len, bl);
316 if (legacy_osd_instructions_len) {
9f95a23c 317 bl += legacy_osd_instructions_len;
11fdf7f2 318 }
7c673cae
FG
319 DECODE_FINISH(bl);
320}
321
322void request_redirect_t::dump(Formatter *f) const
323{
324 f->dump_string("object", redirect_object);
325 f->open_object_section("locator");
326 redirect_locator.dump(f);
327 f->close_section(); // locator
328}
329
330void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
331{
332 object_locator_t loc(1, "redir_obj");
333 o.push_back(new request_redirect_t());
334 o.push_back(new request_redirect_t(loc, 0));
335 o.push_back(new request_redirect_t(loc, "redir_obj"));
336 o.push_back(new request_redirect_t(loc));
337}
338
339void objectstore_perf_stat_t::dump(Formatter *f) const
340{
11fdf7f2
TL
341 // *_ms values just for compatibility.
342 f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0);
343 f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0);
344 f->dump_unsigned("commit_latency_ns", os_commit_latency_ns);
345 f->dump_unsigned("apply_latency_ns", os_apply_latency_ns);
7c673cae
FG
346}
347
9f95a23c 348void objectstore_perf_stat_t::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae 349{
11fdf7f2
TL
350 uint8_t target_v = 2;
351 if (!HAVE_FEATURE(features, OS_PERF_STAT_NS)) {
352 target_v = 1;
353 }
354 ENCODE_START(target_v, target_v, bl);
355 if (target_v >= 2) {
356 encode(os_commit_latency_ns, bl);
357 encode(os_apply_latency_ns, bl);
358 } else {
359 constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
360 uint32_t commit_latency_ms = os_commit_latency_ns / NS_PER_MS;
361 uint32_t apply_latency_ms = os_apply_latency_ns / NS_PER_MS;
362 encode(commit_latency_ms, bl); // for compatibility with older monitor.
363 encode(apply_latency_ms, bl); // for compatibility with older monitor.
364 }
7c673cae
FG
365 ENCODE_FINISH(bl);
366}
367
9f95a23c 368void objectstore_perf_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 369{
11fdf7f2
TL
370 DECODE_START(2, bl);
371 if (struct_v >= 2) {
372 decode(os_commit_latency_ns, bl);
373 decode(os_apply_latency_ns, bl);
374 } else {
375 uint32_t commit_latency_ms;
376 uint32_t apply_latency_ms;
377 decode(commit_latency_ms, bl);
378 decode(apply_latency_ms, bl);
379 constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
380 os_commit_latency_ns = commit_latency_ms * NS_PER_MS;
381 os_apply_latency_ns = apply_latency_ms * NS_PER_MS;
382 }
7c673cae
FG
383 DECODE_FINISH(bl);
384}
385
386void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf_stat_t*>& o)
387{
388 o.push_back(new objectstore_perf_stat_t());
389 o.push_back(new objectstore_perf_stat_t());
11fdf7f2
TL
390 o.back()->os_commit_latency_ns = 20000000;
391 o.back()->os_apply_latency_ns = 30000000;
7c673cae
FG
392}
393
394// -- osd_stat_t --
ded94939 395void osd_stat_t::dump(Formatter *f, bool with_net) const
7c673cae 396{
31f18b77
FG
397 f->dump_unsigned("up_from", up_from);
398 f->dump_unsigned("seq", seq);
35e4c445 399 f->dump_unsigned("num_pgs", num_pgs);
81eedcae
TL
400 f->dump_unsigned("num_osds", num_osds);
401 f->dump_unsigned("num_per_pool_osds", num_per_pool_osds);
9f95a23c 402 f->dump_unsigned("num_per_pool_omap_osds", num_per_pool_omap_osds);
11fdf7f2
TL
403
404 /// dump legacy stats fields to ensure backward compatibility.
405 f->dump_unsigned("kb", statfs.kb());
406 f->dump_unsigned("kb_used", statfs.kb_used_raw());
407 f->dump_unsigned("kb_used_data", statfs.kb_used_data());
408 f->dump_unsigned("kb_used_omap", statfs.kb_used_omap());
409 f->dump_unsigned("kb_used_meta", statfs.kb_used_internal_metadata());
410 f->dump_unsigned("kb_avail", statfs.kb_avail());
411 ////////////////////
412
413 f->open_object_section("statfs");
414 statfs.dump(f);
415 f->close_section();
7c673cae
FG
416 f->open_array_section("hb_peers");
417 for (auto p : hb_peers)
418 f->dump_int("osd", p);
419 f->close_section();
420 f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
421 f->dump_int("num_snap_trimming", num_snap_trimming);
11fdf7f2 422 f->dump_int("num_shards_repaired", num_shards_repaired);
7c673cae
FG
423 f->open_object_section("op_queue_age_hist");
424 op_queue_age_hist.dump(f);
425 f->close_section();
426 f->open_object_section("perf_stat");
427 os_perf_stat.dump(f);
428 f->close_section();
11fdf7f2
TL
429 f->open_array_section("alerts");
430 ::dump(f, os_alerts);
431 f->close_section();
ded94939 432 if (with_net) {
9f95a23c
TL
433 dump_ping_time(f);
434 }
435}
436
437void osd_stat_t::dump_ping_time(Formatter *f) const
438{
eafe8130
TL
439 f->open_array_section("network_ping_times");
440 for (auto &i : hb_pingtime) {
441 f->open_object_section("entry");
442 f->dump_int("osd", i.first);
443 const time_t lu(i.second.last_update);
444 char buffer[26];
445 string lustr(ctime_r(&lu, buffer));
446 lustr.pop_back(); // Remove trailing \n
447 f->dump_string("last update", lustr);
448 f->open_array_section("interfaces");
449 f->open_object_section("interface");
450 f->dump_string("interface", "back");
451 f->open_object_section("average");
9f95a23c
TL
452 f->dump_float("1min", i.second.back_pingtime[0]/1000.0);
453 f->dump_float("5min", i.second.back_pingtime[1]/1000.0);
454 f->dump_float("15min", i.second.back_pingtime[2]/1000.0);
eafe8130
TL
455 f->close_section(); // average
456 f->open_object_section("min");
9f95a23c
TL
457 f->dump_float("1min", i.second.back_min[0]/1000.0);
458 f->dump_float("5min", i.second.back_min[1]/1000.0);
459 f->dump_float("15min", i.second.back_min[2]/1000.0);
eafe8130
TL
460 f->close_section(); // min
461 f->open_object_section("max");
9f95a23c
TL
462 f->dump_float("1min", i.second.back_max[0]/1000.0);
463 f->dump_float("5min", i.second.back_max[1]/1000.0);
464 f->dump_float("15min", i.second.back_max[2]/1000.0);
eafe8130 465 f->close_section(); // max
9f95a23c 466 f->dump_float("last", i.second.back_last/1000.0);
eafe8130
TL
467 f->close_section(); // interface
468
469 if (i.second.front_pingtime[0] != 0) {
470 f->open_object_section("interface");
471 f->dump_string("interface", "front");
472 f->open_object_section("average");
9f95a23c
TL
473 f->dump_float("1min", i.second.front_pingtime[0]/1000.0);
474 f->dump_float("5min", i.second.front_pingtime[1]/1000.0);
475 f->dump_float("15min", i.second.front_pingtime[2]/1000.0);
eafe8130
TL
476 f->close_section(); // average
477 f->open_object_section("min");
9f95a23c
TL
478 f->dump_float("1min", i.second.front_min[0]/1000.0);
479 f->dump_float("5min", i.second.front_min[1]/1000.0);
480 f->dump_float("15min", i.second.front_min[2]/1000.0);
eafe8130
TL
481 f->close_section(); // min
482 f->open_object_section("max");
9f95a23c
TL
483 f->dump_float("1min", i.second.front_max[0]/1000.0);
484 f->dump_float("5min", i.second.front_max[1]/1000.0);
485 f->dump_float("15min", i.second.front_max[2]/1000.0);
eafe8130 486 f->close_section(); // max
9f95a23c 487 f->dump_float("last", i.second.front_last/1000.0);
eafe8130
TL
488 f->close_section(); // interface
489 }
490 f->close_section(); // interfaces
491 f->close_section(); // entry
492 }
493 f->close_section(); // network_ping_time
7c673cae
FG
494}
495
9f95a23c 496void osd_stat_t::encode(ceph::buffer::list &bl, uint64_t features) const
11fdf7f2 497{
eafe8130 498 ENCODE_START(14, 2, bl);
11fdf7f2
TL
499
500 //////// for compatibility ////////
501 int64_t kb = statfs.kb();
502 int64_t kb_used = statfs.kb_used_raw();
503 int64_t kb_avail = statfs.kb_avail();
504 encode(kb, bl);
505 encode(kb_used, bl);
506 encode(kb_avail, bl);
507 ///////////////////////////////////
508
509 encode(snap_trim_queue_len, bl);
510 encode(num_snap_trimming, bl);
511 encode(hb_peers, bl);
512 encode((uint32_t)0, bl);
513 encode(op_queue_age_hist, bl);
514 encode(os_perf_stat, bl, features);
515 encode(up_from, bl);
516 encode(seq, bl);
517 encode(num_pgs, bl);
518
519 //////// for compatibility ////////
520 int64_t kb_used_data = statfs.kb_used_data();
521 int64_t kb_used_omap = statfs.kb_used_omap();
522 int64_t kb_used_meta = statfs.kb_used_internal_metadata();
523 encode(kb_used_data, bl);
524 encode(kb_used_omap, bl);
525 encode(kb_used_meta, bl);
526 encode(statfs, bl);
527 ///////////////////////////////////
528 encode(os_alerts, bl);
529 encode(num_shards_repaired, bl);
81eedcae
TL
530 encode(num_osds, bl);
531 encode(num_per_pool_osds, bl);
9f95a23c 532 encode(num_per_pool_omap_osds, bl);
eafe8130
TL
533
534 // hb_pingtime map
535 encode((int)hb_pingtime.size(), bl);
536 for (auto i : hb_pingtime) {
537 encode(i.first, bl); // osd
538 encode(i.second.last_update, bl);
539 encode(i.second.back_pingtime[0], bl);
540 encode(i.second.back_pingtime[1], bl);
541 encode(i.second.back_pingtime[2], bl);
542 encode(i.second.back_min[0], bl);
543 encode(i.second.back_min[1], bl);
544 encode(i.second.back_min[2], bl);
545 encode(i.second.back_max[0], bl);
546 encode(i.second.back_max[1], bl);
547 encode(i.second.back_max[2], bl);
548 encode(i.second.back_last, bl);
549 encode(i.second.front_pingtime[0], bl);
550 encode(i.second.front_pingtime[1], bl);
551 encode(i.second.front_pingtime[2], bl);
552 encode(i.second.front_min[0], bl);
553 encode(i.second.front_min[1], bl);
554 encode(i.second.front_min[2], bl);
555 encode(i.second.front_max[0], bl);
556 encode(i.second.front_max[1], bl);
557 encode(i.second.front_max[2], bl);
558 encode(i.second.front_last, bl);
559 }
7c673cae
FG
560 ENCODE_FINISH(bl);
561}
562
9f95a23c 563void osd_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 564{
11fdf7f2
TL
565 int64_t kb, kb_used,kb_avail;
566 int64_t kb_used_data, kb_used_omap, kb_used_meta;
eafe8130 567 DECODE_START_LEGACY_COMPAT_LEN(14, 2, 2, bl);
11fdf7f2
TL
568 decode(kb, bl);
569 decode(kb_used, bl);
570 decode(kb_avail, bl);
571 decode(snap_trim_queue_len, bl);
572 decode(num_snap_trimming, bl);
573 decode(hb_peers, bl);
7c673cae 574 vector<int> num_hb_out;
11fdf7f2 575 decode(num_hb_out, bl);
7c673cae 576 if (struct_v >= 3)
11fdf7f2 577 decode(op_queue_age_hist, bl);
7c673cae 578 if (struct_v >= 4)
11fdf7f2 579 decode(os_perf_stat, bl);
31f18b77 580 if (struct_v >= 6) {
11fdf7f2
TL
581 decode(up_from, bl);
582 decode(seq, bl);
31f18b77 583 }
35e4c445 584 if (struct_v >= 7) {
11fdf7f2
TL
585 decode(num_pgs, bl);
586 }
587 if (struct_v >= 8) {
588 decode(kb_used_data, bl);
589 decode(kb_used_omap, bl);
590 decode(kb_used_meta, bl);
591 } else {
592 kb_used_data = kb_used;
593 kb_used_omap = 0;
594 kb_used_meta = 0;
595 }
596 if (struct_v >= 9) {
597 decode(statfs, bl);
598 } else {
599 statfs.reset();
600 statfs.total = kb << 10;
601 statfs.available = kb_avail << 10;
602 // actually it's totally unexpected to have ststfs.total < statfs.available
603 // here but unfortunately legacy generate_test_instances produced such a
604 // case hence inserting some handling rather than assert
605 statfs.internally_reserved =
606 statfs.total > statfs.available ? statfs.total - statfs.available : 0;
607 kb_used <<= 10;
608 if ((int64_t)statfs.internally_reserved > kb_used) {
609 statfs.internally_reserved -= kb_used;
610 } else {
611 statfs.internally_reserved = 0;
612 }
613 statfs.allocated = kb_used_data << 10;
614 statfs.omap_allocated = kb_used_omap << 10;
615 statfs.internal_metadata = kb_used_meta << 10;
616 }
617 if (struct_v >= 10) {
618 decode(os_alerts, bl);
619 } else {
620 os_alerts.clear();
621 }
622 if (struct_v >= 11) {
623 decode(num_shards_repaired, bl);
624 } else {
625 num_shards_repaired = 0;
35e4c445 626 }
81eedcae
TL
627 if (struct_v >= 12) {
628 decode(num_osds, bl);
629 decode(num_per_pool_osds, bl);
630 } else {
631 num_osds = 0;
632 num_per_pool_osds = 0;
633 }
eafe8130 634 if (struct_v >= 13) {
9f95a23c
TL
635 decode(num_per_pool_omap_osds, bl);
636 } else {
637 num_per_pool_omap_osds = 0;
eafe8130
TL
638 }
639 hb_pingtime.clear();
640 if (struct_v >= 14) {
641 int count;
642 decode(count, bl);
643 for (int i = 0 ; i < count ; i++) {
644 int osd;
645 decode(osd, bl);
646 struct Interfaces ifs;
647 decode(ifs.last_update, bl);
648 decode(ifs.back_pingtime[0],bl);
649 decode(ifs.back_pingtime[1], bl);
650 decode(ifs.back_pingtime[2], bl);
651 decode(ifs.back_min[0],bl);
652 decode(ifs.back_min[1], bl);
653 decode(ifs.back_min[2], bl);
654 decode(ifs.back_max[0],bl);
655 decode(ifs.back_max[1], bl);
656 decode(ifs.back_max[2], bl);
657 decode(ifs.back_last, bl);
658 decode(ifs.front_pingtime[0], bl);
659 decode(ifs.front_pingtime[1], bl);
660 decode(ifs.front_pingtime[2], bl);
661 decode(ifs.front_min[0], bl);
662 decode(ifs.front_min[1], bl);
663 decode(ifs.front_min[2], bl);
664 decode(ifs.front_max[0], bl);
665 decode(ifs.front_max[1], bl);
666 decode(ifs.front_max[2], bl);
667 decode(ifs.front_last, bl);
668 hb_pingtime[osd] = ifs;
669 }
670 }
7c673cae
FG
671 DECODE_FINISH(bl);
672}
673
674void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
675{
676 o.push_back(new osd_stat_t);
677
678 o.push_back(new osd_stat_t);
11fdf7f2
TL
679 list<store_statfs_t*> ll;
680 store_statfs_t::generate_test_instances(ll);
681 o.back()->statfs = *ll.back();
7c673cae
FG
682 o.back()->hb_peers.push_back(7);
683 o.back()->snap_trim_queue_len = 8;
684 o.back()->num_snap_trimming = 99;
11fdf7f2
TL
685 o.back()->num_shards_repaired = 101;
686 o.back()->os_alerts[0].emplace(
687 "some alert", "some alert details");
688 o.back()->os_alerts[1].emplace(
689 "some alert2", "some alert2 details");
eafe8130
TL
690 struct Interfaces gen_interfaces = {
691 123456789, { 1000, 900, 800 }, { 990, 890, 790 }, { 1010, 910, 810 }, 1001,
692 { 1100, 1000, 900 }, { 1090, 990, 890 }, { 1110, 1010, 910 }, 1101 };
693 o.back()->hb_pingtime[20] = gen_interfaces;
694 gen_interfaces = {
695 987654321, { 100, 200, 300 }, { 90, 190, 290 }, { 110, 210, 310 }, 101 };
696 o.back()->hb_pingtime[30] = gen_interfaces;
7c673cae
FG
697}
698
699// -- pg_t --
700
701int pg_t::print(char *o, int maxlen) const
702{
11fdf7f2 703 return snprintf(o, maxlen, "%llu.%x", (unsigned long long)pool(), ps());
7c673cae
FG
704}
705
706bool pg_t::parse(const char *s)
707{
708 uint64_t ppool;
709 uint32_t pseed;
11fdf7f2 710 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
7c673cae
FG
711 if (r < 2)
712 return false;
713 m_pool = ppool;
714 m_seed = pseed;
7c673cae
FG
715 return true;
716}
717
718bool spg_t::parse(const char *s)
719{
7c673cae
FG
720 shard = shard_id_t::NO_SHARD;
721 uint64_t ppool;
722 uint32_t pseed;
7c673cae
FG
723 uint32_t pshard;
724 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
725 if (r < 2)
726 return false;
727 pgid.set_pool(ppool);
728 pgid.set_ps(pseed);
729
11fdf7f2 730 const char *p = strchr(s, 's');
7c673cae 731 if (p) {
11fdf7f2 732 r = sscanf(p, "s%u", &pshard);
7c673cae
FG
733 if (r == 1) {
734 shard = shard_id_t(pshard);
735 } else {
736 return false;
737 }
738 }
739 return true;
740}
741
742char *spg_t::calc_name(char *buf, const char *suffix_backwords) const
743{
744 while (*suffix_backwords)
745 *--buf = *suffix_backwords++;
746
747 if (!is_no_shard()) {
748 buf = ritoa<uint8_t, 10>((uint8_t)shard.id, buf);
749 *--buf = 's';
750 }
751
752 return pgid.calc_name(buf, "");
753}
754
755ostream& operator<<(ostream& out, const spg_t &pg)
756{
757 char buf[spg_t::calc_name_buf_size];
758 buf[spg_t::calc_name_buf_size - 1] = '\0';
759 out << pg.calc_name(buf + spg_t::calc_name_buf_size - 1, "");
760 return out;
761}
762
763pg_t pg_t::get_ancestor(unsigned old_pg_num) const
764{
765 int old_bits = cbits(old_pg_num);
766 int old_mask = (1 << old_bits) - 1;
767 pg_t ret = *this;
768 ret.m_seed = ceph_stable_mod(m_seed, old_pg_num, old_mask);
769 return ret;
770}
771
772bool pg_t::is_split(unsigned old_pg_num, unsigned new_pg_num, set<pg_t> *children) const
773{
11fdf7f2
TL
774 //ceph_assert(m_seed < old_pg_num);
775 if (m_seed >= old_pg_num) {
776 // degenerate case
777 return false;
778 }
7c673cae
FG
779 if (new_pg_num <= old_pg_num)
780 return false;
781
782 bool split = false;
783 if (true) {
784 unsigned old_bits = cbits(old_pg_num);
785 unsigned old_mask = (1 << old_bits) - 1;
786 for (unsigned n = 1; ; n++) {
787 unsigned next_bit = (n << (old_bits-1));
788 unsigned s = next_bit | m_seed;
789
790 if (s < old_pg_num || s == m_seed)
791 continue;
792 if (s >= new_pg_num)
793 break;
794 if ((unsigned)ceph_stable_mod(s, old_pg_num, old_mask) == m_seed) {
795 split = true;
796 if (children)
11fdf7f2 797 children->insert(pg_t(s, m_pool));
7c673cae
FG
798 }
799 }
800 }
801 if (false) {
802 // brute force
803 int old_bits = cbits(old_pg_num);
804 int old_mask = (1 << old_bits) - 1;
805 for (unsigned x = old_pg_num; x < new_pg_num; ++x) {
806 unsigned o = ceph_stable_mod(x, old_pg_num, old_mask);
807 if (o == m_seed) {
808 split = true;
11fdf7f2 809 children->insert(pg_t(x, m_pool));
7c673cae
FG
810 }
811 }
812 }
813 return split;
814}
815
816unsigned pg_t::get_split_bits(unsigned pg_num) const {
817 if (pg_num == 1)
818 return 0;
11fdf7f2 819 ceph_assert(pg_num > 1);
7c673cae
FG
820
821 // Find unique p such that pg_num \in [2^(p-1), 2^p)
822 unsigned p = cbits(pg_num);
11fdf7f2 823 ceph_assert(p); // silence coverity #751330
7c673cae
FG
824
825 if ((m_seed % (1<<(p-1))) < (pg_num % (1<<(p-1))))
826 return p;
827 else
828 return p - 1;
829}
830
11fdf7f2
TL
831bool pg_t::is_merge_source(
832 unsigned old_pg_num,
833 unsigned new_pg_num,
834 pg_t *parent) const
835{
836 if (m_seed < old_pg_num &&
837 m_seed >= new_pg_num) {
838 if (parent) {
839 pg_t t = *this;
840 while (t.m_seed >= new_pg_num) {
841 t = t.get_parent();
842 }
843 *parent = t;
844 }
845 return true;
846 }
847 return false;
848}
849
7c673cae
FG
850pg_t pg_t::get_parent() const
851{
852 unsigned bits = cbits(m_seed);
11fdf7f2 853 ceph_assert(bits);
7c673cae
FG
854 pg_t retval = *this;
855 retval.m_seed &= ~((~0)<<(bits - 1));
856 return retval;
857}
858
859hobject_t pg_t::get_hobj_start() const
860{
11fdf7f2 861 return hobject_t(object_t(), string(), 0, m_seed, m_pool,
7c673cae
FG
862 string());
863}
864
865hobject_t pg_t::get_hobj_end(unsigned pg_num) const
866{
867 // note: this assumes a bitwise sort; with the legacy nibblewise
868 // sort a PG did not always cover a single contiguous range of the
869 // (bit-reversed) hash range.
870 unsigned bits = get_split_bits(pg_num);
871 uint64_t rev_start = hobject_t::_reverse_bits(m_seed);
872 uint64_t rev_end = (rev_start | (0xffffffff >> bits)) + 1;
873 if (rev_end >= 0x100000000) {
11fdf7f2 874 ceph_assert(rev_end == 0x100000000);
7c673cae
FG
875 return hobject_t::get_max();
876 } else {
877 return hobject_t(object_t(), string(), CEPH_NOSNAP,
878 hobject_t::_reverse_bits(rev_end), m_pool,
879 string());
880 }
881}
882
883void pg_t::dump(Formatter *f) const
884{
885 f->dump_unsigned("pool", m_pool);
886 f->dump_unsigned("seed", m_seed);
7c673cae
FG
887}
888
889void pg_t::generate_test_instances(list<pg_t*>& o)
890{
891 o.push_back(new pg_t);
11fdf7f2
TL
892 o.push_back(new pg_t(1, 2));
893 o.push_back(new pg_t(13123, 3));
894 o.push_back(new pg_t(131223, 4));
7c673cae
FG
895}
896
897char *pg_t::calc_name(char *buf, const char *suffix_backwords) const
898{
899 while (*suffix_backwords)
900 *--buf = *suffix_backwords++;
901
7c673cae
FG
902 buf = ritoa<uint32_t, 16>(m_seed, buf);
903
904 *--buf = '.';
905
906 return ritoa<uint64_t, 10>(m_pool, buf);
907}
908
909ostream& operator<<(ostream& out, const pg_t &pg)
910{
911 char buf[pg_t::calc_name_buf_size];
912 buf[pg_t::calc_name_buf_size - 1] = '\0';
913 out << pg.calc_name(buf + pg_t::calc_name_buf_size - 1, "");
914 return out;
915}
916
917
918// -- coll_t --
919
920void coll_t::calc_str()
921{
922 switch (type) {
923 case TYPE_META:
924 strcpy(_str_buff, "meta");
925 _str = _str_buff;
926 break;
927 case TYPE_PG:
928 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
929 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "daeh_");
930 break;
931 case TYPE_PG_TEMP:
932 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
933 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "PMET_");
934 break;
935 default:
11fdf7f2 936 ceph_abort_msg("unknown collection type");
7c673cae
FG
937 }
938}
939
940bool coll_t::parse(const std::string& s)
941{
942 if (s == "meta") {
943 type = TYPE_META;
944 pgid = spg_t();
945 removal_seq = 0;
946 calc_str();
11fdf7f2 947 ceph_assert(s == _str);
7c673cae
FG
948 return true;
949 }
950 if (s.find("_head") == s.length() - 5 &&
951 pgid.parse(s.substr(0, s.length() - 5))) {
952 type = TYPE_PG;
953 removal_seq = 0;
954 calc_str();
11fdf7f2 955 ceph_assert(s == _str);
7c673cae
FG
956 return true;
957 }
958 if (s.find("_TEMP") == s.length() - 5 &&
959 pgid.parse(s.substr(0, s.length() - 5))) {
960 type = TYPE_PG_TEMP;
961 removal_seq = 0;
962 calc_str();
11fdf7f2 963 ceph_assert(s == _str);
7c673cae
FG
964 return true;
965 }
966 return false;
967}
968
9f95a23c 969void coll_t::encode(ceph::buffer::list& bl) const
7c673cae 970{
11fdf7f2 971 using ceph::encode;
7c673cae
FG
972 // when changing this, remember to update encoded_size() too.
973 if (is_temp()) {
974 // can't express this as v2...
975 __u8 struct_v = 3;
11fdf7f2
TL
976 encode(struct_v, bl);
977 encode(to_str(), bl);
7c673cae
FG
978 } else {
979 __u8 struct_v = 2;
11fdf7f2
TL
980 encode(struct_v, bl);
981 encode((__u8)type, bl);
982 encode(pgid, bl);
7c673cae 983 snapid_t snap = CEPH_NOSNAP;
11fdf7f2 984 encode(snap, bl);
7c673cae
FG
985 }
986}
987
988size_t coll_t::encoded_size() const
989{
990 size_t r = sizeof(__u8);
991 if (is_temp()) {
992 // v3
993 r += sizeof(__u32);
994 if (_str) {
995 r += strlen(_str);
996 }
997 } else {
998 // v2
999 // 1. type
1000 r += sizeof(__u8);
1001 // 2. pgid
1002 // - encoding header
1003 r += sizeof(ceph_le32) + 2 * sizeof(__u8);
1004 // - pg_t
1005 r += sizeof(__u8) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
1006 // - shard_id_t
1007 r += sizeof(int8_t);
1008 // 3. snapid_t
1009 r += sizeof(uint64_t);
1010 }
1011
1012 return r;
1013}
1014
9f95a23c 1015void coll_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 1016{
11fdf7f2 1017 using ceph::decode;
7c673cae 1018 __u8 struct_v;
11fdf7f2 1019 decode(struct_v, bl);
7c673cae
FG
1020 switch (struct_v) {
1021 case 1:
1022 {
1023 snapid_t snap;
11fdf7f2
TL
1024 decode(pgid, bl);
1025 decode(snap, bl);
7c673cae
FG
1026
1027 // infer the type
1028 if (pgid == spg_t() && snap == 0) {
1029 type = TYPE_META;
1030 } else {
1031 type = TYPE_PG;
1032 }
1033 removal_seq = 0;
1034 }
1035 break;
1036
1037 case 2:
1038 {
1039 __u8 _type;
1040 snapid_t snap;
11fdf7f2
TL
1041 decode(_type, bl);
1042 decode(pgid, bl);
1043 decode(snap, bl);
7c673cae
FG
1044 type = (type_t)_type;
1045 removal_seq = 0;
1046 }
1047 break;
1048
1049 case 3:
1050 {
1051 string str;
11fdf7f2 1052 decode(str, bl);
7c673cae
FG
1053 bool ok = parse(str);
1054 if (!ok)
1055 throw std::domain_error(std::string("unable to parse pg ") + str);
1056 }
1057 break;
1058
1059 default:
1060 {
1061 ostringstream oss;
1062 oss << "coll_t::decode(): don't know how to decode version "
1063 << struct_v;
1064 throw std::domain_error(oss.str());
1065 }
1066 }
1067}
1068
1069void coll_t::dump(Formatter *f) const
1070{
1071 f->dump_unsigned("type_id", (unsigned)type);
1072 if (type != TYPE_META)
1073 f->dump_stream("pgid") << pgid;
1074 f->dump_string("name", to_str());
1075}
1076
1077void coll_t::generate_test_instances(list<coll_t*>& o)
1078{
1079 o.push_back(new coll_t());
1080 o.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD)));
1081 o.push_back(new coll_t(o.back()->get_temp()));
1082 o.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
1083 o.push_back(new coll_t(o.back()->get_temp()));
1084 o.push_back(new coll_t());
1085}
1086
1087// ---
1088
1089std::string pg_vector_string(const vector<int32_t> &a)
1090{
1091 ostringstream oss;
1092 oss << "[";
9f95a23c
TL
1093 for (auto i = a.cbegin(); i != a.cend(); ++i) {
1094 if (i != a.begin())
7c673cae 1095 oss << ",";
9f95a23c 1096 if (*i != CRUSH_ITEM_NONE)
7c673cae 1097 oss << *i;
9f95a23c 1098 else
7c673cae
FG
1099 oss << "NONE";
1100 }
1101 oss << "]";
1102 return oss.str();
1103}
1104
11fdf7f2 1105std::string pg_state_string(uint64_t state)
7c673cae
FG
1106{
1107 ostringstream oss;
1108 if (state & PG_STATE_STALE)
1109 oss << "stale+";
1110 if (state & PG_STATE_CREATING)
1111 oss << "creating+";
1112 if (state & PG_STATE_ACTIVE)
1113 oss << "active+";
1114 if (state & PG_STATE_ACTIVATING)
1115 oss << "activating+";
1116 if (state & PG_STATE_CLEAN)
1117 oss << "clean+";
1118 if (state & PG_STATE_RECOVERY_WAIT)
1119 oss << "recovery_wait+";
1120 if (state & PG_STATE_RECOVERY_TOOFULL)
1121 oss << "recovery_toofull+";
1122 if (state & PG_STATE_RECOVERING)
1123 oss << "recovering+";
c07f9fc5
FG
1124 if (state & PG_STATE_FORCED_RECOVERY)
1125 oss << "forced_recovery+";
7c673cae
FG
1126 if (state & PG_STATE_DOWN)
1127 oss << "down+";
b32b8144
FG
1128 if (state & PG_STATE_RECOVERY_UNFOUND)
1129 oss << "recovery_unfound+";
1130 if (state & PG_STATE_BACKFILL_UNFOUND)
1131 oss << "backfill_unfound+";
7c673cae
FG
1132 if (state & PG_STATE_UNDERSIZED)
1133 oss << "undersized+";
1134 if (state & PG_STATE_DEGRADED)
1135 oss << "degraded+";
1136 if (state & PG_STATE_REMAPPED)
1137 oss << "remapped+";
11fdf7f2
TL
1138 if (state & PG_STATE_PREMERGE)
1139 oss << "premerge+";
7c673cae
FG
1140 if (state & PG_STATE_SCRUBBING)
1141 oss << "scrubbing+";
1142 if (state & PG_STATE_DEEP_SCRUB)
1143 oss << "deep+";
1144 if (state & PG_STATE_INCONSISTENT)
1145 oss << "inconsistent+";
1146 if (state & PG_STATE_PEERING)
1147 oss << "peering+";
1148 if (state & PG_STATE_REPAIR)
1149 oss << "repair+";
3efd9988 1150 if (state & PG_STATE_BACKFILL_WAIT)
7c673cae 1151 oss << "backfill_wait+";
3efd9988 1152 if (state & PG_STATE_BACKFILLING)
7c673cae 1153 oss << "backfilling+";
c07f9fc5
FG
1154 if (state & PG_STATE_FORCED_BACKFILL)
1155 oss << "forced_backfill+";
7c673cae
FG
1156 if (state & PG_STATE_BACKFILL_TOOFULL)
1157 oss << "backfill_toofull+";
1158 if (state & PG_STATE_INCOMPLETE)
1159 oss << "incomplete+";
1160 if (state & PG_STATE_PEERED)
1161 oss << "peered+";
1162 if (state & PG_STATE_SNAPTRIM)
1163 oss << "snaptrim+";
1164 if (state & PG_STATE_SNAPTRIM_WAIT)
1165 oss << "snaptrim_wait+";
224ce89b
WB
1166 if (state & PG_STATE_SNAPTRIM_ERROR)
1167 oss << "snaptrim_error+";
11fdf7f2
TL
1168 if (state & PG_STATE_FAILED_REPAIR)
1169 oss << "failed_repair+";
9f95a23c
TL
1170 if (state & PG_STATE_LAGGY)
1171 oss << "laggy+";
1172 if (state & PG_STATE_WAIT)
1173 oss << "wait+";
7c673cae
FG
1174 string ret(oss.str());
1175 if (ret.length() > 0)
1176 ret.resize(ret.length() - 1);
1177 else
31f18b77 1178 ret = "unknown";
7c673cae
FG
1179 return ret;
1180}
1181
9f95a23c 1182std::optional<uint64_t> pg_string_state(const std::string& state)
7c673cae 1183{
9f95a23c 1184 std::optional<uint64_t> type;
7c673cae
FG
1185 if (state == "active")
1186 type = PG_STATE_ACTIVE;
1187 else if (state == "clean")
1188 type = PG_STATE_CLEAN;
1189 else if (state == "down")
1190 type = PG_STATE_DOWN;
b32b8144
FG
1191 else if (state == "recovery_unfound")
1192 type = PG_STATE_RECOVERY_UNFOUND;
1193 else if (state == "backfill_unfound")
1194 type = PG_STATE_BACKFILL_UNFOUND;
11fdf7f2
TL
1195 else if (state == "premerge")
1196 type = PG_STATE_PREMERGE;
7c673cae
FG
1197 else if (state == "scrubbing")
1198 type = PG_STATE_SCRUBBING;
1199 else if (state == "degraded")
1200 type = PG_STATE_DEGRADED;
1201 else if (state == "inconsistent")
1202 type = PG_STATE_INCONSISTENT;
1203 else if (state == "peering")
1204 type = PG_STATE_PEERING;
1205 else if (state == "repair")
1206 type = PG_STATE_REPAIR;
1207 else if (state == "recovering")
1208 type = PG_STATE_RECOVERING;
c07f9fc5
FG
1209 else if (state == "forced_recovery")
1210 type = PG_STATE_FORCED_RECOVERY;
7c673cae
FG
1211 else if (state == "backfill_wait")
1212 type = PG_STATE_BACKFILL_WAIT;
1213 else if (state == "incomplete")
1214 type = PG_STATE_INCOMPLETE;
1215 else if (state == "stale")
1216 type = PG_STATE_STALE;
1217 else if (state == "remapped")
1218 type = PG_STATE_REMAPPED;
94b18763 1219 else if (state == "deep")
7c673cae 1220 type = PG_STATE_DEEP_SCRUB;
3efd9988
FG
1221 else if (state == "backfilling")
1222 type = PG_STATE_BACKFILLING;
c07f9fc5
FG
1223 else if (state == "forced_backfill")
1224 type = PG_STATE_FORCED_BACKFILL;
7c673cae
FG
1225 else if (state == "backfill_toofull")
1226 type = PG_STATE_BACKFILL_TOOFULL;
1227 else if (state == "recovery_wait")
1228 type = PG_STATE_RECOVERY_WAIT;
1229 else if (state == "recovery_toofull")
1230 type = PG_STATE_RECOVERY_TOOFULL;
1231 else if (state == "undersized")
1232 type = PG_STATE_UNDERSIZED;
1233 else if (state == "activating")
1234 type = PG_STATE_ACTIVATING;
1235 else if (state == "peered")
1236 type = PG_STATE_PEERED;
1237 else if (state == "snaptrim")
1238 type = PG_STATE_SNAPTRIM;
1239 else if (state == "snaptrim_wait")
1240 type = PG_STATE_SNAPTRIM_WAIT;
224ce89b
WB
1241 else if (state == "snaptrim_error")
1242 type = PG_STATE_SNAPTRIM_ERROR;
91327a77
AA
1243 else if (state == "creating")
1244 type = PG_STATE_CREATING;
11fdf7f2
TL
1245 else if (state == "failed_repair")
1246 type = PG_STATE_FAILED_REPAIR;
9f95a23c
TL
1247 else if (state == "laggy")
1248 type = PG_STATE_LAGGY;
1249 else if (state == "wait")
1250 type = PG_STATE_WAIT;
11fdf7f2
TL
1251 else if (state == "unknown")
1252 type = 0;
7c673cae 1253 else
9f95a23c 1254 type = std::nullopt;
7c673cae
FG
1255 return type;
1256}
1257
1258// -- eversion_t --
1259string eversion_t::get_key_name() const
1260{
11fdf7f2
TL
1261 std::string key(32, ' ');
1262 get_key_name(&key[0]);
1263 key.resize(31); // remove the null terminator
1264 return key;
7c673cae
FG
1265}
1266
7c673cae
FG
1267// -- pool_snap_info_t --
1268void pool_snap_info_t::dump(Formatter *f) const
1269{
1270 f->dump_unsigned("snapid", snapid);
1271 f->dump_stream("stamp") << stamp;
1272 f->dump_string("name", name);
1273}
1274
9f95a23c 1275void pool_snap_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 1276{
11fdf7f2 1277 using ceph::encode;
7c673cae
FG
1278 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1279 __u8 struct_v = 1;
11fdf7f2
TL
1280 encode(struct_v, bl);
1281 encode(snapid, bl);
1282 encode(stamp, bl);
1283 encode(name, bl);
7c673cae
FG
1284 return;
1285 }
1286 ENCODE_START(2, 2, bl);
11fdf7f2
TL
1287 encode(snapid, bl);
1288 encode(stamp, bl);
1289 encode(name, bl);
7c673cae
FG
1290 ENCODE_FINISH(bl);
1291}
1292
9f95a23c 1293void pool_snap_info_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
1294{
1295 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2
TL
1296 decode(snapid, bl);
1297 decode(stamp, bl);
1298 decode(name, bl);
7c673cae
FG
1299 DECODE_FINISH(bl);
1300}
1301
1302void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
1303{
1304 o.push_back(new pool_snap_info_t);
1305 o.push_back(new pool_snap_info_t);
1306 o.back()->snapid = 1;
1307 o.back()->stamp = utime_t(1, 2);
1308 o.back()->name = "foo";
1309}
1310
1311// -- pool_opts_t --
1312
1313typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
1314static opt_mapping_t opt_mapping = boost::assign::map_list_of
1315 ("scrub_min_interval", pool_opts_t::opt_desc_t(
1316 pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
1317 ("scrub_max_interval", pool_opts_t::opt_desc_t(
1318 pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
1319 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
1320 pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
1321 ("recovery_priority", pool_opts_t::opt_desc_t(
1322 pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
1323 ("recovery_op_priority", pool_opts_t::opt_desc_t(
1324 pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
1325 ("scrub_priority", pool_opts_t::opt_desc_t(
1326 pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
1327 ("compression_mode", pool_opts_t::opt_desc_t(
1328 pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
1329 ("compression_algorithm", pool_opts_t::opt_desc_t(
1330 pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
1331 ("compression_required_ratio", pool_opts_t::opt_desc_t(
1332 pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
1333 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
1334 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
1335 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
1336 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
1337 ("csum_type", pool_opts_t::opt_desc_t(
1338 pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
1339 ("csum_max_block", pool_opts_t::opt_desc_t(
1340 pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
1341 ("csum_min_block", pool_opts_t::opt_desc_t(
11fdf7f2
TL
1342 pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT))
1343 ("fingerprint_algorithm", pool_opts_t::opt_desc_t(
1344 pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
1345 ("pg_num_min", pool_opts_t::opt_desc_t(
1346 pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
1347 ("target_size_bytes", pool_opts_t::opt_desc_t(
1348 pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
1349 ("target_size_ratio", pool_opts_t::opt_desc_t(
1350 pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE))
1351 ("pg_autoscale_bias", pool_opts_t::opt_desc_t(
9f95a23c
TL
1352 pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE))
1353 ("read_lease_interval", pool_opts_t::opt_desc_t(
1354 pool_opts_t::READ_LEASE_INTERVAL, pool_opts_t::DOUBLE));
7c673cae 1355
11fdf7f2
TL
1356bool pool_opts_t::is_opt_name(const std::string& name)
1357{
1358 return opt_mapping.count(name);
7c673cae
FG
1359}
1360
11fdf7f2
TL
1361pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name)
1362{
9f95a23c 1363 auto i = opt_mapping.find(name);
11fdf7f2
TL
1364 ceph_assert(i != opt_mapping.end());
1365 return i->second;
7c673cae
FG
1366}
1367
11fdf7f2
TL
1368bool pool_opts_t::is_set(pool_opts_t::key_t key) const
1369{
1370 return opts.count(key);
7c673cae
FG
1371}
1372
11fdf7f2
TL
1373const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const
1374{
9f95a23c 1375 auto i = opts.find(key);
11fdf7f2 1376 ceph_assert(i != opts.end());
7c673cae
FG
1377 return i->second;
1378}
1379
1380bool pool_opts_t::unset(pool_opts_t::key_t key) {
1381 return opts.erase(key) > 0;
1382}
1383
11fdf7f2 1384class pool_opts_dumper_t : public boost::static_visitor<> {
7c673cae
FG
1385public:
1386 pool_opts_dumper_t(const std::string& name_, Formatter* f_) :
1387 name(name_.c_str()), f(f_) {}
1388
1389 void operator()(std::string s) const {
1390 f->dump_string(name, s);
1391 }
11fdf7f2 1392 void operator()(int64_t i) const {
7c673cae
FG
1393 f->dump_int(name, i);
1394 }
1395 void operator()(double d) const {
1396 f->dump_float(name, d);
1397 }
1398
1399private:
1400 const char* name;
1401 Formatter* f;
1402};
1403
1404void pool_opts_t::dump(const std::string& name, Formatter* f) const
1405{
1406 const opt_desc_t& desc = get_opt_desc(name);
9f95a23c 1407 auto i = opts.find(desc.key);
7c673cae
FG
1408 if (i == opts.end()) {
1409 return;
1410 }
1411 boost::apply_visitor(pool_opts_dumper_t(name, f), i->second);
1412}
1413
1414void pool_opts_t::dump(Formatter* f) const
1415{
9f95a23c 1416 for (auto i = opt_mapping.cbegin(); i != opt_mapping.cend(); ++i) {
7c673cae
FG
1417 const std::string& name = i->first;
1418 const opt_desc_t& desc = i->second;
9f95a23c 1419 auto j = opts.find(desc.key);
7c673cae
FG
1420 if (j == opts.end()) {
1421 continue;
1422 }
1423 boost::apply_visitor(pool_opts_dumper_t(name, f), j->second);
1424 }
1425}
1426
11fdf7f2 1427class pool_opts_encoder_t : public boost::static_visitor<> {
7c673cae 1428public:
9f95a23c 1429 explicit pool_opts_encoder_t(ceph::buffer::list& bl_, uint64_t features)
11fdf7f2
TL
1430 : bl(bl_),
1431 features(features) {}
1432
1433 void operator()(const std::string &s) const {
1434 encode(static_cast<int32_t>(pool_opts_t::STR), bl);
1435 encode(s, bl);
1436 }
1437 void operator()(int64_t i) const {
1438 encode(static_cast<int32_t>(pool_opts_t::INT), bl);
1439 if (HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1440 encode(i, bl);
1441 } else {
1442 encode(static_cast<int32_t>(i), bl);
1443 }
7c673cae
FG
1444 }
1445 void operator()(double d) const {
11fdf7f2
TL
1446 encode(static_cast<int32_t>(pool_opts_t::DOUBLE), bl);
1447 encode(d, bl);
7c673cae
FG
1448 }
1449
1450private:
9f95a23c 1451 ceph::buffer::list& bl;
11fdf7f2 1452 uint64_t features;
7c673cae
FG
1453};
1454
9f95a23c 1455void pool_opts_t::encode(ceph::buffer::list& bl, uint64_t features) const
11fdf7f2
TL
1456{
1457 unsigned v = 2;
1458 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1459 v = 1;
1460 }
1461 ENCODE_START(v, 1, bl);
7c673cae 1462 uint32_t n = static_cast<uint32_t>(opts.size());
11fdf7f2 1463 encode(n, bl);
9f95a23c 1464 for (auto i = opts.cbegin(); i != opts.cend(); ++i) {
11fdf7f2
TL
1465 encode(static_cast<int32_t>(i->first), bl);
1466 boost::apply_visitor(pool_opts_encoder_t(bl, features), i->second);
7c673cae
FG
1467 }
1468 ENCODE_FINISH(bl);
1469}
1470
9f95a23c 1471void pool_opts_t::decode(ceph::buffer::list::const_iterator& bl)
11fdf7f2 1472{
7c673cae
FG
1473 DECODE_START(1, bl);
1474 __u32 n;
11fdf7f2 1475 decode(n, bl);
7c673cae
FG
1476 opts.clear();
1477 while (n--) {
1478 int32_t k, t;
11fdf7f2
TL
1479 decode(k, bl);
1480 decode(t, bl);
7c673cae
FG
1481 if (t == STR) {
1482 std::string s;
11fdf7f2 1483 decode(s, bl);
7c673cae
FG
1484 opts[static_cast<key_t>(k)] = s;
1485 } else if (t == INT) {
11fdf7f2
TL
1486 int64_t i;
1487 if (struct_v >= 2) {
1488 decode(i, bl);
1489 } else {
1490 int ii;
1491 decode(ii, bl);
1492 i = ii;
1493 }
7c673cae
FG
1494 opts[static_cast<key_t>(k)] = i;
1495 } else if (t == DOUBLE) {
1496 double d;
11fdf7f2 1497 decode(d, bl);
7c673cae
FG
1498 opts[static_cast<key_t>(k)] = d;
1499 } else {
11fdf7f2 1500 ceph_assert(!"invalid type");
7c673cae
FG
1501 }
1502 }
1503 DECODE_FINISH(bl);
1504}
1505
1506ostream& operator<<(ostream& out, const pool_opts_t& opts)
1507{
9f95a23c 1508 for (auto i = opt_mapping.begin(); i != opt_mapping.end(); ++i) {
7c673cae
FG
1509 const std::string& name = i->first;
1510 const pool_opts_t::opt_desc_t& desc = i->second;
9f95a23c 1511 auto j = opts.opts.find(desc.key);
7c673cae
FG
1512 if (j == opts.opts.end()) {
1513 continue;
1514 }
1515 out << " " << name << " " << j->second;
1516 }
1517 return out;
1518}
1519
1520// -- pg_pool_t --
1521
c07f9fc5
FG
1522const char *pg_pool_t::APPLICATION_NAME_CEPHFS("cephfs");
1523const char *pg_pool_t::APPLICATION_NAME_RBD("rbd");
1524const char *pg_pool_t::APPLICATION_NAME_RGW("rgw");
1525
7c673cae
FG
1526void pg_pool_t::dump(Formatter *f) const
1527{
11fdf7f2 1528 f->dump_stream("create_time") << get_create_time();
7c673cae
FG
1529 f->dump_unsigned("flags", get_flags());
1530 f->dump_string("flags_names", get_flags_string());
1531 f->dump_int("type", get_type());
1532 f->dump_int("size", get_size());
1533 f->dump_int("min_size", get_min_size());
31f18b77 1534 f->dump_int("crush_rule", get_crush_rule());
7c673cae 1535 f->dump_int("object_hash", get_object_hash());
11fdf7f2
TL
1536 f->dump_string("pg_autoscale_mode",
1537 get_pg_autoscale_mode_name(pg_autoscale_mode));
7c673cae
FG
1538 f->dump_unsigned("pg_num", get_pg_num());
1539 f->dump_unsigned("pg_placement_num", get_pgp_num());
11fdf7f2
TL
1540 f->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
1541 f->dump_unsigned("pg_num_target", get_pg_num_target());
1542 f->dump_unsigned("pg_num_pending", get_pg_num_pending());
1543 f->dump_object("last_pg_merge_meta", last_pg_merge_meta);
7c673cae
FG
1544 f->dump_stream("last_change") << get_last_change();
1545 f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
11fdf7f2
TL
1546 f->dump_stream("last_force_op_resend_prenautilus")
1547 << get_last_force_op_resend_prenautilus();
7c673cae
FG
1548 f->dump_stream("last_force_op_resend_preluminous")
1549 << get_last_force_op_resend_preluminous();
1550 f->dump_unsigned("auid", get_auid());
1551 f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1552 f->dump_unsigned("snap_seq", get_snap_seq());
1553 f->dump_unsigned("snap_epoch", get_snap_epoch());
1554 f->open_array_section("pool_snaps");
9f95a23c 1555 for (auto p = snaps.cbegin(); p != snaps.cend(); ++p) {
7c673cae
FG
1556 f->open_object_section("pool_snap_info");
1557 p->second.dump(f);
1558 f->close_section();
1559 }
1560 f->close_section();
1561 f->dump_stream("removed_snaps") << removed_snaps;
1562 f->dump_unsigned("quota_max_bytes", quota_max_bytes);
1563 f->dump_unsigned("quota_max_objects", quota_max_objects);
1564 f->open_array_section("tiers");
9f95a23c 1565 for (auto p = tiers.cbegin(); p != tiers.cend(); ++p)
7c673cae
FG
1566 f->dump_unsigned("pool_id", *p);
1567 f->close_section();
1568 f->dump_int("tier_of", tier_of);
1569 f->dump_int("read_tier", read_tier);
1570 f->dump_int("write_tier", write_tier);
1571 f->dump_string("cache_mode", get_cache_mode_name());
1572 f->dump_unsigned("target_max_bytes", target_max_bytes);
1573 f->dump_unsigned("target_max_objects", target_max_objects);
1574 f->dump_unsigned("cache_target_dirty_ratio_micro",
1575 cache_target_dirty_ratio_micro);
1576 f->dump_unsigned("cache_target_dirty_high_ratio_micro",
1577 cache_target_dirty_high_ratio_micro);
1578 f->dump_unsigned("cache_target_full_ratio_micro",
1579 cache_target_full_ratio_micro);
1580 f->dump_unsigned("cache_min_flush_age", cache_min_flush_age);
1581 f->dump_unsigned("cache_min_evict_age", cache_min_evict_age);
1582 f->dump_string("erasure_code_profile", erasure_code_profile);
1583 f->open_object_section("hit_set_params");
1584 hit_set_params.dump(f);
1585 f->close_section(); // hit_set_params
1586 f->dump_unsigned("hit_set_period", hit_set_period);
1587 f->dump_unsigned("hit_set_count", hit_set_count);
1588 f->dump_bool("use_gmt_hitset", use_gmt_hitset);
1589 f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
1590 f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote);
1591 f->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate);
1592 f->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n);
1593 f->open_array_section("grade_table");
1594 for (unsigned i = 0; i < hit_set_count; ++i)
1595 f->dump_unsigned("value", get_grade(i));
1596 f->close_section();
1597 f->dump_unsigned("stripe_width", get_stripe_width());
1598 f->dump_unsigned("expected_num_objects", expected_num_objects);
1599 f->dump_bool("fast_read", fast_read);
1600 f->open_object_section("options");
1601 opts.dump(f);
1602 f->close_section(); // options
c07f9fc5
FG
1603 f->open_object_section("application_metadata");
1604 for (auto &app_pair : application_metadata) {
1605 f->open_object_section(app_pair.first.c_str());
1606 for (auto &kv_pair : app_pair.second) {
1607 f->dump_string(kv_pair.first.c_str(), kv_pair.second);
1608 }
1609 f->close_section(); // application
1610 }
1611 f->close_section(); // application_metadata
7c673cae
FG
1612}
1613
1614void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
1615 for (size_t i = 0; i < from.size(); ++i) {
1616 if (from[i] != CRUSH_ITEM_NONE) {
1617 to->insert(
1618 pg_shard_t(
1619 from[i],
11fdf7f2 1620 is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
7c673cae
FG
1621 }
1622 }
1623}
1624
1625void pg_pool_t::calc_pg_masks()
1626{
1627 pg_num_mask = (1 << cbits(pg_num-1)) - 1;
1628 pgp_num_mask = (1 << cbits(pgp_num-1)) - 1;
1629}
1630
1631unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid) const
1632{
1633 if (pg_num == pg_num_mask + 1)
1634 return pg_num; // power-of-2 split
1635 unsigned mask = pg_num_mask >> 1;
1636 if ((pgid.ps() & mask) < (pg_num & mask))
1637 return pg_num_mask + 1; // smaller bin size (already split)
1638 else
1639 return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
1640}
1641
11fdf7f2
TL
1642bool pg_pool_t::is_pending_merge(pg_t pgid, bool *target) const
1643{
1644 if (pg_num_pending >= pg_num) {
1645 return false;
1646 }
1647 if (pgid.ps() >= pg_num_pending && pgid.ps() < pg_num) {
1648 if (target) {
1649 *target = false;
1650 }
1651 return true;
1652 }
1653 for (unsigned ps = pg_num_pending; ps < pg_num; ++ps) {
1654 if (pg_t(ps, pgid.pool()).get_parent() == pgid) {
1655 if (target) {
1656 *target = true;
1657 }
1658 return true;
1659 }
1660 }
1661 return false;
1662}
1663
7c673cae
FG
1664/*
1665 * we have two snap modes:
11fdf7f2 1666 * - pool snaps
7c673cae
FG
1667 * - snap existence/non-existence defined by snaps[] and snap_seq
1668 * - user managed snaps
11fdf7f2 1669 * - existence tracked by librados user
7c673cae
FG
1670 */
1671bool pg_pool_t::is_pool_snaps_mode() const
1672{
11fdf7f2 1673 return has_flag(FLAG_POOL_SNAPS);
7c673cae
FG
1674}
1675
1676bool pg_pool_t::is_unmanaged_snaps_mode() const
1677{
11fdf7f2 1678 return has_flag(FLAG_SELFMANAGED_SNAPS);
7c673cae
FG
1679}
1680
1681bool pg_pool_t::is_removed_snap(snapid_t s) const
1682{
1683 if (is_pool_snaps_mode())
1684 return s <= get_snap_seq() && snaps.count(s) == 0;
1685 else
1686 return removed_snaps.contains(s);
1687}
1688
7c673cae
FG
1689snapid_t pg_pool_t::snap_exists(const char *s) const
1690{
9f95a23c 1691 for (auto p = snaps.cbegin(); p != snaps.cend(); ++p)
7c673cae
FG
1692 if (p->second.name == s)
1693 return p->second.snapid;
1694 return 0;
1695}
1696
1697void pg_pool_t::add_snap(const char *n, utime_t stamp)
1698{
11fdf7f2
TL
1699 ceph_assert(!is_unmanaged_snaps_mode());
1700 flags |= FLAG_POOL_SNAPS;
7c673cae
FG
1701 snapid_t s = get_snap_seq() + 1;
1702 snap_seq = s;
1703 snaps[s].snapid = s;
1704 snaps[s].name = n;
1705 snaps[s].stamp = stamp;
1706}
1707
9f95a23c 1708uint64_t pg_pool_t::add_unmanaged_snap(bool preoctopus_compat)
7c673cae 1709{
11fdf7f2
TL
1710 ceph_assert(!is_pool_snaps_mode());
1711 if (snap_seq == 0) {
9f95a23c
TL
1712 if (preoctopus_compat) {
1713 // kludge for pre-mimic tracking of pool vs selfmanaged snaps. after
1714 // mimic this field is not decoded but our flag is set; pre-mimic, we
1715 // have a non-empty removed_snaps to signifiy a non-pool-snaps pool.
1716 removed_snaps.insert(snapid_t(1));
1717 }
7c673cae
FG
1718 snap_seq = 1;
1719 }
11fdf7f2 1720 flags |= FLAG_SELFMANAGED_SNAPS;
9f95a23c
TL
1721 snap_seq = snap_seq + 1;
1722 return snap_seq;
7c673cae
FG
1723}
1724
1725void pg_pool_t::remove_snap(snapid_t s)
1726{
11fdf7f2 1727 ceph_assert(snaps.count(s));
7c673cae
FG
1728 snaps.erase(s);
1729 snap_seq = snap_seq + 1;
1730}
1731
9f95a23c 1732void pg_pool_t::remove_unmanaged_snap(snapid_t s, bool preoctopus_compat)
7c673cae 1733{
11fdf7f2 1734 ceph_assert(is_unmanaged_snaps_mode());
9f95a23c
TL
1735 ++snap_seq;
1736 if (preoctopus_compat) {
1737 removed_snaps.insert(s);
1738 // try to add in the new seq, just to try to keep the interval_set contiguous
1739 if (!removed_snaps.contains(get_snap_seq())) {
1740 removed_snaps.insert(get_snap_seq());
1741 }
28e407b8 1742 }
7c673cae
FG
1743}
1744
1745SnapContext pg_pool_t::get_snap_context() const
1746{
1747 vector<snapid_t> s(snaps.size());
1748 unsigned i = 0;
9f95a23c 1749 for (auto p = snaps.crbegin(); p != snaps.crend(); ++p)
7c673cae
FG
1750 s[i++] = p->first;
1751 return SnapContext(get_snap_seq(), s);
1752}
1753
1754uint32_t pg_pool_t::hash_key(const string& key, const string& ns) const
1755{
1756 if (ns.empty())
1757 return ceph_str_hash(object_hash, key.data(), key.length());
1758 int nsl = ns.length();
1759 int len = key.length() + nsl + 1;
1760 char buf[len];
1761 memcpy(&buf[0], ns.data(), nsl);
1762 buf[nsl] = '\037';
1763 memcpy(&buf[nsl+1], key.data(), key.length());
1764 return ceph_str_hash(object_hash, &buf[0], len);
1765}
1766
1767uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v) const
1768{
1769 return ceph_stable_mod(v, pg_num, pg_num_mask);
1770}
1771
1772/*
1773 * map a raw pg (with full precision ps) into an actual pg, for storage
1774 */
1775pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
1776{
1777 pg.set_ps(ceph_stable_mod(pg.ps(), pg_num, pg_num_mask));
1778 return pg;
1779}
1780
1781/*
1782 * map raw pg (full precision ps) into a placement seed. include
1783 * pool id in that value so that different pools don't use the same
1784 * seeds.
1785 */
1786ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
1787{
1788 if (flags & FLAG_HASHPSPOOL) {
1789 // Hash the pool id so that pool PGs do not overlap.
1790 return
1791 crush_hash32_2(CRUSH_HASH_RJENKINS1,
1792 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
1793 pg.pool());
1794 } else {
1795 // Legacy behavior; add ps and pool together. This is not a great
1796 // idea because the PGs from each pool will essentially overlap on
1797 // top of each other: 0.5 == 1.4 == 2.3 == ...
1798 return
1799 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
1800 pg.pool();
1801 }
1802}
1803
1804uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
1805{
1806 uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
1807 if (pg_num == pg_num_mask + 1) {
1808 r &= ~pg_num_mask;
1809 } else {
1810 unsigned smaller_mask = pg_num_mask >> 1;
1811 if ((pg.ps() & smaller_mask) < (pg_num & smaller_mask)) {
1812 r &= ~pg_num_mask;
1813 } else {
1814 r &= ~smaller_mask;
1815 }
1816 }
1817 r |= pg.ps();
1818 return r;
1819}
1820
9f95a23c 1821void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 1822{
11fdf7f2 1823 using ceph::encode;
7c673cae
FG
1824 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1825 // this encoding matches the old struct ceph_pg_pool
1826 __u8 struct_v = 2;
11fdf7f2
TL
1827 encode(struct_v, bl);
1828 encode(type, bl);
1829 encode(size, bl);
1830 encode(crush_rule, bl);
1831 encode(object_hash, bl);
1832 encode(pg_num, bl);
1833 encode(pgp_num, bl);
7c673cae 1834 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1835 encode(lpg_num, bl);
1836 encode(lpgp_num, bl);
1837 encode(last_change, bl);
1838 encode(snap_seq, bl);
1839 encode(snap_epoch, bl);
7c673cae
FG
1840
1841 __u32 n = snaps.size();
11fdf7f2 1842 encode(n, bl);
7c673cae 1843 n = removed_snaps.num_intervals();
11fdf7f2 1844 encode(n, bl);
7c673cae 1845
11fdf7f2 1846 encode(auid, bl);
7c673cae 1847
11fdf7f2
TL
1848 encode_nohead(snaps, bl, features);
1849 encode_nohead(removed_snaps, bl);
7c673cae
FG
1850 return;
1851 }
1852
1853 if ((features & CEPH_FEATURE_OSDENC) == 0) {
1854 __u8 struct_v = 4;
11fdf7f2
TL
1855 encode(struct_v, bl);
1856 encode(type, bl);
1857 encode(size, bl);
1858 encode(crush_rule, bl);
1859 encode(object_hash, bl);
1860 encode(pg_num, bl);
1861 encode(pgp_num, bl);
7c673cae 1862 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1863 encode(lpg_num, bl);
1864 encode(lpgp_num, bl);
1865 encode(last_change, bl);
1866 encode(snap_seq, bl);
1867 encode(snap_epoch, bl);
1868 encode(snaps, bl, features);
1869 encode(removed_snaps, bl);
1870 encode(auid, bl);
1871 encode(flags, bl);
1872 encode((uint32_t)0, bl); // crash_replay_interval
7c673cae
FG
1873 return;
1874 }
1875
1876 if ((features & CEPH_FEATURE_OSD_POOLRESEND) == 0) {
1877 // we simply added last_force_op_resend here, which is a fully
1878 // backward compatible change. however, encoding the same map
1879 // differently between monitors triggers scrub noise (even though
1880 // they are decodable without the feature), so let's be pendantic
1881 // about it.
1882 ENCODE_START(14, 5, bl);
11fdf7f2
TL
1883 encode(type, bl);
1884 encode(size, bl);
1885 encode(crush_rule, bl);
1886 encode(object_hash, bl);
1887 encode(pg_num, bl);
1888 encode(pgp_num, bl);
7c673cae 1889 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1890 encode(lpg_num, bl);
1891 encode(lpgp_num, bl);
1892 encode(last_change, bl);
1893 encode(snap_seq, bl);
1894 encode(snap_epoch, bl);
1895 encode(snaps, bl, features);
1896 encode(removed_snaps, bl);
1897 encode(auid, bl);
1898 encode(flags, bl);
1899 encode((uint32_t)0, bl); // crash_replay_interval
1900 encode(min_size, bl);
1901 encode(quota_max_bytes, bl);
1902 encode(quota_max_objects, bl);
1903 encode(tiers, bl);
1904 encode(tier_of, bl);
7c673cae 1905 __u8 c = cache_mode;
11fdf7f2
TL
1906 encode(c, bl);
1907 encode(read_tier, bl);
1908 encode(write_tier, bl);
1909 encode(properties, bl);
1910 encode(hit_set_params, bl);
1911 encode(hit_set_period, bl);
1912 encode(hit_set_count, bl);
1913 encode(stripe_width, bl);
1914 encode(target_max_bytes, bl);
1915 encode(target_max_objects, bl);
1916 encode(cache_target_dirty_ratio_micro, bl);
1917 encode(cache_target_full_ratio_micro, bl);
1918 encode(cache_min_flush_age, bl);
1919 encode(cache_min_evict_age, bl);
1920 encode(erasure_code_profile, bl);
7c673cae
FG
1921 ENCODE_FINISH(bl);
1922 return;
1923 }
1924
11fdf7f2 1925 uint8_t v = 29;
28e407b8
AA
1926 // NOTE: any new encoding dependencies must be reflected by
1927 // SIGNIFICANT_FEATURES
7c673cae
FG
1928 if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
1929 // this was the first post-hammer thing we added; if it's missing, encode
1930 // like hammer.
1931 v = 21;
94b18763 1932 } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
7c673cae 1933 v = 24;
11fdf7f2
TL
1934 } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
1935 v = 26;
1936 } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1937 v = 27;
7c673cae
FG
1938 }
1939
1940 ENCODE_START(v, 5, bl);
11fdf7f2
TL
1941 encode(type, bl);
1942 encode(size, bl);
1943 encode(crush_rule, bl);
1944 encode(object_hash, bl);
1945 encode(pg_num, bl);
1946 encode(pgp_num, bl);
7c673cae 1947 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1948 encode(lpg_num, bl);
1949 encode(lpgp_num, bl);
1950 encode(last_change, bl);
1951 encode(snap_seq, bl);
1952 encode(snap_epoch, bl);
1953 encode(snaps, bl, features);
1954 encode(removed_snaps, bl);
1955 encode(auid, bl);
1956 if (v >= 27) {
1957 encode(flags, bl);
1958 } else {
1959 auto tmp = flags;
1960 tmp &= ~(FLAG_SELFMANAGED_SNAPS | FLAG_POOL_SNAPS | FLAG_CREATING);
1961 encode(tmp, bl);
1962 }
1963 encode((uint32_t)0, bl); // crash_replay_interval
1964 encode(min_size, bl);
1965 encode(quota_max_bytes, bl);
1966 encode(quota_max_objects, bl);
1967 encode(tiers, bl);
1968 encode(tier_of, bl);
7c673cae 1969 __u8 c = cache_mode;
11fdf7f2
TL
1970 encode(c, bl);
1971 encode(read_tier, bl);
1972 encode(write_tier, bl);
1973 encode(properties, bl);
1974 encode(hit_set_params, bl);
1975 encode(hit_set_period, bl);
1976 encode(hit_set_count, bl);
1977 encode(stripe_width, bl);
1978 encode(target_max_bytes, bl);
1979 encode(target_max_objects, bl);
1980 encode(cache_target_dirty_ratio_micro, bl);
1981 encode(cache_target_full_ratio_micro, bl);
1982 encode(cache_min_flush_age, bl);
1983 encode(cache_min_evict_age, bl);
1984 encode(erasure_code_profile, bl);
1985 encode(last_force_op_resend_preluminous, bl);
1986 encode(min_read_recency_for_promote, bl);
1987 encode(expected_num_objects, bl);
7c673cae 1988 if (v >= 19) {
11fdf7f2 1989 encode(cache_target_dirty_high_ratio_micro, bl);
7c673cae
FG
1990 }
1991 if (v >= 20) {
11fdf7f2 1992 encode(min_write_recency_for_promote, bl);
7c673cae
FG
1993 }
1994 if (v >= 21) {
11fdf7f2 1995 encode(use_gmt_hitset, bl);
7c673cae
FG
1996 }
1997 if (v >= 22) {
11fdf7f2 1998 encode(fast_read, bl);
7c673cae
FG
1999 }
2000 if (v >= 23) {
11fdf7f2
TL
2001 encode(hit_set_grade_decay_rate, bl);
2002 encode(hit_set_search_last_n, bl);
7c673cae
FG
2003 }
2004 if (v >= 24) {
11fdf7f2 2005 encode(opts, bl, features);
7c673cae
FG
2006 }
2007 if (v >= 25) {
11fdf7f2 2008 encode(last_force_op_resend_prenautilus, bl);
7c673cae 2009 }
c07f9fc5 2010 if (v >= 26) {
11fdf7f2
TL
2011 encode(application_metadata, bl);
2012 }
2013 if (v >= 27) {
2014 encode(create_time, bl);
2015 }
2016 if (v >= 28) {
2017 encode(pg_num_target, bl);
2018 encode(pgp_num_target, bl);
2019 encode(pg_num_pending, bl);
2020 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_started from 14.1.[01]
2021 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_clean from 14.1.[01]
2022 encode(last_force_op_resend, bl);
2023 encode(pg_autoscale_mode, bl);
2024 }
2025 if (v >= 29) {
2026 encode(last_pg_merge_meta, bl);
c07f9fc5 2027 }
7c673cae
FG
2028 ENCODE_FINISH(bl);
2029}
2030
9f95a23c 2031void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 2032{
11fdf7f2
TL
2033 DECODE_START_LEGACY_COMPAT_LEN(29, 5, 5, bl);
2034 decode(type, bl);
2035 decode(size, bl);
2036 decode(crush_rule, bl);
2037 decode(object_hash, bl);
2038 decode(pg_num, bl);
2039 decode(pgp_num, bl);
7c673cae
FG
2040 {
2041 __u32 lpg_num, lpgp_num;
11fdf7f2
TL
2042 decode(lpg_num, bl);
2043 decode(lpgp_num, bl);
7c673cae 2044 }
11fdf7f2
TL
2045 decode(last_change, bl);
2046 decode(snap_seq, bl);
2047 decode(snap_epoch, bl);
7c673cae
FG
2048
2049 if (struct_v >= 3) {
11fdf7f2
TL
2050 decode(snaps, bl);
2051 decode(removed_snaps, bl);
2052 decode(auid, bl);
7c673cae
FG
2053 } else {
2054 __u32 n, m;
11fdf7f2
TL
2055 decode(n, bl);
2056 decode(m, bl);
2057 decode(auid, bl);
2058 decode_nohead(n, snaps, bl);
2059 decode_nohead(m, removed_snaps, bl);
7c673cae
FG
2060 }
2061
2062 if (struct_v >= 4) {
11fdf7f2
TL
2063 decode(flags, bl);
2064 uint32_t crash_replay_interval;
2065 decode(crash_replay_interval, bl);
7c673cae
FG
2066 } else {
2067 flags = 0;
11fdf7f2
TL
2068 }
2069 // upgrade path for selfmanaged vs pool snaps
2070 if (snap_seq > 0 && (flags & (FLAG_SELFMANAGED_SNAPS|FLAG_POOL_SNAPS)) == 0) {
2071 if (!removed_snaps.empty()) {
2072 flags |= FLAG_SELFMANAGED_SNAPS;
2073 } else {
2074 flags |= FLAG_POOL_SNAPS;
2075 }
7c673cae
FG
2076 }
2077 if (struct_v >= 7) {
11fdf7f2 2078 decode(min_size, bl);
7c673cae
FG
2079 } else {
2080 min_size = size - size/2;
2081 }
2082 if (struct_v >= 8) {
11fdf7f2
TL
2083 decode(quota_max_bytes, bl);
2084 decode(quota_max_objects, bl);
7c673cae
FG
2085 }
2086 if (struct_v >= 9) {
11fdf7f2
TL
2087 decode(tiers, bl);
2088 decode(tier_of, bl);
7c673cae 2089 __u8 v;
11fdf7f2 2090 decode(v, bl);
7c673cae 2091 cache_mode = (cache_mode_t)v;
11fdf7f2
TL
2092 decode(read_tier, bl);
2093 decode(write_tier, bl);
7c673cae
FG
2094 }
2095 if (struct_v >= 10) {
11fdf7f2 2096 decode(properties, bl);
7c673cae
FG
2097 }
2098 if (struct_v >= 11) {
11fdf7f2
TL
2099 decode(hit_set_params, bl);
2100 decode(hit_set_period, bl);
2101 decode(hit_set_count, bl);
7c673cae
FG
2102 } else {
2103 pg_pool_t def;
2104 hit_set_period = def.hit_set_period;
2105 hit_set_count = def.hit_set_count;
2106 }
2107 if (struct_v >= 12) {
11fdf7f2 2108 decode(stripe_width, bl);
7c673cae
FG
2109 } else {
2110 set_stripe_width(0);
2111 }
2112 if (struct_v >= 13) {
11fdf7f2
TL
2113 decode(target_max_bytes, bl);
2114 decode(target_max_objects, bl);
2115 decode(cache_target_dirty_ratio_micro, bl);
2116 decode(cache_target_full_ratio_micro, bl);
2117 decode(cache_min_flush_age, bl);
2118 decode(cache_min_evict_age, bl);
7c673cae
FG
2119 } else {
2120 target_max_bytes = 0;
2121 target_max_objects = 0;
2122 cache_target_dirty_ratio_micro = 0;
2123 cache_target_full_ratio_micro = 0;
2124 cache_min_flush_age = 0;
2125 cache_min_evict_age = 0;
2126 }
2127 if (struct_v >= 14) {
11fdf7f2 2128 decode(erasure_code_profile, bl);
7c673cae
FG
2129 }
2130 if (struct_v >= 15) {
11fdf7f2 2131 decode(last_force_op_resend_preluminous, bl);
7c673cae
FG
2132 } else {
2133 last_force_op_resend_preluminous = 0;
2134 }
2135 if (struct_v >= 16) {
11fdf7f2 2136 decode(min_read_recency_for_promote, bl);
7c673cae
FG
2137 } else {
2138 min_read_recency_for_promote = 1;
2139 }
2140 if (struct_v >= 17) {
11fdf7f2 2141 decode(expected_num_objects, bl);
7c673cae
FG
2142 } else {
2143 expected_num_objects = 0;
2144 }
2145 if (struct_v >= 19) {
11fdf7f2 2146 decode(cache_target_dirty_high_ratio_micro, bl);
7c673cae
FG
2147 } else {
2148 cache_target_dirty_high_ratio_micro = cache_target_dirty_ratio_micro;
2149 }
2150 if (struct_v >= 20) {
11fdf7f2 2151 decode(min_write_recency_for_promote, bl);
7c673cae
FG
2152 } else {
2153 min_write_recency_for_promote = 1;
2154 }
2155 if (struct_v >= 21) {
11fdf7f2 2156 decode(use_gmt_hitset, bl);
7c673cae
FG
2157 } else {
2158 use_gmt_hitset = false;
2159 }
2160 if (struct_v >= 22) {
11fdf7f2 2161 decode(fast_read, bl);
7c673cae
FG
2162 } else {
2163 fast_read = false;
2164 }
2165 if (struct_v >= 23) {
11fdf7f2
TL
2166 decode(hit_set_grade_decay_rate, bl);
2167 decode(hit_set_search_last_n, bl);
7c673cae
FG
2168 } else {
2169 hit_set_grade_decay_rate = 0;
2170 hit_set_search_last_n = 1;
2171 }
2172 if (struct_v >= 24) {
11fdf7f2 2173 decode(opts, bl);
7c673cae
FG
2174 }
2175 if (struct_v >= 25) {
11fdf7f2 2176 decode(last_force_op_resend_prenautilus, bl);
7c673cae 2177 } else {
11fdf7f2 2178 last_force_op_resend_prenautilus = last_force_op_resend_preluminous;
7c673cae 2179 }
c07f9fc5 2180 if (struct_v >= 26) {
11fdf7f2
TL
2181 decode(application_metadata, bl);
2182 }
2183 if (struct_v >= 27) {
2184 decode(create_time, bl);
2185 }
2186 if (struct_v >= 28) {
2187 decode(pg_num_target, bl);
2188 decode(pgp_num_target, bl);
2189 decode(pg_num_pending, bl);
2190 epoch_t old_merge_last_epoch_clean, old_merge_last_epoch_started;
2191 decode(old_merge_last_epoch_started, bl);
2192 decode(old_merge_last_epoch_clean, bl);
2193 decode(last_force_op_resend, bl);
2194 decode(pg_autoscale_mode, bl);
2195 if (struct_v >= 29) {
2196 decode(last_pg_merge_meta, bl);
2197 } else {
2198 last_pg_merge_meta.last_epoch_clean = old_merge_last_epoch_clean;
2199 last_pg_merge_meta.last_epoch_started = old_merge_last_epoch_started;
2200 }
2201 } else {
2202 pg_num_target = pg_num;
2203 pgp_num_target = pgp_num;
2204 pg_num_pending = pg_num;
2205 last_force_op_resend = last_force_op_resend_prenautilus;
9f95a23c 2206 pg_autoscale_mode = pg_autoscale_mode_t::WARN; // default to warn on upgrade
c07f9fc5 2207 }
7c673cae
FG
2208 DECODE_FINISH(bl);
2209 calc_pg_masks();
2210 calc_grade_table();
2211}
2212
2213void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
2214{
2215 pg_pool_t a;
2216 o.push_back(new pg_pool_t(a));
2217
11fdf7f2 2218 a.create_time = utime_t(4,5);
7c673cae
FG
2219 a.type = TYPE_REPLICATED;
2220 a.size = 2;
31f18b77 2221 a.crush_rule = 3;
7c673cae
FG
2222 a.object_hash = 4;
2223 a.pg_num = 6;
11fdf7f2
TL
2224 a.pgp_num = 4;
2225 a.pgp_num_target = 4;
2226 a.pg_num_target = 5;
2227 a.pg_num_pending = 5;
2228 a.last_pg_merge_meta.last_epoch_started = 2;
2229 a.last_pg_merge_meta.last_epoch_clean = 2;
7c673cae
FG
2230 a.last_change = 9;
2231 a.last_force_op_resend = 123823;
2232 a.last_force_op_resend_preluminous = 123824;
2233 a.snap_seq = 10;
2234 a.snap_epoch = 11;
11fdf7f2 2235 a.flags = FLAG_POOL_SNAPS;
7c673cae 2236 a.auid = 12;
7c673cae
FG
2237 a.quota_max_bytes = 473;
2238 a.quota_max_objects = 474;
2239 o.push_back(new pg_pool_t(a));
2240
2241 a.snaps[3].name = "asdf";
2242 a.snaps[3].snapid = 3;
2243 a.snaps[3].stamp = utime_t(123, 4);
2244 a.snaps[6].name = "qwer";
2245 a.snaps[6].snapid = 6;
2246 a.snaps[6].stamp = utime_t(23423, 4);
2247 o.push_back(new pg_pool_t(a));
2248
11fdf7f2
TL
2249 a.flags = FLAG_SELFMANAGED_SNAPS;
2250 a.snaps.clear();
2251 a.removed_snaps.insert(2);
7c673cae
FG
2252 a.quota_max_bytes = 2473;
2253 a.quota_max_objects = 4374;
2254 a.tiers.insert(0);
2255 a.tiers.insert(1);
2256 a.tier_of = 2;
2257 a.cache_mode = CACHEMODE_WRITEBACK;
2258 a.read_tier = 1;
2259 a.write_tier = 1;
2260 a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
2261 a.hit_set_period = 3600;
2262 a.hit_set_count = 8;
2263 a.min_read_recency_for_promote = 1;
2264 a.min_write_recency_for_promote = 1;
2265 a.hit_set_grade_decay_rate = 50;
2266 a.hit_set_search_last_n = 1;
2267 a.calc_grade_table();
2268 a.set_stripe_width(12345);
2269 a.target_max_bytes = 1238132132;
2270 a.target_max_objects = 1232132;
2271 a.cache_target_dirty_ratio_micro = 187232;
2272 a.cache_target_dirty_high_ratio_micro = 309856;
2273 a.cache_target_full_ratio_micro = 987222;
2274 a.cache_min_flush_age = 231;
2275 a.cache_min_evict_age = 2321;
2276 a.erasure_code_profile = "profile in osdmap";
2277 a.expected_num_objects = 123456;
2278 a.fast_read = false;
c07f9fc5 2279 a.application_metadata = {{"rbd", {{"key", "value"}}}};
7c673cae
FG
2280 o.push_back(new pg_pool_t(a));
2281}
2282
2283ostream& operator<<(ostream& out, const pg_pool_t& p)
2284{
9f95a23c
TL
2285 out << p.get_type_name();
2286 if (p.get_type_name() == "erasure") {
2287 out << " profile " << p.erasure_code_profile;
2288 }
2289 out << " size " << p.get_size()
7c673cae 2290 << " min_size " << p.get_min_size()
31f18b77 2291 << " crush_rule " << p.get_crush_rule()
7c673cae
FG
2292 << " object_hash " << p.get_object_hash_name()
2293 << " pg_num " << p.get_pg_num()
11fdf7f2
TL
2294 << " pgp_num " << p.get_pgp_num();
2295 if (p.get_pg_num_target() != p.get_pg_num()) {
2296 out << " pg_num_target " << p.get_pg_num_target();
2297 }
2298 if (p.get_pgp_num_target() != p.get_pgp_num()) {
2299 out << " pgp_num_target " << p.get_pgp_num_target();
2300 }
2301 if (p.get_pg_num_pending() != p.get_pg_num()) {
2302 out << " pg_num_pending " << p.get_pg_num_pending();
2303 }
9f95a23c 2304 if (p.pg_autoscale_mode != pg_pool_t::pg_autoscale_mode_t::UNKNOWN) {
11fdf7f2
TL
2305 out << " autoscale_mode " << p.get_pg_autoscale_mode_name(p.pg_autoscale_mode);
2306 }
2307 out << " last_change " << p.get_last_change();
7c673cae 2308 if (p.get_last_force_op_resend() ||
11fdf7f2 2309 p.get_last_force_op_resend_prenautilus() ||
7c673cae
FG
2310 p.get_last_force_op_resend_preluminous())
2311 out << " lfor " << p.get_last_force_op_resend() << "/"
11fdf7f2 2312 << p.get_last_force_op_resend_prenautilus() << "/"
7c673cae
FG
2313 << p.get_last_force_op_resend_preluminous();
2314 if (p.get_auid())
2315 out << " owner " << p.get_auid();
2316 if (p.flags)
2317 out << " flags " << p.get_flags_string();
7c673cae
FG
2318 if (p.quota_max_bytes)
2319 out << " max_bytes " << p.quota_max_bytes;
2320 if (p.quota_max_objects)
2321 out << " max_objects " << p.quota_max_objects;
2322 if (!p.tiers.empty())
2323 out << " tiers " << p.tiers;
2324 if (p.is_tier())
2325 out << " tier_of " << p.tier_of;
2326 if (p.has_read_tier())
2327 out << " read_tier " << p.read_tier;
2328 if (p.has_write_tier())
2329 out << " write_tier " << p.write_tier;
2330 if (p.cache_mode)
2331 out << " cache_mode " << p.get_cache_mode_name();
2332 if (p.target_max_bytes)
2333 out << " target_bytes " << p.target_max_bytes;
2334 if (p.target_max_objects)
2335 out << " target_objects " << p.target_max_objects;
2336 if (p.hit_set_params.get_type() != HitSet::TYPE_NONE) {
2337 out << " hit_set " << p.hit_set_params
2338 << " " << p.hit_set_period << "s"
2339 << " x" << p.hit_set_count << " decay_rate "
2340 << p.hit_set_grade_decay_rate
2341 << " search_last_n " << p.hit_set_search_last_n;
2342 }
2343 if (p.min_read_recency_for_promote)
2344 out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
2345 if (p.min_write_recency_for_promote)
2346 out << " min_write_recency_for_promote " << p.min_write_recency_for_promote;
2347 out << " stripe_width " << p.get_stripe_width();
2348 if (p.expected_num_objects)
2349 out << " expected_num_objects " << p.expected_num_objects;
2350 if (p.fast_read)
2351 out << " fast_read " << p.fast_read;
2352 out << p.opts;
c07f9fc5
FG
2353 if (!p.application_metadata.empty()) {
2354 out << " application ";
2355 for (auto it = p.application_metadata.begin();
2356 it != p.application_metadata.end(); ++it) {
2357 if (it != p.application_metadata.begin())
2358 out << ",";
2359 out << it->first;
2360 }
2361 }
7c673cae
FG
2362 return out;
2363}
2364
2365
2366// -- object_stat_sum_t --
2367
2368void object_stat_sum_t::dump(Formatter *f) const
2369{
2370 f->dump_int("num_bytes", num_bytes);
2371 f->dump_int("num_objects", num_objects);
2372 f->dump_int("num_object_clones", num_object_clones);
2373 f->dump_int("num_object_copies", num_object_copies);
2374 f->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary);
2375 f->dump_int("num_objects_missing", num_objects_missing);
2376 f->dump_int("num_objects_degraded", num_objects_degraded);
2377 f->dump_int("num_objects_misplaced", num_objects_misplaced);
2378 f->dump_int("num_objects_unfound", num_objects_unfound);
2379 f->dump_int("num_objects_dirty", num_objects_dirty);
2380 f->dump_int("num_whiteouts", num_whiteouts);
2381 f->dump_int("num_read", num_rd);
2382 f->dump_int("num_read_kb", num_rd_kb);
2383 f->dump_int("num_write", num_wr);
2384 f->dump_int("num_write_kb", num_wr_kb);
2385 f->dump_int("num_scrub_errors", num_scrub_errors);
2386 f->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors);
2387 f->dump_int("num_deep_scrub_errors", num_deep_scrub_errors);
2388 f->dump_int("num_objects_recovered", num_objects_recovered);
2389 f->dump_int("num_bytes_recovered", num_bytes_recovered);
2390 f->dump_int("num_keys_recovered", num_keys_recovered);
2391 f->dump_int("num_objects_omap", num_objects_omap);
2392 f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
2393 f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
2394 f->dump_int("num_flush", num_flush);
2395 f->dump_int("num_flush_kb", num_flush_kb);
2396 f->dump_int("num_evict", num_evict);
2397 f->dump_int("num_evict_kb", num_evict_kb);
2398 f->dump_int("num_promote", num_promote);
2399 f->dump_int("num_flush_mode_high", num_flush_mode_high);
2400 f->dump_int("num_flush_mode_low", num_flush_mode_low);
2401 f->dump_int("num_evict_mode_some", num_evict_mode_some);
2402 f->dump_int("num_evict_mode_full", num_evict_mode_full);
2403 f->dump_int("num_objects_pinned", num_objects_pinned);
2404 f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
28e407b8 2405 f->dump_int("num_large_omap_objects", num_large_omap_objects);
11fdf7f2
TL
2406 f->dump_int("num_objects_manifest", num_objects_manifest);
2407 f->dump_int("num_omap_bytes", num_omap_bytes);
2408 f->dump_int("num_omap_keys", num_omap_keys);
2409 f->dump_int("num_objects_repaired", num_objects_repaired);
7c673cae
FG
2410}
2411
9f95a23c 2412void object_stat_sum_t::encode(ceph::buffer::list& bl) const
7c673cae 2413{
11fdf7f2 2414 ENCODE_START(20, 14, bl);
7c673cae
FG
2415#if defined(CEPH_LITTLE_ENDIAN)
2416 bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
2417#else
11fdf7f2
TL
2418 encode(num_bytes, bl);
2419 encode(num_objects, bl);
2420 encode(num_object_clones, bl);
2421 encode(num_object_copies, bl);
2422 encode(num_objects_missing_on_primary, bl);
2423 encode(num_objects_degraded, bl);
2424 encode(num_objects_unfound, bl);
2425 encode(num_rd, bl);
2426 encode(num_rd_kb, bl);
2427 encode(num_wr, bl);
2428 encode(num_wr_kb, bl);
2429 encode(num_scrub_errors, bl);
2430 encode(num_objects_recovered, bl);
2431 encode(num_bytes_recovered, bl);
2432 encode(num_keys_recovered, bl);
2433 encode(num_shallow_scrub_errors, bl);
2434 encode(num_deep_scrub_errors, bl);
2435 encode(num_objects_dirty, bl);
2436 encode(num_whiteouts, bl);
2437 encode(num_objects_omap, bl);
2438 encode(num_objects_hit_set_archive, bl);
2439 encode(num_objects_misplaced, bl);
2440 encode(num_bytes_hit_set_archive, bl);
2441 encode(num_flush, bl);
2442 encode(num_flush_kb, bl);
2443 encode(num_evict, bl);
2444 encode(num_evict_kb, bl);
2445 encode(num_promote, bl);
2446 encode(num_flush_mode_high, bl);
2447 encode(num_flush_mode_low, bl);
2448 encode(num_evict_mode_some, bl);
2449 encode(num_evict_mode_full, bl);
2450 encode(num_objects_pinned, bl);
2451 encode(num_objects_missing, bl);
2452 encode(num_legacy_snapsets, bl);
2453 encode(num_large_omap_objects, bl);
2454 encode(num_objects_manifest, bl);
2455 encode(num_omap_bytes, bl);
2456 encode(num_omap_keys, bl);
2457 encode(num_objects_repaired, bl);
7c673cae
FG
2458#endif
2459 ENCODE_FINISH(bl);
2460}
2461
9f95a23c 2462void object_stat_sum_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
2463{
2464 bool decode_finish = false;
11fdf7f2
TL
2465 static const int STAT_SUM_DECODE_VERSION = 20;
2466 DECODE_START(STAT_SUM_DECODE_VERSION, bl);
7c673cae 2467#if defined(CEPH_LITTLE_ENDIAN)
11fdf7f2 2468 if (struct_v == STAT_SUM_DECODE_VERSION) {
7c673cae
FG
2469 bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
2470 decode_finish = true;
2471 }
2472#endif
2473 if (!decode_finish) {
11fdf7f2
TL
2474 decode(num_bytes, bl);
2475 decode(num_objects, bl);
2476 decode(num_object_clones, bl);
2477 decode(num_object_copies, bl);
2478 decode(num_objects_missing_on_primary, bl);
2479 decode(num_objects_degraded, bl);
2480 decode(num_objects_unfound, bl);
2481 decode(num_rd, bl);
2482 decode(num_rd_kb, bl);
2483 decode(num_wr, bl);
2484 decode(num_wr_kb, bl);
2485 decode(num_scrub_errors, bl);
2486 decode(num_objects_recovered, bl);
2487 decode(num_bytes_recovered, bl);
2488 decode(num_keys_recovered, bl);
2489 decode(num_shallow_scrub_errors, bl);
2490 decode(num_deep_scrub_errors, bl);
2491 decode(num_objects_dirty, bl);
2492 decode(num_whiteouts, bl);
2493 decode(num_objects_omap, bl);
2494 decode(num_objects_hit_set_archive, bl);
2495 decode(num_objects_misplaced, bl);
2496 decode(num_bytes_hit_set_archive, bl);
2497 decode(num_flush, bl);
2498 decode(num_flush_kb, bl);
2499 decode(num_evict, bl);
2500 decode(num_evict_kb, bl);
2501 decode(num_promote, bl);
2502 decode(num_flush_mode_high, bl);
2503 decode(num_flush_mode_low, bl);
2504 decode(num_evict_mode_some, bl);
2505 decode(num_evict_mode_full, bl);
2506 decode(num_objects_pinned, bl);
2507 decode(num_objects_missing, bl);
7c673cae 2508 if (struct_v >= 16) {
11fdf7f2 2509 decode(num_legacy_snapsets, bl);
7c673cae
FG
2510 } else {
2511 num_legacy_snapsets = num_object_clones; // upper bound
2512 }
28e407b8 2513 if (struct_v >= 17) {
11fdf7f2
TL
2514 decode(num_large_omap_objects, bl);
2515 }
2516 if (struct_v >= 18) {
2517 decode(num_objects_manifest, bl);
2518 }
2519 if (struct_v >= 19) {
2520 decode(num_omap_bytes, bl);
2521 decode(num_omap_keys, bl);
2522 }
2523 if (struct_v >= 20) {
2524 decode(num_objects_repaired, bl);
28e407b8 2525 }
7c673cae
FG
2526 }
2527 DECODE_FINISH(bl);
2528}
2529
2530void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
2531{
2532 object_stat_sum_t a;
2533
2534 a.num_bytes = 1;
2535 a.num_objects = 3;
2536 a.num_object_clones = 4;
2537 a.num_object_copies = 5;
2538 a.num_objects_missing_on_primary = 6;
2539 a.num_objects_missing = 123;
2540 a.num_objects_degraded = 7;
2541 a.num_objects_unfound = 8;
2542 a.num_rd = 9; a.num_rd_kb = 10;
2543 a.num_wr = 11; a.num_wr_kb = 12;
2544 a.num_objects_recovered = 14;
2545 a.num_bytes_recovered = 15;
2546 a.num_keys_recovered = 16;
2547 a.num_deep_scrub_errors = 17;
2548 a.num_shallow_scrub_errors = 18;
2549 a.num_scrub_errors = a.num_deep_scrub_errors + a.num_shallow_scrub_errors;
2550 a.num_objects_dirty = 21;
2551 a.num_whiteouts = 22;
2552 a.num_objects_misplaced = 1232;
2553 a.num_objects_hit_set_archive = 2;
2554 a.num_bytes_hit_set_archive = 27;
2555 a.num_flush = 5;
2556 a.num_flush_kb = 6;
2557 a.num_evict = 7;
2558 a.num_evict_kb = 8;
2559 a.num_promote = 9;
2560 a.num_flush_mode_high = 0;
2561 a.num_flush_mode_low = 1;
2562 a.num_evict_mode_some = 1;
2563 a.num_evict_mode_full = 0;
2564 a.num_objects_pinned = 20;
28e407b8 2565 a.num_large_omap_objects = 5;
11fdf7f2
TL
2566 a.num_objects_manifest = 2;
2567 a.num_omap_bytes = 20000;
2568 a.num_omap_keys = 200;
2569 a.num_objects_repaired = 300;
7c673cae
FG
2570 o.push_back(new object_stat_sum_t(a));
2571}
2572
2573void object_stat_sum_t::add(const object_stat_sum_t& o)
2574{
2575 num_bytes += o.num_bytes;
2576 num_objects += o.num_objects;
2577 num_object_clones += o.num_object_clones;
2578 num_object_copies += o.num_object_copies;
2579 num_objects_missing_on_primary += o.num_objects_missing_on_primary;
2580 num_objects_missing += o.num_objects_missing;
2581 num_objects_degraded += o.num_objects_degraded;
2582 num_objects_misplaced += o.num_objects_misplaced;
2583 num_rd += o.num_rd;
2584 num_rd_kb += o.num_rd_kb;
2585 num_wr += o.num_wr;
2586 num_wr_kb += o.num_wr_kb;
2587 num_objects_unfound += o.num_objects_unfound;
2588 num_scrub_errors += o.num_scrub_errors;
2589 num_shallow_scrub_errors += o.num_shallow_scrub_errors;
2590 num_deep_scrub_errors += o.num_deep_scrub_errors;
2591 num_objects_recovered += o.num_objects_recovered;
2592 num_bytes_recovered += o.num_bytes_recovered;
2593 num_keys_recovered += o.num_keys_recovered;
2594 num_objects_dirty += o.num_objects_dirty;
2595 num_whiteouts += o.num_whiteouts;
2596 num_objects_omap += o.num_objects_omap;
2597 num_objects_hit_set_archive += o.num_objects_hit_set_archive;
2598 num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
2599 num_flush += o.num_flush;
2600 num_flush_kb += o.num_flush_kb;
2601 num_evict += o.num_evict;
2602 num_evict_kb += o.num_evict_kb;
2603 num_promote += o.num_promote;
2604 num_flush_mode_high += o.num_flush_mode_high;
2605 num_flush_mode_low += o.num_flush_mode_low;
2606 num_evict_mode_some += o.num_evict_mode_some;
2607 num_evict_mode_full += o.num_evict_mode_full;
2608 num_objects_pinned += o.num_objects_pinned;
2609 num_legacy_snapsets += o.num_legacy_snapsets;
28e407b8 2610 num_large_omap_objects += o.num_large_omap_objects;
11fdf7f2
TL
2611 num_objects_manifest += o.num_objects_manifest;
2612 num_omap_bytes += o.num_omap_bytes;
2613 num_omap_keys += o.num_omap_keys;
2614 num_objects_repaired += o.num_objects_repaired;
7c673cae
FG
2615}
2616
2617void object_stat_sum_t::sub(const object_stat_sum_t& o)
2618{
2619 num_bytes -= o.num_bytes;
2620 num_objects -= o.num_objects;
2621 num_object_clones -= o.num_object_clones;
2622 num_object_copies -= o.num_object_copies;
2623 num_objects_missing_on_primary -= o.num_objects_missing_on_primary;
2624 num_objects_missing -= o.num_objects_missing;
2625 num_objects_degraded -= o.num_objects_degraded;
2626 num_objects_misplaced -= o.num_objects_misplaced;
2627 num_rd -= o.num_rd;
2628 num_rd_kb -= o.num_rd_kb;
2629 num_wr -= o.num_wr;
2630 num_wr_kb -= o.num_wr_kb;
2631 num_objects_unfound -= o.num_objects_unfound;
2632 num_scrub_errors -= o.num_scrub_errors;
2633 num_shallow_scrub_errors -= o.num_shallow_scrub_errors;
2634 num_deep_scrub_errors -= o.num_deep_scrub_errors;
2635 num_objects_recovered -= o.num_objects_recovered;
2636 num_bytes_recovered -= o.num_bytes_recovered;
2637 num_keys_recovered -= o.num_keys_recovered;
2638 num_objects_dirty -= o.num_objects_dirty;
2639 num_whiteouts -= o.num_whiteouts;
2640 num_objects_omap -= o.num_objects_omap;
2641 num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
2642 num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
2643 num_flush -= o.num_flush;
2644 num_flush_kb -= o.num_flush_kb;
2645 num_evict -= o.num_evict;
2646 num_evict_kb -= o.num_evict_kb;
2647 num_promote -= o.num_promote;
2648 num_flush_mode_high -= o.num_flush_mode_high;
2649 num_flush_mode_low -= o.num_flush_mode_low;
2650 num_evict_mode_some -= o.num_evict_mode_some;
2651 num_evict_mode_full -= o.num_evict_mode_full;
2652 num_objects_pinned -= o.num_objects_pinned;
2653 num_legacy_snapsets -= o.num_legacy_snapsets;
28e407b8 2654 num_large_omap_objects -= o.num_large_omap_objects;
11fdf7f2
TL
2655 num_objects_manifest -= o.num_objects_manifest;
2656 num_omap_bytes -= o.num_omap_bytes;
2657 num_omap_keys -= o.num_omap_keys;
2658 num_objects_repaired -= o.num_objects_repaired;
7c673cae
FG
2659}
2660
2661bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
2662{
2663 return
2664 l.num_bytes == r.num_bytes &&
2665 l.num_objects == r.num_objects &&
2666 l.num_object_clones == r.num_object_clones &&
2667 l.num_object_copies == r.num_object_copies &&
2668 l.num_objects_missing_on_primary == r.num_objects_missing_on_primary &&
2669 l.num_objects_missing == r.num_objects_missing &&
2670 l.num_objects_degraded == r.num_objects_degraded &&
2671 l.num_objects_misplaced == r.num_objects_misplaced &&
2672 l.num_objects_unfound == r.num_objects_unfound &&
2673 l.num_rd == r.num_rd &&
2674 l.num_rd_kb == r.num_rd_kb &&
2675 l.num_wr == r.num_wr &&
2676 l.num_wr_kb == r.num_wr_kb &&
2677 l.num_scrub_errors == r.num_scrub_errors &&
2678 l.num_shallow_scrub_errors == r.num_shallow_scrub_errors &&
2679 l.num_deep_scrub_errors == r.num_deep_scrub_errors &&
2680 l.num_objects_recovered == r.num_objects_recovered &&
2681 l.num_bytes_recovered == r.num_bytes_recovered &&
2682 l.num_keys_recovered == r.num_keys_recovered &&
2683 l.num_objects_dirty == r.num_objects_dirty &&
2684 l.num_whiteouts == r.num_whiteouts &&
2685 l.num_objects_omap == r.num_objects_omap &&
2686 l.num_objects_hit_set_archive == r.num_objects_hit_set_archive &&
2687 l.num_bytes_hit_set_archive == r.num_bytes_hit_set_archive &&
2688 l.num_flush == r.num_flush &&
2689 l.num_flush_kb == r.num_flush_kb &&
2690 l.num_evict == r.num_evict &&
2691 l.num_evict_kb == r.num_evict_kb &&
2692 l.num_promote == r.num_promote &&
2693 l.num_flush_mode_high == r.num_flush_mode_high &&
2694 l.num_flush_mode_low == r.num_flush_mode_low &&
2695 l.num_evict_mode_some == r.num_evict_mode_some &&
2696 l.num_evict_mode_full == r.num_evict_mode_full &&
2697 l.num_objects_pinned == r.num_objects_pinned &&
28e407b8 2698 l.num_legacy_snapsets == r.num_legacy_snapsets &&
11fdf7f2
TL
2699 l.num_large_omap_objects == r.num_large_omap_objects &&
2700 l.num_objects_manifest == r.num_objects_manifest &&
2701 l.num_omap_bytes == r.num_omap_bytes &&
2702 l.num_omap_keys == r.num_omap_keys &&
2703 l.num_objects_repaired == r.num_objects_repaired;
7c673cae
FG
2704}
2705
2706// -- object_stat_collection_t --
2707
2708void object_stat_collection_t::dump(Formatter *f) const
2709{
2710 f->open_object_section("stat_sum");
2711 sum.dump(f);
2712 f->close_section();
2713}
2714
9f95a23c 2715void object_stat_collection_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
2716{
2717 ENCODE_START(2, 2, bl);
11fdf7f2
TL
2718 encode(sum, bl);
2719 encode((__u32)0, bl);
7c673cae
FG
2720 ENCODE_FINISH(bl);
2721}
2722
9f95a23c 2723void object_stat_collection_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
2724{
2725 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2 2726 decode(sum, bl);
7c673cae
FG
2727 {
2728 map<string,object_stat_sum_t> cat_sum;
11fdf7f2 2729 decode(cat_sum, bl);
7c673cae
FG
2730 }
2731 DECODE_FINISH(bl);
2732}
2733
2734void object_stat_collection_t::generate_test_instances(list<object_stat_collection_t*>& o)
2735{
2736 object_stat_collection_t a;
2737 o.push_back(new object_stat_collection_t(a));
2738 list<object_stat_sum_t*> l;
2739 object_stat_sum_t::generate_test_instances(l);
9f95a23c 2740 for (auto p = l.begin(); p != l.end(); ++p) {
7c673cae
FG
2741 a.add(**p);
2742 o.push_back(new object_stat_collection_t(a));
2743 }
2744}
2745
2746
2747// -- pg_stat_t --
2748
2749bool pg_stat_t::is_acting_osd(int32_t osd, bool primary) const
2750{
2751 if (primary && osd == acting_primary) {
2752 return true;
2753 } else if (!primary) {
9f95a23c 2754 for(auto it = acting.cbegin(); it != acting.cend(); ++it)
7c673cae
FG
2755 {
2756 if (*it == osd)
2757 return true;
2758 }
2759 }
2760 return false;
2761}
2762
2763void pg_stat_t::dump(Formatter *f) const
2764{
2765 f->dump_stream("version") << version;
ec96510d
FG
2766 f->dump_unsigned("reported_seq", reported_seq);
2767 f->dump_unsigned("reported_epoch", reported_epoch);
7c673cae
FG
2768 f->dump_string("state", pg_state_string(state));
2769 f->dump_stream("last_fresh") << last_fresh;
2770 f->dump_stream("last_change") << last_change;
2771 f->dump_stream("last_active") << last_active;
2772 f->dump_stream("last_peered") << last_peered;
2773 f->dump_stream("last_clean") << last_clean;
2774 f->dump_stream("last_became_active") << last_became_active;
2775 f->dump_stream("last_became_peered") << last_became_peered;
2776 f->dump_stream("last_unstale") << last_unstale;
2777 f->dump_stream("last_undegraded") << last_undegraded;
2778 f->dump_stream("last_fullsized") << last_fullsized;
2779 f->dump_unsigned("mapping_epoch", mapping_epoch);
2780 f->dump_stream("log_start") << log_start;
2781 f->dump_stream("ondisk_log_start") << ondisk_log_start;
2782 f->dump_unsigned("created", created);
2783 f->dump_unsigned("last_epoch_clean", last_epoch_clean);
2784 f->dump_stream("parent") << parent;
2785 f->dump_unsigned("parent_split_bits", parent_split_bits);
2786 f->dump_stream("last_scrub") << last_scrub;
2787 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2788 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2789 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2790 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2791 f->dump_int("log_size", log_size);
2792 f->dump_int("ondisk_log_size", ondisk_log_size);
2793 f->dump_bool("stats_invalid", stats_invalid);
2794 f->dump_bool("dirty_stats_invalid", dirty_stats_invalid);
2795 f->dump_bool("omap_stats_invalid", omap_stats_invalid);
2796 f->dump_bool("hitset_stats_invalid", hitset_stats_invalid);
2797 f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid);
2798 f->dump_bool("pin_stats_invalid", pin_stats_invalid);
11fdf7f2 2799 f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
b32b8144 2800 f->dump_unsigned("snaptrimq_len", snaptrimq_len);
7c673cae
FG
2801 stats.dump(f);
2802 f->open_array_section("up");
9f95a23c 2803 for (auto p = up.cbegin(); p != up.cend(); ++p)
7c673cae
FG
2804 f->dump_int("osd", *p);
2805 f->close_section();
2806 f->open_array_section("acting");
9f95a23c 2807 for (auto p = acting.cbegin(); p != acting.cend(); ++p)
7c673cae
FG
2808 f->dump_int("osd", *p);
2809 f->close_section();
81eedcae
TL
2810 f->open_array_section("avail_no_missing");
2811 for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p)
2812 f->dump_stream("shard") << *p;
2813 f->close_section();
2814 f->open_array_section("object_location_counts");
2815 for (auto p = object_location_counts.cbegin(); p != object_location_counts.cend(); ++p) {
2816 f->open_object_section("entry");
2817 f->dump_stream("shards") << p->first;
2818 f->dump_int("objects", p->second);
2819 f->close_section();
2820 }
2821 f->close_section();
7c673cae 2822 f->open_array_section("blocked_by");
9f95a23c 2823 for (auto p = blocked_by.cbegin(); p != blocked_by.cend(); ++p)
7c673cae
FG
2824 f->dump_int("osd", *p);
2825 f->close_section();
2826 f->dump_int("up_primary", up_primary);
2827 f->dump_int("acting_primary", acting_primary);
11fdf7f2 2828 f->open_array_section("purged_snaps");
9f95a23c 2829 for (auto i = purged_snaps.begin(); i != purged_snaps.end(); ++i) {
11fdf7f2
TL
2830 f->open_object_section("interval");
2831 f->dump_stream("start") << i.get_start();
2832 f->dump_stream("length") << i.get_len();
2833 f->close_section();
2834 }
2835 f->close_section();
7c673cae
FG
2836}
2837
2838void pg_stat_t::dump_brief(Formatter *f) const
2839{
2840 f->dump_string("state", pg_state_string(state));
2841 f->open_array_section("up");
9f95a23c 2842 for (auto p = up.cbegin(); p != up.cend(); ++p)
7c673cae
FG
2843 f->dump_int("osd", *p);
2844 f->close_section();
2845 f->open_array_section("acting");
9f95a23c 2846 for (auto p = acting.cbegin(); p != acting.cend(); ++p)
7c673cae
FG
2847 f->dump_int("osd", *p);
2848 f->close_section();
2849 f->dump_int("up_primary", up_primary);
2850 f->dump_int("acting_primary", acting_primary);
2851}
2852
9f95a23c 2853void pg_stat_t::encode(ceph::buffer::list &bl) const
7c673cae 2854{
81eedcae 2855 ENCODE_START(26, 22, bl);
11fdf7f2
TL
2856 encode(version, bl);
2857 encode(reported_seq, bl);
2858 encode(reported_epoch, bl);
2859 encode((__u32)state, bl); // for older peers
2860 encode(log_start, bl);
2861 encode(ondisk_log_start, bl);
2862 encode(created, bl);
2863 encode(last_epoch_clean, bl);
2864 encode(parent, bl);
2865 encode(parent_split_bits, bl);
2866 encode(last_scrub, bl);
2867 encode(last_scrub_stamp, bl);
2868 encode(stats, bl);
2869 encode(log_size, bl);
2870 encode(ondisk_log_size, bl);
2871 encode(up, bl);
2872 encode(acting, bl);
2873 encode(last_fresh, bl);
2874 encode(last_change, bl);
2875 encode(last_active, bl);
2876 encode(last_clean, bl);
2877 encode(last_unstale, bl);
2878 encode(mapping_epoch, bl);
2879 encode(last_deep_scrub, bl);
2880 encode(last_deep_scrub_stamp, bl);
2881 encode(stats_invalid, bl);
2882 encode(last_clean_scrub_stamp, bl);
2883 encode(last_became_active, bl);
2884 encode(dirty_stats_invalid, bl);
2885 encode(up_primary, bl);
2886 encode(acting_primary, bl);
2887 encode(omap_stats_invalid, bl);
2888 encode(hitset_stats_invalid, bl);
2889 encode(blocked_by, bl);
2890 encode(last_undegraded, bl);
2891 encode(last_fullsized, bl);
2892 encode(hitset_bytes_stats_invalid, bl);
2893 encode(last_peered, bl);
2894 encode(last_became_peered, bl);
2895 encode(pin_stats_invalid, bl);
2896 encode(snaptrimq_len, bl);
2897 __u32 top_state = (state >> 32);
2898 encode(top_state, bl);
2899 encode(purged_snaps, bl);
2900 encode(manifest_stats_invalid, bl);
81eedcae
TL
2901 encode(avail_no_missing, bl);
2902 encode(object_location_counts, bl);
7c673cae
FG
2903 ENCODE_FINISH(bl);
2904}
2905
9f95a23c 2906void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
2907{
2908 bool tmp;
11fdf7f2 2909 uint32_t old_state;
81eedcae 2910 DECODE_START(26, bl);
11fdf7f2
TL
2911 decode(version, bl);
2912 decode(reported_seq, bl);
2913 decode(reported_epoch, bl);
2914 decode(old_state, bl);
2915 decode(log_start, bl);
2916 decode(ondisk_log_start, bl);
2917 decode(created, bl);
2918 decode(last_epoch_clean, bl);
2919 decode(parent, bl);
2920 decode(parent_split_bits, bl);
2921 decode(last_scrub, bl);
2922 decode(last_scrub_stamp, bl);
2923 decode(stats, bl);
2924 decode(log_size, bl);
2925 decode(ondisk_log_size, bl);
2926 decode(up, bl);
2927 decode(acting, bl);
2928 decode(last_fresh, bl);
2929 decode(last_change, bl);
2930 decode(last_active, bl);
2931 decode(last_clean, bl);
2932 decode(last_unstale, bl);
2933 decode(mapping_epoch, bl);
2934 decode(last_deep_scrub, bl);
2935 decode(last_deep_scrub_stamp, bl);
2936 decode(tmp, bl);
7c673cae 2937 stats_invalid = tmp;
11fdf7f2
TL
2938 decode(last_clean_scrub_stamp, bl);
2939 decode(last_became_active, bl);
2940 decode(tmp, bl);
7c673cae 2941 dirty_stats_invalid = tmp;
11fdf7f2
TL
2942 decode(up_primary, bl);
2943 decode(acting_primary, bl);
2944 decode(tmp, bl);
7c673cae 2945 omap_stats_invalid = tmp;
11fdf7f2 2946 decode(tmp, bl);
7c673cae 2947 hitset_stats_invalid = tmp;
11fdf7f2
TL
2948 decode(blocked_by, bl);
2949 decode(last_undegraded, bl);
2950 decode(last_fullsized, bl);
2951 decode(tmp, bl);
7c673cae 2952 hitset_bytes_stats_invalid = tmp;
11fdf7f2
TL
2953 decode(last_peered, bl);
2954 decode(last_became_peered, bl);
2955 decode(tmp, bl);
7c673cae 2956 pin_stats_invalid = tmp;
b32b8144 2957 if (struct_v >= 23) {
11fdf7f2
TL
2958 decode(snaptrimq_len, bl);
2959 if (struct_v >= 24) {
2960 __u32 top_state;
2961 decode(top_state, bl);
2962 state = (uint64_t)old_state | ((uint64_t)top_state << 32);
2963 decode(purged_snaps, bl);
2964 } else {
2965 state = old_state;
2966 }
2967 if (struct_v >= 25) {
2968 decode(tmp, bl);
2969 manifest_stats_invalid = tmp;
2970 } else {
2971 manifest_stats_invalid = true;
2972 }
81eedcae
TL
2973 if (struct_v >= 26) {
2974 decode(avail_no_missing, bl);
2975 decode(object_location_counts, bl);
2976 }
b32b8144 2977 }
7c673cae
FG
2978 DECODE_FINISH(bl);
2979}
2980
2981void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
2982{
2983 pg_stat_t a;
2984 o.push_back(new pg_stat_t(a));
2985
2986 a.version = eversion_t(1, 3);
2987 a.reported_epoch = 1;
2988 a.reported_seq = 2;
2989 a.state = 123;
2990 a.mapping_epoch = 998;
2991 a.last_fresh = utime_t(1002, 1);
2992 a.last_change = utime_t(1002, 2);
2993 a.last_active = utime_t(1002, 3);
2994 a.last_clean = utime_t(1002, 4);
2995 a.last_unstale = utime_t(1002, 5);
2996 a.last_undegraded = utime_t(1002, 7);
2997 a.last_fullsized = utime_t(1002, 8);
2998 a.log_start = eversion_t(1, 4);
2999 a.ondisk_log_start = eversion_t(1, 5);
3000 a.created = 6;
3001 a.last_epoch_clean = 7;
11fdf7f2 3002 a.parent = pg_t(1, 2);
7c673cae
FG
3003 a.parent_split_bits = 12;
3004 a.last_scrub = eversion_t(9, 10);
3005 a.last_scrub_stamp = utime_t(11, 12);
3006 a.last_deep_scrub = eversion_t(13, 14);
3007 a.last_deep_scrub_stamp = utime_t(15, 16);
3008 a.last_clean_scrub_stamp = utime_t(17, 18);
b32b8144 3009 a.snaptrimq_len = 1048576;
7c673cae
FG
3010 list<object_stat_collection_t*> l;
3011 object_stat_collection_t::generate_test_instances(l);
3012 a.stats = *l.back();
3013 a.log_size = 99;
3014 a.ondisk_log_size = 88;
3015 a.up.push_back(123);
3016 a.up_primary = 123;
3017 a.acting.push_back(456);
81eedcae
TL
3018 a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD));
3019 set<pg_shard_t> sset = { pg_shard_t(0), pg_shard_t(1) };
3020 a.object_location_counts.insert(make_pair(sset, 10));
3021 sset.insert(pg_shard_t(2));
3022 a.object_location_counts.insert(make_pair(sset, 5));
7c673cae
FG
3023 a.acting_primary = 456;
3024 o.push_back(new pg_stat_t(a));
3025
3026 a.up.push_back(124);
3027 a.up_primary = 124;
3028 a.acting.push_back(124);
3029 a.acting_primary = 124;
3030 a.blocked_by.push_back(155);
3031 a.blocked_by.push_back(156);
3032 o.push_back(new pg_stat_t(a));
3033}
3034
3035bool operator==(const pg_stat_t& l, const pg_stat_t& r)
3036{
3037 return
3038 l.version == r.version &&
3039 l.reported_seq == r.reported_seq &&
3040 l.reported_epoch == r.reported_epoch &&
3041 l.state == r.state &&
3042 l.last_fresh == r.last_fresh &&
3043 l.last_change == r.last_change &&
3044 l.last_active == r.last_active &&
3045 l.last_peered == r.last_peered &&
3046 l.last_clean == r.last_clean &&
3047 l.last_unstale == r.last_unstale &&
3048 l.last_undegraded == r.last_undegraded &&
3049 l.last_fullsized == r.last_fullsized &&
3050 l.log_start == r.log_start &&
3051 l.ondisk_log_start == r.ondisk_log_start &&
3052 l.created == r.created &&
3053 l.last_epoch_clean == r.last_epoch_clean &&
3054 l.parent == r.parent &&
3055 l.parent_split_bits == r.parent_split_bits &&
3056 l.last_scrub == r.last_scrub &&
3057 l.last_deep_scrub == r.last_deep_scrub &&
3058 l.last_scrub_stamp == r.last_scrub_stamp &&
3059 l.last_deep_scrub_stamp == r.last_deep_scrub_stamp &&
3060 l.last_clean_scrub_stamp == r.last_clean_scrub_stamp &&
3061 l.stats == r.stats &&
3062 l.stats_invalid == r.stats_invalid &&
3063 l.log_size == r.log_size &&
3064 l.ondisk_log_size == r.ondisk_log_size &&
3065 l.up == r.up &&
3066 l.acting == r.acting &&
81eedcae
TL
3067 l.avail_no_missing == r.avail_no_missing &&
3068 l.object_location_counts == r.object_location_counts &&
7c673cae
FG
3069 l.mapping_epoch == r.mapping_epoch &&
3070 l.blocked_by == r.blocked_by &&
3071 l.last_became_active == r.last_became_active &&
3072 l.last_became_peered == r.last_became_peered &&
3073 l.dirty_stats_invalid == r.dirty_stats_invalid &&
3074 l.omap_stats_invalid == r.omap_stats_invalid &&
3075 l.hitset_stats_invalid == r.hitset_stats_invalid &&
3076 l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid &&
3077 l.up_primary == r.up_primary &&
3078 l.acting_primary == r.acting_primary &&
b32b8144 3079 l.pin_stats_invalid == r.pin_stats_invalid &&
11fdf7f2
TL
3080 l.manifest_stats_invalid == r.manifest_stats_invalid &&
3081 l.purged_snaps == r.purged_snaps &&
b32b8144 3082 l.snaptrimq_len == r.snaptrimq_len;
7c673cae
FG
3083}
3084
11fdf7f2
TL
3085// -- store_statfs_t --
3086
3087bool store_statfs_t::operator==(const store_statfs_t& other) const
3088{
3089 return total == other.total
3090 && available == other.available
3091 && allocated == other.allocated
3092 && internally_reserved == other.internally_reserved
3093 && data_stored == other.data_stored
3094 && data_compressed == other.data_compressed
3095 && data_compressed_allocated == other.data_compressed_allocated
3096 && data_compressed_original == other.data_compressed_original
3097 && omap_allocated == other.omap_allocated
3098 && internal_metadata == other.internal_metadata;
3099}
3100
3101void store_statfs_t::dump(Formatter *f) const
3102{
3103 f->dump_int("total", total);
3104 f->dump_int("available", available);
3105 f->dump_int("internally_reserved", internally_reserved);
3106 f->dump_int("allocated", allocated);
3107 f->dump_int("data_stored", data_stored);
3108 f->dump_int("data_compressed", data_compressed);
3109 f->dump_int("data_compressed_allocated", data_compressed_allocated);
3110 f->dump_int("data_compressed_original", data_compressed_original);
3111 f->dump_int("omap_allocated", omap_allocated);
3112 f->dump_int("internal_metadata", internal_metadata);
3113}
3114
3115ostream& operator<<(ostream& out, const store_statfs_t &s)
3116{
3117 out << std::hex
3118 << "store_statfs(0x" << s.available
3119 << "/0x" << s.internally_reserved
3120 << "/0x" << s.total
3121 << ", data 0x" << s.data_stored
3122 << "/0x" << s.allocated
3123 << ", compress 0x" << s.data_compressed
3124 << "/0x" << s.data_compressed_allocated
3125 << "/0x" << s.data_compressed_original
3126 << ", omap 0x" << s.omap_allocated
3127 << ", meta 0x" << s.internal_metadata
3128 << std::dec
3129 << ")";
3130 return out;
3131}
3132
3133void store_statfs_t::generate_test_instances(list<store_statfs_t*>& o)
3134{
3135 store_statfs_t a;
3136 o.push_back(new store_statfs_t(a));
3137 a.total = 234;
3138 a.available = 123;
3139 a.internally_reserved = 33;
3140 a.allocated = 32;
3141 a.data_stored = 44;
3142 a.data_compressed = 21;
3143 a.data_compressed_allocated = 12;
3144 a.data_compressed_original = 13;
3145 a.omap_allocated = 14;
3146 a.internal_metadata = 15;
3147 o.push_back(new store_statfs_t(a));
3148}
3149
7c673cae
FG
3150// -- pool_stat_t --
3151
3152void pool_stat_t::dump(Formatter *f) const
3153{
3154 stats.dump(f);
11fdf7f2
TL
3155 f->open_object_section("store_stats");
3156 store_stats.dump(f);
3157 f->close_section();
7c673cae
FG
3158 f->dump_int("log_size", log_size);
3159 f->dump_int("ondisk_log_size", ondisk_log_size);
3160 f->dump_int("up", up);
3161 f->dump_int("acting", acting);
eafe8130 3162 f->dump_int("num_store_stats", num_store_stats);
7c673cae
FG
3163}
3164
9f95a23c 3165void pool_stat_t::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae 3166{
11fdf7f2 3167 using ceph::encode;
7c673cae
FG
3168 if ((features & CEPH_FEATURE_OSDENC) == 0) {
3169 __u8 v = 4;
11fdf7f2
TL
3170 encode(v, bl);
3171 encode(stats, bl);
3172 encode(log_size, bl);
3173 encode(ondisk_log_size, bl);
7c673cae
FG
3174 return;
3175 }
3176
11fdf7f2
TL
3177 ENCODE_START(7, 5, bl);
3178 encode(stats, bl);
3179 encode(log_size, bl);
3180 encode(ondisk_log_size, bl);
3181 encode(up, bl);
3182 encode(acting, bl);
3183 encode(store_stats, bl);
3184 encode(num_store_stats, bl);
7c673cae
FG
3185 ENCODE_FINISH(bl);
3186}
3187
9f95a23c 3188void pool_stat_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 3189{
11fdf7f2 3190 DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
7c673cae 3191 if (struct_v >= 4) {
11fdf7f2
TL
3192 decode(stats, bl);
3193 decode(log_size, bl);
3194 decode(ondisk_log_size, bl);
7c673cae 3195 if (struct_v >= 6) {
11fdf7f2
TL
3196 decode(up, bl);
3197 decode(acting, bl);
7c673cae
FG
3198 } else {
3199 up = 0;
3200 acting = 0;
3201 }
11fdf7f2
TL
3202 if (struct_v >= 7) {
3203 decode(store_stats, bl);
3204 decode(num_store_stats, bl);
3205 } else {
3206 store_stats.reset();
3207 num_store_stats = 0;
3208 }
3209
7c673cae 3210 } else {
11fdf7f2 3211 decode(stats.sum.num_bytes, bl);
7c673cae 3212 uint64_t num_kb;
11fdf7f2
TL
3213 decode(num_kb, bl);
3214 decode(stats.sum.num_objects, bl);
3215 decode(stats.sum.num_object_clones, bl);
3216 decode(stats.sum.num_object_copies, bl);
3217 decode(stats.sum.num_objects_missing_on_primary, bl);
3218 decode(stats.sum.num_objects_degraded, bl);
3219 decode(log_size, bl);
3220 decode(ondisk_log_size, bl);
7c673cae 3221 if (struct_v >= 2) {
11fdf7f2
TL
3222 decode(stats.sum.num_rd, bl);
3223 decode(stats.sum.num_rd_kb, bl);
3224 decode(stats.sum.num_wr, bl);
3225 decode(stats.sum.num_wr_kb, bl);
7c673cae
FG
3226 }
3227 if (struct_v >= 3) {
11fdf7f2 3228 decode(stats.sum.num_objects_unfound, bl);
7c673cae
FG
3229 }
3230 }
3231 DECODE_FINISH(bl);
3232}
3233
3234void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
3235{
3236 pool_stat_t a;
3237 o.push_back(new pool_stat_t(a));
3238
3239 list<object_stat_collection_t*> l;
3240 object_stat_collection_t::generate_test_instances(l);
11fdf7f2
TL
3241 list<store_statfs_t*> ll;
3242 store_statfs_t::generate_test_instances(ll);
7c673cae 3243 a.stats = *l.back();
11fdf7f2 3244 a.store_stats = *ll.back();
7c673cae
FG
3245 a.log_size = 123;
3246 a.ondisk_log_size = 456;
3247 a.acting = 3;
3248 a.up = 4;
11fdf7f2 3249 a.num_store_stats = 1;
7c673cae
FG
3250 o.push_back(new pool_stat_t(a));
3251}
3252
3253
3254// -- pg_history_t --
3255
9f95a23c 3256void pg_history_t::encode(ceph::buffer::list &bl) const
7c673cae 3257{
9f95a23c 3258 ENCODE_START(10, 4, bl);
11fdf7f2
TL
3259 encode(epoch_created, bl);
3260 encode(last_epoch_started, bl);
3261 encode(last_epoch_clean, bl);
3262 encode(last_epoch_split, bl);
3263 encode(same_interval_since, bl);
3264 encode(same_up_since, bl);
3265 encode(same_primary_since, bl);
3266 encode(last_scrub, bl);
3267 encode(last_scrub_stamp, bl);
3268 encode(last_deep_scrub, bl);
3269 encode(last_deep_scrub_stamp, bl);
3270 encode(last_clean_scrub_stamp, bl);
3271 encode(last_epoch_marked_full, bl);
3272 encode(last_interval_started, bl);
3273 encode(last_interval_clean, bl);
3274 encode(epoch_pool_created, bl);
9f95a23c 3275 encode(prior_readable_until_ub, bl);
7c673cae
FG
3276 ENCODE_FINISH(bl);
3277}
3278
9f95a23c 3279void pg_history_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 3280{
9f95a23c 3281 DECODE_START_LEGACY_COMPAT_LEN(10, 4, 4, bl);
11fdf7f2
TL
3282 decode(epoch_created, bl);
3283 decode(last_epoch_started, bl);
7c673cae 3284 if (struct_v >= 3)
11fdf7f2 3285 decode(last_epoch_clean, bl);
7c673cae
FG
3286 else
3287 last_epoch_clean = last_epoch_started; // careful, it's a lie!
11fdf7f2
TL
3288 decode(last_epoch_split, bl);
3289 decode(same_interval_since, bl);
3290 decode(same_up_since, bl);
3291 decode(same_primary_since, bl);
7c673cae 3292 if (struct_v >= 2) {
11fdf7f2
TL
3293 decode(last_scrub, bl);
3294 decode(last_scrub_stamp, bl);
7c673cae
FG
3295 }
3296 if (struct_v >= 5) {
11fdf7f2
TL
3297 decode(last_deep_scrub, bl);
3298 decode(last_deep_scrub_stamp, bl);
7c673cae
FG
3299 }
3300 if (struct_v >= 6) {
11fdf7f2 3301 decode(last_clean_scrub_stamp, bl);
7c673cae
FG
3302 }
3303 if (struct_v >= 7) {
11fdf7f2 3304 decode(last_epoch_marked_full, bl);
7c673cae
FG
3305 }
3306 if (struct_v >= 8) {
11fdf7f2
TL
3307 decode(last_interval_started, bl);
3308 decode(last_interval_clean, bl);
7c673cae
FG
3309 } else {
3310 if (last_epoch_started >= same_interval_since) {
3311 last_interval_started = same_interval_since;
3312 } else {
3313 last_interval_started = last_epoch_started; // best guess
3314 }
3315 if (last_epoch_clean >= same_interval_since) {
3316 last_interval_clean = same_interval_since;
3317 } else {
3318 last_interval_clean = last_epoch_clean; // best guess
3319 }
3320 }
31f18b77 3321 if (struct_v >= 9) {
11fdf7f2 3322 decode(epoch_pool_created, bl);
31f18b77
FG
3323 } else {
3324 epoch_pool_created = epoch_created;
3325 }
9f95a23c
TL
3326 if (struct_v >= 10) {
3327 decode(prior_readable_until_ub, bl);
3328 }
7c673cae
FG
3329 DECODE_FINISH(bl);
3330}
3331
3332void pg_history_t::dump(Formatter *f) const
3333{
3334 f->dump_int("epoch_created", epoch_created);
31f18b77 3335 f->dump_int("epoch_pool_created", epoch_pool_created);
7c673cae
FG
3336 f->dump_int("last_epoch_started", last_epoch_started);
3337 f->dump_int("last_interval_started", last_interval_started);
3338 f->dump_int("last_epoch_clean", last_epoch_clean);
3339 f->dump_int("last_interval_clean", last_interval_clean);
3340 f->dump_int("last_epoch_split", last_epoch_split);
3341 f->dump_int("last_epoch_marked_full", last_epoch_marked_full);
3342 f->dump_int("same_up_since", same_up_since);
3343 f->dump_int("same_interval_since", same_interval_since);
3344 f->dump_int("same_primary_since", same_primary_since);
3345 f->dump_stream("last_scrub") << last_scrub;
3346 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
3347 f->dump_stream("last_deep_scrub") << last_deep_scrub;
3348 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
3349 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
9f95a23c
TL
3350 f->dump_float(
3351 "prior_readable_until_ub",
3352 std::chrono::duration<double>(prior_readable_until_ub).count());
7c673cae
FG
3353}
3354
3355void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
3356{
3357 o.push_back(new pg_history_t);
3358 o.push_back(new pg_history_t);
3359 o.back()->epoch_created = 1;
31f18b77 3360 o.back()->epoch_pool_created = 1;
7c673cae
FG
3361 o.back()->last_epoch_started = 2;
3362 o.back()->last_interval_started = 2;
3363 o.back()->last_epoch_clean = 3;
3364 o.back()->last_interval_clean = 2;
3365 o.back()->last_epoch_split = 4;
9f95a23c 3366 o.back()->prior_readable_until_ub = make_timespan(3.1415);
7c673cae
FG
3367 o.back()->same_up_since = 5;
3368 o.back()->same_interval_since = 6;
3369 o.back()->same_primary_since = 7;
3370 o.back()->last_scrub = eversion_t(8, 9);
3371 o.back()->last_scrub_stamp = utime_t(10, 11);
3372 o.back()->last_deep_scrub = eversion_t(12, 13);
3373 o.back()->last_deep_scrub_stamp = utime_t(14, 15);
3374 o.back()->last_clean_scrub_stamp = utime_t(16, 17);
3375 o.back()->last_epoch_marked_full = 18;
3376}
3377
3378
3379// -- pg_info_t --
3380
9f95a23c 3381void pg_info_t::encode(ceph::buffer::list &bl) const
7c673cae
FG
3382{
3383 ENCODE_START(32, 26, bl);
11fdf7f2
TL
3384 encode(pgid.pgid, bl);
3385 encode(last_update, bl);
3386 encode(last_complete, bl);
3387 encode(log_tail, bl);
9f95a23c 3388 encode(hobject_t(), bl); // old (nibblewise) last_backfill
11fdf7f2 3389 encode(stats, bl);
7c673cae 3390 history.encode(bl);
11fdf7f2
TL
3391 encode(purged_snaps, bl);
3392 encode(last_epoch_started, bl);
3393 encode(last_user_version, bl);
3394 encode(hit_set, bl);
3395 encode(pgid.shard, bl);
3396 encode(last_backfill, bl);
9f95a23c 3397 encode(true, bl); // was last_backfill_bitwise
11fdf7f2 3398 encode(last_interval_started, bl);
7c673cae
FG
3399 ENCODE_FINISH(bl);
3400}
3401
9f95a23c 3402void pg_info_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
3403{
3404 DECODE_START(32, bl);
11fdf7f2
TL
3405 decode(pgid.pgid, bl);
3406 decode(last_update, bl);
3407 decode(last_complete, bl);
3408 decode(log_tail, bl);
7c673cae
FG
3409 {
3410 hobject_t old_last_backfill;
11fdf7f2 3411 decode(old_last_backfill, bl);
7c673cae 3412 }
11fdf7f2 3413 decode(stats, bl);
7c673cae 3414 history.decode(bl);
11fdf7f2
TL
3415 decode(purged_snaps, bl);
3416 decode(last_epoch_started, bl);
3417 decode(last_user_version, bl);
3418 decode(hit_set, bl);
3419 decode(pgid.shard, bl);
3420 decode(last_backfill, bl);
9f95a23c
TL
3421 {
3422 bool last_backfill_bitwise;
3423 decode(last_backfill_bitwise, bl);
3424 // note: we may see a false value here since the default value for
3425 // the member was false, so it often didn't get set to true until
3426 // peering progressed.
3427 }
7c673cae 3428 if (struct_v >= 32) {
11fdf7f2 3429 decode(last_interval_started, bl);
7c673cae
FG
3430 } else {
3431 last_interval_started = last_epoch_started;
3432 }
3433 DECODE_FINISH(bl);
3434}
3435
3436// -- pg_info_t --
3437
3438void pg_info_t::dump(Formatter *f) const
3439{
3440 f->dump_stream("pgid") << pgid;
3441 f->dump_stream("last_update") << last_update;
3442 f->dump_stream("last_complete") << last_complete;
3443 f->dump_stream("log_tail") << log_tail;
3444 f->dump_int("last_user_version", last_user_version);
3445 f->dump_stream("last_backfill") << last_backfill;
7c673cae
FG
3446 f->open_array_section("purged_snaps");
3447 for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
3448 i != purged_snaps.end();
3449 ++i) {
3450 f->open_object_section("purged_snap_interval");
3451 f->dump_stream("start") << i.get_start();
3452 f->dump_stream("length") << i.get_len();
3453 f->close_section();
3454 }
3455 f->close_section();
3456 f->open_object_section("history");
3457 history.dump(f);
3458 f->close_section();
3459 f->open_object_section("stats");
3460 stats.dump(f);
3461 f->close_section();
3462
3463 f->dump_int("empty", is_empty());
3464 f->dump_int("dne", dne());
3465 f->dump_int("incomplete", is_incomplete());
3466 f->dump_int("last_epoch_started", last_epoch_started);
3467
3468 f->open_object_section("hit_set_history");
3469 hit_set.dump(f);
3470 f->close_section();
3471}
3472
3473void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
3474{
3475 o.push_back(new pg_info_t);
3476 o.push_back(new pg_info_t);
3477 list<pg_history_t*> h;
3478 pg_history_t::generate_test_instances(h);
3479 o.back()->history = *h.back();
11fdf7f2 3480 o.back()->pgid = spg_t(pg_t(1, 2), shard_id_t::NO_SHARD);
7c673cae
FG
3481 o.back()->last_update = eversion_t(3, 4);
3482 o.back()->last_complete = eversion_t(5, 6);
3483 o.back()->last_user_version = 2;
3484 o.back()->log_tail = eversion_t(7, 8);
3485 o.back()->last_backfill = hobject_t(object_t("objname"), "key", 123, 456, -1, "");
7c673cae
FG
3486 {
3487 list<pg_stat_t*> s;
3488 pg_stat_t::generate_test_instances(s);
3489 o.back()->stats = *s.back();
3490 }
3491 {
3492 list<pg_hit_set_history_t*> s;
3493 pg_hit_set_history_t::generate_test_instances(s);
3494 o.back()->hit_set = *s.back();
3495 }
3496}
3497
3498// -- pg_notify_t --
9f95a23c 3499void pg_notify_t::encode(ceph::buffer::list &bl) const
7c673cae 3500{
9f95a23c 3501 ENCODE_START(3, 2, bl);
11fdf7f2
TL
3502 encode(query_epoch, bl);
3503 encode(epoch_sent, bl);
3504 encode(info, bl);
3505 encode(to, bl);
3506 encode(from, bl);
9f95a23c 3507 encode(past_intervals, bl);
7c673cae
FG
3508 ENCODE_FINISH(bl);
3509}
3510
9f95a23c 3511void pg_notify_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 3512{
9f95a23c 3513 DECODE_START(3, bl);
11fdf7f2
TL
3514 decode(query_epoch, bl);
3515 decode(epoch_sent, bl);
3516 decode(info, bl);
3517 decode(to, bl);
3518 decode(from, bl);
9f95a23c
TL
3519 if (struct_v >= 3) {
3520 decode(past_intervals, bl);
3521 }
7c673cae
FG
3522 DECODE_FINISH(bl);
3523}
3524
3525void pg_notify_t::dump(Formatter *f) const
3526{
3527 f->dump_int("from", from);
3528 f->dump_int("to", to);
3529 f->dump_unsigned("query_epoch", query_epoch);
3530 f->dump_unsigned("epoch_sent", epoch_sent);
3531 {
3532 f->open_object_section("info");
3533 info.dump(f);
3534 f->close_section();
3535 }
9f95a23c 3536 f->dump_object("past_intervals", past_intervals);
7c673cae
FG
3537}
3538
3539void pg_notify_t::generate_test_instances(list<pg_notify_t*>& o)
3540{
9f95a23c
TL
3541 o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1,
3542 pg_info_t(), PastIntervals()));
3543 o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10,
3544 pg_info_t(), PastIntervals()));
7c673cae
FG
3545}
3546
3547ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
3548{
3549 lhs << "(query:" << notify.query_epoch
3550 << " sent:" << notify.epoch_sent
3551 << " " << notify.info;
3552 if (notify.from != shard_id_t::NO_SHARD ||
3553 notify.to != shard_id_t::NO_SHARD)
3554 lhs << " " << (unsigned)notify.from
3555 << "->" << (unsigned)notify.to;
9f95a23c 3556 lhs << " " << notify.past_intervals;
7c673cae
FG
3557 return lhs << ")";
3558}
3559
3560// -- pg_interval_t --
3561
9f95a23c 3562void PastIntervals::pg_interval_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
3563{
3564 ENCODE_START(4, 2, bl);
11fdf7f2
TL
3565 encode(first, bl);
3566 encode(last, bl);
3567 encode(up, bl);
3568 encode(acting, bl);
3569 encode(maybe_went_rw, bl);
3570 encode(primary, bl);
3571 encode(up_primary, bl);
7c673cae
FG
3572 ENCODE_FINISH(bl);
3573}
3574
9f95a23c 3575void PastIntervals::pg_interval_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
3576{
3577 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
11fdf7f2
TL
3578 decode(first, bl);
3579 decode(last, bl);
3580 decode(up, bl);
3581 decode(acting, bl);
3582 decode(maybe_went_rw, bl);
7c673cae 3583 if (struct_v >= 3) {
11fdf7f2 3584 decode(primary, bl);
7c673cae
FG
3585 } else {
3586 if (acting.size())
3587 primary = acting[0];
3588 }
3589 if (struct_v >= 4) {
11fdf7f2 3590 decode(up_primary, bl);
7c673cae
FG
3591 } else {
3592 if (up.size())
3593 up_primary = up[0];
3594 }
3595 DECODE_FINISH(bl);
3596}
3597
3598void PastIntervals::pg_interval_t::dump(Formatter *f) const
3599{
3600 f->dump_unsigned("first", first);
3601 f->dump_unsigned("last", last);
3602 f->dump_int("maybe_went_rw", maybe_went_rw ? 1 : 0);
3603 f->open_array_section("up");
9f95a23c 3604 for (auto p = up.cbegin(); p != up.cend(); ++p)
7c673cae
FG
3605 f->dump_int("osd", *p);
3606 f->close_section();
3607 f->open_array_section("acting");
9f95a23c 3608 for (auto p = acting.cbegin(); p != acting.cend(); ++p)
7c673cae
FG
3609 f->dump_int("osd", *p);
3610 f->close_section();
3611 f->dump_int("primary", primary);
3612 f->dump_int("up_primary", up_primary);
3613}
3614
3615void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
3616{
3617 o.push_back(new pg_interval_t);
3618 o.push_back(new pg_interval_t);
3619 o.back()->up.push_back(1);
3620 o.back()->acting.push_back(2);
3621 o.back()->acting.push_back(3);
3622 o.back()->first = 4;
3623 o.back()->last = 5;
3624 o.back()->maybe_went_rw = true;
3625}
3626
3627WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
3628
7c673cae
FG
3629
3630/**
3631 * pi_compact_rep
3632 *
3633 * PastIntervals only needs to be able to answer two questions:
3634 * 1) Where should the primary look for unfound objects?
3635 * 2) List a set of subsets of the OSDs such that contacting at least
11fdf7f2 3636 * one from each subset guarantees we speak to at least one witness
7c673cae
FG
3637 * of any completed write.
3638 *
3639 * Crucially, 2) does not require keeping *all* past intervals. Certainly,
3640 * we don't need to keep any where maybe_went_rw would be false. We also
3641 * needn't keep two intervals where the actingset in one is a subset
3642 * of the other (only need to keep the smaller of the two sets). In order
3643 * to accurately trim the set of intervals as last_epoch_started changes
3644 * without rebuilding the set from scratch, we'll retain the larger set
 * if it is in an older interval.
3646 */
3647struct compact_interval_t {
3648 epoch_t first;
3649 epoch_t last;
3650 set<pg_shard_t> acting;
3651 bool supersedes(const compact_interval_t &other) {
3652 for (auto &&i: acting) {
3653 if (!other.acting.count(i))
3654 return false;
3655 }
3656 return true;
3657 }
3658 void dump(Formatter *f) const {
3659 f->open_object_section("compact_interval_t");
3660 f->dump_stream("first") << first;
3661 f->dump_stream("last") << last;
3662 f->dump_stream("acting") << acting;
3663 f->close_section();
3664 }
9f95a23c 3665 void encode(ceph::buffer::list &bl) const {
7c673cae 3666 ENCODE_START(1, 1, bl);
11fdf7f2
TL
3667 encode(first, bl);
3668 encode(last, bl);
3669 encode(acting, bl);
7c673cae
FG
3670 ENCODE_FINISH(bl);
3671 }
9f95a23c 3672 void decode(ceph::buffer::list::const_iterator &bl) {
7c673cae 3673 DECODE_START(1, bl);
11fdf7f2
TL
3674 decode(first, bl);
3675 decode(last, bl);
3676 decode(acting, bl);
7c673cae
FG
3677 DECODE_FINISH(bl);
3678 }
3679 static void generate_test_instances(list<compact_interval_t*> & o) {
3680 /* Not going to be used, we'll generate pi_compact_rep directly */
3681 }
3682};
3683ostream &operator<<(ostream &o, const compact_interval_t &rhs)
3684{
3685 return o << "([" << rhs.first << "," << rhs.last
3686 << "] acting " << rhs.acting << ")";
3687}
3688WRITE_CLASS_ENCODER(compact_interval_t)
3689
3690class pi_compact_rep : public PastIntervals::interval_rep {
3691 epoch_t first = 0;
3692 epoch_t last = 0; // inclusive
3693 set<pg_shard_t> all_participants;
3694 list<compact_interval_t> intervals;
3695 pi_compact_rep(
3696 bool ec_pool,
3697 std::list<PastIntervals::pg_interval_t> &&intervals) {
3698 for (auto &&i: intervals)
3699 add_interval(ec_pool, i);
3700 }
3701public:
3702 pi_compact_rep() = default;
3703 pi_compact_rep(const pi_compact_rep &) = default;
3704 pi_compact_rep(pi_compact_rep &&) = default;
3705 pi_compact_rep &operator=(const pi_compact_rep &) = default;
3706 pi_compact_rep &operator=(pi_compact_rep &&) = default;
3707
3708 size_t size() const override { return intervals.size(); }
3709 bool empty() const override {
3710 return first > last || (first == 0 && last == 0);
3711 }
3712 void clear() override {
3713 *this = pi_compact_rep();
3714 }
3715 pair<epoch_t, epoch_t> get_bounds() const override {
3716 return make_pair(first, last + 1);
3717 }
11fdf7f2
TL
3718 void adjust_start_backwards(epoch_t last_epoch_clean) {
3719 first = last_epoch_clean;
3720 }
3721
7c673cae
FG
3722 set<pg_shard_t> get_all_participants(
3723 bool ec_pool) const override {
3724 return all_participants;
3725 }
3726 void add_interval(
3727 bool ec_pool, const PastIntervals::pg_interval_t &interval) override {
3728 if (first == 0)
3729 first = interval.first;
11fdf7f2 3730 ceph_assert(interval.last > last);
7c673cae
FG
3731 last = interval.last;
3732 set<pg_shard_t> acting;
3733 for (unsigned i = 0; i < interval.acting.size(); ++i) {
3734 if (interval.acting[i] == CRUSH_ITEM_NONE)
3735 continue;
3736 acting.insert(
3737 pg_shard_t(
3738 interval.acting[i],
3739 ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
3740 }
3741 all_participants.insert(acting.begin(), acting.end());
3742 if (!interval.maybe_went_rw)
3743 return;
3744 intervals.push_back(
3745 compact_interval_t{interval.first, interval.last, acting});
3746 auto plast = intervals.end();
3747 --plast;
3748 for (auto cur = intervals.begin(); cur != plast; ) {
3749 if (plast->supersedes(*cur)) {
3750 intervals.erase(cur++);
3751 } else {
3752 ++cur;
3753 }
3754 }
3755 }
3756 unique_ptr<PastIntervals::interval_rep> clone() const override {
3757 return unique_ptr<PastIntervals::interval_rep>(new pi_compact_rep(*this));
3758 }
3759 ostream &print(ostream &out) const override {
3760 return out << "([" << first << "," << last
9f95a23c
TL
3761 << "] all_participants=" << all_participants
3762 << " intervals=" << intervals << ")";
7c673cae 3763 }
9f95a23c 3764 void encode(ceph::buffer::list &bl) const override {
7c673cae 3765 ENCODE_START(1, 1, bl);
11fdf7f2
TL
3766 encode(first, bl);
3767 encode(last, bl);
3768 encode(all_participants, bl);
3769 encode(intervals, bl);
7c673cae
FG
3770 ENCODE_FINISH(bl);
3771 }
9f95a23c 3772 void decode(ceph::buffer::list::const_iterator &bl) override {
7c673cae 3773 DECODE_START(1, bl);
11fdf7f2
TL
3774 decode(first, bl);
3775 decode(last, bl);
3776 decode(all_participants, bl);
3777 decode(intervals, bl);
7c673cae
FG
3778 DECODE_FINISH(bl);
3779 }
3780 void dump(Formatter *f) const override {
3781 f->open_object_section("PastIntervals::compact_rep");
3782 f->dump_stream("first") << first;
3783 f->dump_stream("last") << last;
3784 f->open_array_section("all_participants");
3785 for (auto& i : all_participants) {
3786 f->dump_object("pg_shard", i);
3787 }
3788 f->close_section();
3789 f->open_array_section("intervals");
3790 for (auto &&i: intervals) {
3791 i.dump(f);
3792 }
3793 f->close_section();
3794 f->close_section();
3795 }
7c673cae
FG
3796 static void generate_test_instances(list<pi_compact_rep*> &o) {
3797 using ival = PastIntervals::pg_interval_t;
3798 using ivallst = std::list<ival>;
3799 o.push_back(
3800 new pi_compact_rep(
3801 true, ivallst
3802 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3803 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3804 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3805 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3806 }));
3807 o.push_back(
3808 new pi_compact_rep(
3809 false, ivallst
3810 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3811 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3812 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3813 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3814 }));
3815 o.push_back(
3816 new pi_compact_rep(
3817 true, ivallst
3818 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3819 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3820 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3821 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3822 }));
3823 }
3824 void iterate_mayberw_back_to(
7c673cae
FG
3825 epoch_t les,
3826 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3827 for (auto i = intervals.rbegin(); i != intervals.rend(); ++i) {
3828 if (i->last < les)
3829 break;
3830 f(i->first, i->acting);
3831 }
3832 }
3833 virtual ~pi_compact_rep() override {}
3834};
3835WRITE_CLASS_ENCODER(pi_compact_rep)
3836
11fdf7f2
TL
3837PastIntervals::PastIntervals()
3838{
3839 past_intervals.reset(new pi_compact_rep);
3840}
3841
7c673cae
FG
3842PastIntervals::PastIntervals(const PastIntervals &rhs)
3843 : past_intervals(rhs.past_intervals ?
3844 rhs.past_intervals->clone() :
3845 nullptr) {}
3846
3847PastIntervals &PastIntervals::operator=(const PastIntervals &rhs)
3848{
3849 PastIntervals other(rhs);
31f18b77 3850 swap(other);
7c673cae
FG
3851 return *this;
3852}
3853
3854ostream& operator<<(ostream& out, const PastIntervals &i)
3855{
3856 if (i.past_intervals) {
3857 return i.past_intervals->print(out);
3858 } else {
3859 return out << "(empty)";
3860 }
3861}
3862
3863ostream& operator<<(ostream& out, const PastIntervals::PriorSet &i)
3864{
3865 return out << "PriorSet("
3866 << "ec_pool: " << i.ec_pool
3867 << ", probe: " << i.probe
3868 << ", down: " << i.down
3869 << ", blocked_by: " << i.blocked_by
3870 << ", pg_down: " << i.pg_down
3871 << ")";
3872}
3873
9f95a23c 3874void PastIntervals::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
3875{
3876 DECODE_START(1, bl);
3877 __u8 type = 0;
11fdf7f2 3878 decode(type, bl);
7c673cae
FG
3879 switch (type) {
3880 case 0:
3881 break;
3882 case 1:
11fdf7f2 3883 ceph_abort_msg("pi_simple_rep support removed post-luminous");
7c673cae
FG
3884 break;
3885 case 2:
3886 past_intervals.reset(new pi_compact_rep);
3887 past_intervals->decode(bl);
3888 break;
3889 }
3890 DECODE_FINISH(bl);
3891}
3892
7c673cae
FG
3893void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
3894{
7c673cae
FG
3895 {
3896 list<pi_compact_rep *> compact;
3897 pi_compact_rep::generate_test_instances(compact);
3898 for (auto &&i: compact) {
3899 // takes ownership of contents
3900 o.push_back(new PastIntervals(i));
3901 }
3902 }
3903 return;
3904}
3905
7c673cae
FG
3906bool PastIntervals::is_new_interval(
3907 int old_acting_primary,
3908 int new_acting_primary,
3909 const vector<int> &old_acting,
3910 const vector<int> &new_acting,
3911 int old_up_primary,
3912 int new_up_primary,
3913 const vector<int> &old_up,
3914 const vector<int> &new_up,
3915 int old_size,
3916 int new_size,
3917 int old_min_size,
3918 int new_min_size,
3919 unsigned old_pg_num,
3920 unsigned new_pg_num,
11fdf7f2
TL
3921 unsigned old_pg_num_pending,
3922 unsigned new_pg_num_pending,
7c673cae
FG
3923 bool old_sort_bitwise,
3924 bool new_sort_bitwise,
c07f9fc5
FG
3925 bool old_recovery_deletes,
3926 bool new_recovery_deletes,
7c673cae
FG
3927 pg_t pgid) {
3928 return old_acting_primary != new_acting_primary ||
3929 new_acting != old_acting ||
3930 old_up_primary != new_up_primary ||
3931 new_up != old_up ||
3932 old_min_size != new_min_size ||
3933 old_size != new_size ||
3934 pgid.is_split(old_pg_num, new_pg_num, 0) ||
11fdf7f2
TL
3935 // (is or was) pre-merge source
3936 pgid.is_merge_source(old_pg_num_pending, new_pg_num_pending, 0) ||
3937 pgid.is_merge_source(new_pg_num_pending, old_pg_num_pending, 0) ||
3938 // merge source
3939 pgid.is_merge_source(old_pg_num, new_pg_num, 0) ||
3940 // (is or was) pre-merge target
3941 pgid.is_merge_target(old_pg_num_pending, new_pg_num_pending) ||
3942 pgid.is_merge_target(new_pg_num_pending, old_pg_num_pending) ||
3943 // merge target
3944 pgid.is_merge_target(old_pg_num, new_pg_num) ||
c07f9fc5
FG
3945 old_sort_bitwise != new_sort_bitwise ||
3946 old_recovery_deletes != new_recovery_deletes;
7c673cae
FG
3947}
3948
3949bool PastIntervals::is_new_interval(
3950 int old_acting_primary,
3951 int new_acting_primary,
3952 const vector<int> &old_acting,
3953 const vector<int> &new_acting,
3954 int old_up_primary,
3955 int new_up_primary,
3956 const vector<int> &old_up,
3957 const vector<int> &new_up,
9f95a23c
TL
3958 const OSDMap *osdmap,
3959 const OSDMap *lastmap,
11fdf7f2
TL
3960 pg_t pgid)
3961{
3962 const pg_pool_t *plast = lastmap->get_pg_pool(pgid.pool());
3963 if (!plast) {
3964 return false; // after pool is deleted there are no more interval changes
3965 }
3966 const pg_pool_t *pi = osdmap->get_pg_pool(pgid.pool());
3967 if (!pi) {
3968 return true; // pool was deleted this epoch -> (final!) interval change
3969 }
3970 return
7c673cae
FG
3971 is_new_interval(old_acting_primary,
3972 new_acting_primary,
3973 old_acting,
3974 new_acting,
3975 old_up_primary,
3976 new_up_primary,
3977 old_up,
3978 new_up,
11fdf7f2
TL
3979 plast->size,
3980 pi->size,
3981 plast->min_size,
3982 pi->min_size,
3983 plast->get_pg_num(),
3984 pi->get_pg_num(),
3985 plast->get_pg_num_pending(),
3986 pi->get_pg_num_pending(),
7c673cae
FG
3987 lastmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3988 osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
c07f9fc5
FG
3989 lastmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
3990 osdmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
7c673cae
FG
3991 pgid);
3992}
3993
3994bool PastIntervals::check_new_interval(
3995 int old_acting_primary,
3996 int new_acting_primary,
3997 const vector<int> &old_acting,
3998 const vector<int> &new_acting,
3999 int old_up_primary,
4000 int new_up_primary,
4001 const vector<int> &old_up,
4002 const vector<int> &new_up,
4003 epoch_t same_interval_since,
4004 epoch_t last_epoch_clean,
9f95a23c
TL
4005 const OSDMap *osdmap,
4006 const OSDMap *lastmap,
7c673cae 4007 pg_t pgid,
9f95a23c 4008 const IsPGRecoverablePredicate &could_have_gone_active,
7c673cae
FG
4009 PastIntervals *past_intervals,
4010 std::ostream *out)
4011{
4012 /*
4013 * We have to be careful to gracefully deal with situations like
4014 * so. Say we have a power outage or something that takes out both
4015 * OSDs, but the monitor doesn't mark them down in the same epoch.
4016 * The history may look like
4017 *
4018 * 1: A B
4019 * 2: B
4020 * 3: let's say B dies for good, too (say, from the power spike)
4021 * 4: A
4022 *
4023 * which makes it look like B may have applied updates to the PG
4024 * that we need in order to proceed. This sucks...
4025 *
4026 * To minimize the risk of this happening, we CANNOT go active if
4027 * _any_ OSDs in the prior set are down until we send an MOSDAlive
4028 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
4029 * Then, we have something like
4030 *
4031 * 1: A B
4032 * 2: B up_thru[B]=0
4033 * 3:
4034 * 4: A
4035 *
4036 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
4037 *
4038 * or,
4039 *
4040 * 1: A B
4041 * 2: B up_thru[B]=0
4042 * 3: B up_thru[B]=2
4043 * 4:
4044 * 5: A
4045 *
4046 * -> we must wait for B, bc it was alive through 2, and could have
4047 * written to the pg.
4048 *
4049 * If B is really dead, then an administrator will need to manually
4050 * intervene by marking the OSD as "lost."
4051 */
4052
4053 // remember past interval
4054 // NOTE: a change in the up set primary triggers an interval
4055 // change, even though the interval members in the pg_interval_t
4056 // do not change.
11fdf7f2
TL
4057 ceph_assert(past_intervals);
4058 ceph_assert(past_intervals->past_intervals);
7c673cae
FG
4059 if (is_new_interval(
4060 old_acting_primary,
4061 new_acting_primary,
4062 old_acting,
4063 new_acting,
4064 old_up_primary,
4065 new_up_primary,
4066 old_up,
4067 new_up,
4068 osdmap,
4069 lastmap,
4070 pgid)) {
4071 pg_interval_t i;
4072 i.first = same_interval_since;
4073 i.last = osdmap->get_epoch() - 1;
11fdf7f2 4074 ceph_assert(i.first <= i.last);
7c673cae
FG
4075 i.acting = old_acting;
4076 i.up = old_up;
4077 i.primary = old_acting_primary;
4078 i.up_primary = old_up_primary;
4079
4080 unsigned num_acting = 0;
9f95a23c 4081 for (auto p = i.acting.cbegin(); p != i.acting.cend(); ++p)
7c673cae
FG
4082 if (*p != CRUSH_ITEM_NONE)
4083 ++num_acting;
4084
11fdf7f2 4085 ceph_assert(lastmap->get_pools().count(pgid.pool()));
7c673cae
FG
4086 const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
4087 set<pg_shard_t> old_acting_shards;
4088 old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
4089
4090 if (num_acting &&
4091 i.primary != -1 &&
4092 num_acting >= old_pg_pool.min_size &&
9f95a23c 4093 could_have_gone_active(old_acting_shards)) {
7c673cae
FG
4094 if (out)
4095 *out << __func__ << " " << i
7c673cae
FG
4096 << " up_thru " << lastmap->get_up_thru(i.primary)
4097 << " up_from " << lastmap->get_up_from(i.primary)
11fdf7f2 4098 << " last_epoch_clean " << last_epoch_clean;
7c673cae
FG
4099 if (lastmap->get_up_thru(i.primary) >= i.first &&
4100 lastmap->get_up_from(i.primary) <= i.first) {
4101 i.maybe_went_rw = true;
4102 if (out)
11fdf7f2 4103 *out << " " << i
7c673cae
FG
4104 << " : primary up " << lastmap->get_up_from(i.primary)
4105 << "-" << lastmap->get_up_thru(i.primary)
4106 << " includes interval"
11fdf7f2 4107 << std::endl;
7c673cae
FG
4108 } else if (last_epoch_clean >= i.first &&
4109 last_epoch_clean <= i.last) {
4110 // If the last_epoch_clean is included in this interval, then
4111 // the pg must have been rw (for recovery to have completed).
4112 // This is important because we won't know the _real_
4113 // first_epoch because we stop at last_epoch_clean, and we
4114 // don't want the oldest interval to randomly have
4115 // maybe_went_rw false depending on the relative up_thru vs
4116 // last_epoch_clean timing.
4117 i.maybe_went_rw = true;
4118 if (out)
11fdf7f2 4119 *out << " " << i
7c673cae
FG
4120 << " : includes last_epoch_clean " << last_epoch_clean
4121 << " and presumed to have been rw"
4122 << std::endl;
4123 } else {
4124 i.maybe_went_rw = false;
4125 if (out)
11fdf7f2 4126 *out << " " << i
7c673cae
FG
4127 << " : primary up " << lastmap->get_up_from(i.primary)
4128 << "-" << lastmap->get_up_thru(i.primary)
4129 << " does not include interval"
11fdf7f2 4130 << std::endl;
7c673cae
FG
4131 }
4132 } else {
4133 i.maybe_went_rw = false;
4134 if (out)
4135 *out << __func__ << " " << i << " : acting set is too small" << std::endl;
4136 }
11fdf7f2 4137 past_intervals->past_intervals->add_interval(old_pg_pool.is_erasure(), i);
7c673cae
FG
4138 return true;
4139 } else {
4140 return false;
4141 }
4142}
4143
4144
4145// true if the given map affects the prior set
4146bool PastIntervals::PriorSet::affected_by_map(
4147 const OSDMap &osdmap,
4148 const DoutPrefixProvider *dpp) const
4149{
9f95a23c 4150 for (auto p = probe.begin(); p != probe.end(); ++p) {
7c673cae
FG
4151 int o = p->osd;
4152
4153 // did someone in the prior set go down?
4154 if (osdmap.is_down(o) && down.count(o) == 0) {
4155 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
4156 return true;
4157 }
4158
4159 // did a down osd in cur get (re)marked as lost?
9f95a23c 4160 auto r = blocked_by.find(o);
7c673cae
FG
4161 if (r != blocked_by.end()) {
4162 if (!osdmap.exists(o)) {
4163 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
4164 return true;
4165 }
4166 if (osdmap.get_info(o).lost_at != r->second) {
4167 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
4168 return true;
4169 }
4170 }
4171 }
4172
4173 // did someone in the prior down set go up?
9f95a23c 4174 for (auto p = down.cbegin(); p != down.cend(); ++p) {
7c673cae
FG
4175 int o = *p;
4176
4177 if (osdmap.is_up(o)) {
4178 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
4179 return true;
4180 }
4181
4182 // did someone in the prior set get lost or destroyed?
4183 if (!osdmap.exists(o)) {
4184 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
4185 return true;
4186 }
4187 // did a down osd in down get (re)marked as lost?
9f95a23c 4188 auto r = blocked_by.find(o);
7c673cae
FG
4189 if (r != blocked_by.end()) {
4190 if (osdmap.get_info(o).lost_at != r->second) {
4191 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
4192 return true;
4193 }
4194 }
4195 }
4196
4197 return false;
4198}
4199
4200ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
4201{
4202 out << "interval(" << i.first << "-" << i.last
4203 << " up " << i.up << "(" << i.up_primary << ")"
4204 << " acting " << i.acting << "(" << i.primary << ")";
4205 if (i.maybe_went_rw)
4206 out << " maybe_went_rw";
4207 out << ")";
4208 return out;
4209}
4210
4211
4212
4213// -- pg_query_t --
4214
9f95a23c 4215void pg_query_t::encode(ceph::buffer::list &bl, uint64_t features) const {
7c673cae 4216 ENCODE_START(3, 3, bl);
11fdf7f2
TL
4217 encode(type, bl);
4218 encode(since, bl);
7c673cae 4219 history.encode(bl);
11fdf7f2
TL
4220 encode(epoch_sent, bl);
4221 encode(to, bl);
4222 encode(from, bl);
7c673cae
FG
4223 ENCODE_FINISH(bl);
4224}
4225
9f95a23c 4226void pg_query_t::decode(ceph::buffer::list::const_iterator &bl) {
7c673cae 4227 DECODE_START(3, bl);
11fdf7f2
TL
4228 decode(type, bl);
4229 decode(since, bl);
7c673cae 4230 history.decode(bl);
11fdf7f2
TL
4231 decode(epoch_sent, bl);
4232 decode(to, bl);
4233 decode(from, bl);
7c673cae
FG
4234 DECODE_FINISH(bl);
4235}
4236
4237void pg_query_t::dump(Formatter *f) const
4238{
4239 f->dump_int("from", from);
4240 f->dump_int("to", to);
4241 f->dump_string("type", get_type_name());
4242 f->dump_stream("since") << since;
4243 f->dump_stream("epoch_sent") << epoch_sent;
4244 f->open_object_section("history");
4245 history.dump(f);
4246 f->close_section();
4247}
4248void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
4249{
4250 o.push_back(new pg_query_t());
4251 list<pg_history_t*> h;
4252 pg_history_t::generate_test_instances(h);
4253 o.push_back(new pg_query_t(pg_query_t::INFO, shard_id_t(1), shard_id_t(2), *h.back(), 4));
4254 o.push_back(new pg_query_t(pg_query_t::MISSING, shard_id_t(2), shard_id_t(3), *h.back(), 4));
4255 o.push_back(new pg_query_t(pg_query_t::LOG, shard_id_t(0), shard_id_t(0),
4256 eversion_t(4, 5), *h.back(), 4));
4257 o.push_back(new pg_query_t(pg_query_t::FULLLOG,
4258 shard_id_t::NO_SHARD, shard_id_t::NO_SHARD,
4259 *h.back(), 5));
4260}
4261
9f95a23c
TL
4262// -- pg_lease_t --
4263
4264void pg_lease_t::encode(bufferlist& bl) const
4265{
4266 ENCODE_START(1, 1, bl);
4267 encode(readable_until, bl);
4268 encode(readable_until_ub, bl);
4269 encode(interval, bl);
4270 ENCODE_FINISH(bl);
4271}
4272
4273void pg_lease_t::decode(bufferlist::const_iterator& p)
4274{
4275 DECODE_START(1, p);
4276 decode(readable_until, p);
4277 decode(readable_until_ub, p);
4278 decode(interval, p);
4279 DECODE_FINISH(p);
4280}
4281
4282void pg_lease_t::dump(Formatter *f) const
4283{
4284 f->dump_stream("readable_until") << readable_until;
4285 f->dump_stream("readable_until_ub") << readable_until_ub;
4286 f->dump_stream("interval") << interval;
4287}
4288
4289void pg_lease_t::generate_test_instances(std::list<pg_lease_t*>& o)
4290{
4291 o.push_back(new pg_lease_t());
4292 o.push_back(new pg_lease_t());
4293 o.back()->readable_until = make_timespan(1.5);
4294 o.back()->readable_until_ub = make_timespan(3.4);
4295 o.back()->interval = make_timespan(1.0);
4296}
4297
4298// -- pg_lease_ack_t --
4299
4300void pg_lease_ack_t::encode(bufferlist& bl) const
4301{
4302 ENCODE_START(1, 1, bl);
4303 encode(readable_until_ub, bl);
4304 ENCODE_FINISH(bl);
4305}
4306
4307void pg_lease_ack_t::decode(bufferlist::const_iterator& p)
4308{
4309 DECODE_START(1, p);
4310 decode(readable_until_ub, p);
4311 DECODE_FINISH(p);
4312}
4313
4314void pg_lease_ack_t::dump(Formatter *f) const
4315{
4316 f->dump_stream("readable_until_ub") << readable_until_ub;
4317}
4318
4319void pg_lease_ack_t::generate_test_instances(std::list<pg_lease_ack_t*>& o)
4320{
4321 o.push_back(new pg_lease_ack_t());
4322 o.push_back(new pg_lease_ack_t());
4323 o.back()->readable_until_ub = make_timespan(3.4);
4324}
4325
4326
7c673cae
FG
4327// -- ObjectModDesc --
4328void ObjectModDesc::visit(Visitor *visitor) const
4329{
11fdf7f2 4330 auto bp = bl.cbegin();
7c673cae
FG
4331 try {
4332 while (!bp.end()) {
4333 DECODE_START(max_required_version, bp);
4334 uint8_t code;
11fdf7f2 4335 decode(code, bp);
7c673cae
FG
4336 switch (code) {
4337 case APPEND: {
4338 uint64_t size;
11fdf7f2 4339 decode(size, bp);
7c673cae
FG
4340 visitor->append(size);
4341 break;
4342 }
4343 case SETATTRS: {
9f95a23c 4344 map<string, std::optional<ceph::buffer::list> > attrs;
11fdf7f2 4345 decode(attrs, bp);
7c673cae
FG
4346 visitor->setattrs(attrs);
4347 break;
4348 }
4349 case DELETE: {
4350 version_t old_version;
11fdf7f2 4351 decode(old_version, bp);
7c673cae
FG
4352 visitor->rmobject(old_version);
4353 break;
4354 }
4355 case CREATE: {
4356 visitor->create();
4357 break;
4358 }
4359 case UPDATE_SNAPS: {
4360 set<snapid_t> snaps;
11fdf7f2 4361 decode(snaps, bp);
7c673cae
FG
4362 visitor->update_snaps(snaps);
4363 break;
4364 }
4365 case TRY_DELETE: {
4366 version_t old_version;
11fdf7f2 4367 decode(old_version, bp);
7c673cae
FG
4368 visitor->try_rmobject(old_version);
4369 break;
4370 }
4371 case ROLLBACK_EXTENTS: {
4372 vector<pair<uint64_t, uint64_t> > extents;
4373 version_t gen;
11fdf7f2
TL
4374 decode(gen, bp);
4375 decode(extents, bp);
7c673cae
FG
4376 visitor->rollback_extents(gen,extents);
4377 break;
4378 }
4379 default:
11fdf7f2 4380 ceph_abort_msg("Invalid rollback code");
7c673cae
FG
4381 }
4382 DECODE_FINISH(bp);
4383 }
4384 } catch (...) {
11fdf7f2 4385 ceph_abort_msg("Invalid encoding");
7c673cae
FG
4386 }
4387}
4388
4389struct DumpVisitor : public ObjectModDesc::Visitor {
4390 Formatter *f;
4391 explicit DumpVisitor(Formatter *f) : f(f) {}
4392 void append(uint64_t old_size) override {
4393 f->open_object_section("op");
4394 f->dump_string("code", "APPEND");
4395 f->dump_unsigned("old_size", old_size);
4396 f->close_section();
4397 }
9f95a23c 4398 void setattrs(map<string, std::optional<ceph::buffer::list> > &attrs) override {
7c673cae
FG
4399 f->open_object_section("op");
4400 f->dump_string("code", "SETATTRS");
4401 f->open_array_section("attrs");
9f95a23c 4402 for (auto i = attrs.begin(); i != attrs.end(); ++i) {
7c673cae
FG
4403 f->dump_string("attr_name", i->first);
4404 }
4405 f->close_section();
4406 f->close_section();
4407 }
4408 void rmobject(version_t old_version) override {
4409 f->open_object_section("op");
4410 f->dump_string("code", "RMOBJECT");
4411 f->dump_unsigned("old_version", old_version);
4412 f->close_section();
4413 }
4414 void try_rmobject(version_t old_version) override {
4415 f->open_object_section("op");
4416 f->dump_string("code", "TRY_RMOBJECT");
4417 f->dump_unsigned("old_version", old_version);
4418 f->close_section();
4419 }
4420 void create() override {
4421 f->open_object_section("op");
4422 f->dump_string("code", "CREATE");
4423 f->close_section();
4424 }
4425 void update_snaps(const set<snapid_t> &snaps) override {
4426 f->open_object_section("op");
4427 f->dump_string("code", "UPDATE_SNAPS");
4428 f->dump_stream("snaps") << snaps;
4429 f->close_section();
4430 }
4431 void rollback_extents(
4432 version_t gen,
4433 const vector<pair<uint64_t, uint64_t> > &extents) override {
4434 f->open_object_section("op");
4435 f->dump_string("code", "ROLLBACK_EXTENTS");
4436 f->dump_unsigned("gen", gen);
4437 f->dump_stream("snaps") << extents;
4438 f->close_section();
4439 }
4440};
4441
4442void ObjectModDesc::dump(Formatter *f) const
4443{
4444 f->open_object_section("object_mod_desc");
4445 f->dump_bool("can_local_rollback", can_local_rollback);
4446 f->dump_bool("rollback_info_completed", rollback_info_completed);
4447 {
4448 f->open_array_section("ops");
4449 DumpVisitor vis(f);
4450 visit(&vis);
4451 f->close_section();
4452 }
4453 f->close_section();
4454}
4455
4456void ObjectModDesc::generate_test_instances(list<ObjectModDesc*>& o)
4457{
9f95a23c 4458 map<string, std::optional<ceph::buffer::list> > attrs;
7c673cae
FG
4459 attrs[OI_ATTR];
4460 attrs[SS_ATTR];
4461 attrs["asdf"];
4462 o.push_back(new ObjectModDesc());
4463 o.back()->append(100);
4464 o.back()->setattrs(attrs);
4465 o.push_back(new ObjectModDesc());
4466 o.back()->rmobject(1001);
4467 o.push_back(new ObjectModDesc());
4468 o.back()->create();
4469 o.back()->setattrs(attrs);
4470 o.push_back(new ObjectModDesc());
4471 o.back()->create();
4472 o.back()->setattrs(attrs);
4473 o.back()->mark_unrollbackable();
4474 o.back()->append(1000);
4475}
4476
9f95a23c 4477void ObjectModDesc::encode(ceph::buffer::list &_bl) const
7c673cae
FG
4478{
4479 ENCODE_START(max_required_version, max_required_version, _bl);
11fdf7f2
TL
4480 encode(can_local_rollback, _bl);
4481 encode(rollback_info_completed, _bl);
4482 encode(bl, _bl);
7c673cae
FG
4483 ENCODE_FINISH(_bl);
4484}
9f95a23c 4485void ObjectModDesc::decode(ceph::buffer::list::const_iterator &_bl)
7c673cae
FG
4486{
4487 DECODE_START(2, _bl);
4488 max_required_version = struct_v;
11fdf7f2
TL
4489 decode(can_local_rollback, _bl);
4490 decode(rollback_info_completed, _bl);
4491 decode(bl, _bl);
9f95a23c 4492 // ensure bl does not pin a larger ceph::buffer in memory
7c673cae 4493 bl.rebuild();
31f18b77 4494 bl.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4495 DECODE_FINISH(_bl);
4496}
4497
9f95a23c
TL
4498std::atomic<uint32_t> ObjectCleanRegions::max_num_intervals = {10};
4499
4500void ObjectCleanRegions::set_max_num_intervals(uint32_t num)
4501{
4502 max_num_intervals = num;
4503}
4504
4505void ObjectCleanRegions::trim()
4506{
4507 while(clean_offsets.num_intervals() > max_num_intervals) {
4508 typename interval_set<uint64_t>::iterator shortest_interval = clean_offsets.begin();
4509 if (shortest_interval == clean_offsets.end())
4510 break;
4511 for (typename interval_set<uint64_t>::iterator it = clean_offsets.begin();
4512 it != clean_offsets.end();
4513 ++it) {
4514 if (it.get_len() < shortest_interval.get_len())
4515 shortest_interval = it;
4516 }
4517 clean_offsets.erase(shortest_interval);
4518 }
4519}
4520
4521void ObjectCleanRegions::merge(const ObjectCleanRegions &other)
4522{
4523 clean_offsets.intersection_of(other.clean_offsets);
4524 clean_omap = clean_omap && other.clean_omap;
4525 trim();
4526}
4527
4528void ObjectCleanRegions::mark_data_region_dirty(uint64_t offset, uint64_t len)
4529{
4530 interval_set<uint64_t> clean_region;
4531 clean_region.insert(0, (uint64_t)-1);
4532 clean_region.erase(offset, len);
4533 clean_offsets.intersection_of(clean_region);
4534 trim();
4535}
4536
4537void ObjectCleanRegions::mark_omap_dirty()
4538{
4539 clean_omap = false;
4540}
4541
4542void ObjectCleanRegions::mark_object_new()
4543{
4544 new_object = true;
4545}
4546
4547void ObjectCleanRegions::mark_fully_dirty()
4548{
4549 mark_data_region_dirty(0, (uint64_t)-1);
4550 mark_omap_dirty();
4551 mark_object_new();
4552}
4553
4554interval_set<uint64_t> ObjectCleanRegions::get_dirty_regions() const
4555{
4556 interval_set<uint64_t> dirty_region;
4557 dirty_region.insert(0, (uint64_t)-1);
4558 dirty_region.subtract(clean_offsets);
4559 return dirty_region;
4560}
4561
4562bool ObjectCleanRegions::omap_is_dirty() const
4563{
4564 return !clean_omap;
4565}
4566
4567bool ObjectCleanRegions::object_is_exist() const
4568{
4569 return !new_object;
4570}
4571
4572void ObjectCleanRegions::encode(bufferlist &bl) const
4573{
4574 ENCODE_START(1, 1, bl);
4575 using ceph::encode;
4576 encode(clean_offsets, bl);
4577 encode(clean_omap, bl);
4578 encode(new_object, bl);
4579 ENCODE_FINISH(bl);
4580}
4581
4582void ObjectCleanRegions::decode(bufferlist::const_iterator &bl)
4583{
4584 DECODE_START(1, bl);
4585 using ceph::decode;
4586 decode(clean_offsets, bl);
4587 decode(clean_omap, bl);
4588 decode(new_object, bl);
4589 DECODE_FINISH(bl);
4590}
4591
4592void ObjectCleanRegions::dump(Formatter *f) const
4593{
4594 f->open_object_section("object_clean_regions");
4595 f->dump_stream("clean_offsets") << clean_offsets;
4596 f->dump_bool("clean_omap", clean_omap);
4597 f->dump_bool("new_object", new_object);
4598 f->close_section();
4599}
4600
4601void ObjectCleanRegions::generate_test_instances(list<ObjectCleanRegions*>& o)
4602{
4603 o.push_back(new ObjectCleanRegions());
4604 o.push_back(new ObjectCleanRegions());
4605 o.back()->mark_data_region_dirty(4096, 40960);
4606 o.back()->mark_omap_dirty();
4607 o.back()->mark_object_new();
4608}
4609
4610ostream& operator<<(ostream& out, const ObjectCleanRegions& ocr)
4611{
4612 return out << "clean_offsets: " << ocr.clean_offsets
4613 << ", clean_omap: " << ocr.clean_omap
4614 << ", new_object: " << ocr.new_object;
4615}
4616
7c673cae
FG
4617// -- pg_log_entry_t --
4618
4619string pg_log_entry_t::get_key_name() const
4620{
4621 return version.get_key_name();
4622}
4623
9f95a23c 4624void pg_log_entry_t::encode_with_checksum(ceph::buffer::list& bl) const
7c673cae 4625{
11fdf7f2 4626 using ceph::encode;
9f95a23c 4627 ceph::buffer::list ebl(sizeof(*this)*2);
11fdf7f2 4628 this->encode(ebl);
7c673cae 4629 __u32 crc = ebl.crc32c(0);
11fdf7f2
TL
4630 encode(ebl, bl);
4631 encode(crc, bl);
7c673cae
FG
4632}
4633
9f95a23c 4634void pg_log_entry_t::decode_with_checksum(ceph::buffer::list::const_iterator& p)
7c673cae 4635{
11fdf7f2 4636 using ceph::decode;
9f95a23c 4637 ceph::buffer::list bl;
11fdf7f2 4638 decode(bl, p);
7c673cae 4639 __u32 crc;
11fdf7f2 4640 decode(crc, p);
7c673cae 4641 if (crc != bl.crc32c(0))
9f95a23c 4642 throw ceph::buffer::malformed_input("bad checksum on pg_log_entry_t");
11fdf7f2
TL
4643 auto q = bl.cbegin();
4644 this->decode(q);
7c673cae
FG
4645}
4646
9f95a23c 4647void pg_log_entry_t::encode(ceph::buffer::list &bl) const
7c673cae 4648{
9f95a23c 4649 ENCODE_START(14, 4, bl);
11fdf7f2
TL
4650 encode(op, bl);
4651 encode(soid, bl);
4652 encode(version, bl);
7c673cae
FG
4653
4654 /**
4655 * Added with reverting_to:
4656 * Previous code used prior_version to encode
4657 * what we now call reverting_to. This will
4658 * allow older code to decode reverting_to
4659 * into prior_version as expected.
4660 */
4661 if (op == LOST_REVERT)
11fdf7f2 4662 encode(reverting_to, bl);
7c673cae 4663 else
11fdf7f2 4664 encode(prior_version, bl);
7c673cae 4665
11fdf7f2
TL
4666 encode(reqid, bl);
4667 encode(mtime, bl);
7c673cae 4668 if (op == LOST_REVERT)
11fdf7f2
TL
4669 encode(prior_version, bl);
4670 encode(snaps, bl);
4671 encode(user_version, bl);
4672 encode(mod_desc, bl);
4673 encode(extra_reqids, bl);
7c673cae 4674 if (op == ERROR)
11fdf7f2
TL
4675 encode(return_code, bl);
4676 if (!extra_reqids.empty())
4677 encode(extra_reqid_return_codes, bl);
9f95a23c
TL
4678 encode(clean_regions, bl);
4679 if (op != ERROR)
4680 encode(return_code, bl);
4681 encode(op_returns, bl);
7c673cae
FG
4682 ENCODE_FINISH(bl);
4683}
4684
9f95a23c 4685void pg_log_entry_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 4686{
9f95a23c 4687 DECODE_START_LEGACY_COMPAT_LEN(14, 4, 4, bl);
11fdf7f2 4688 decode(op, bl);
7c673cae
FG
4689 if (struct_v < 2) {
4690 sobject_t old_soid;
11fdf7f2 4691 decode(old_soid, bl);
7c673cae
FG
4692 soid.oid = old_soid.oid;
4693 soid.snap = old_soid.snap;
4694 invalid_hash = true;
4695 } else {
11fdf7f2 4696 decode(soid, bl);
7c673cae
FG
4697 }
4698 if (struct_v < 3)
4699 invalid_hash = true;
11fdf7f2 4700 decode(version, bl);
7c673cae
FG
4701
4702 if (struct_v >= 6 && op == LOST_REVERT)
11fdf7f2 4703 decode(reverting_to, bl);
7c673cae 4704 else
11fdf7f2 4705 decode(prior_version, bl);
7c673cae 4706
11fdf7f2 4707 decode(reqid, bl);
7c673cae 4708
11fdf7f2 4709 decode(mtime, bl);
7c673cae
FG
4710 if (struct_v < 5)
4711 invalid_pool = true;
4712
4713 if (op == LOST_REVERT) {
4714 if (struct_v >= 6) {
11fdf7f2 4715 decode(prior_version, bl);
7c673cae
FG
4716 } else {
4717 reverting_to = prior_version;
4718 }
4719 }
4720 if (struct_v >= 7 || // for v >= 7, this is for all ops.
4721 op == CLONE) { // for v < 7, it's only present for CLONE.
11fdf7f2 4722 decode(snaps, bl);
9f95a23c 4723 // ensure snaps does not pin a larger ceph::buffer in memory
7c673cae 4724 snaps.rebuild();
31f18b77 4725 snaps.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4726 }
4727
4728 if (struct_v >= 8)
11fdf7f2 4729 decode(user_version, bl);
7c673cae
FG
4730 else
4731 user_version = version.version;
4732
4733 if (struct_v >= 9)
11fdf7f2 4734 decode(mod_desc, bl);
7c673cae
FG
4735 else
4736 mod_desc.mark_unrollbackable();
4737 if (struct_v >= 10)
11fdf7f2 4738 decode(extra_reqids, bl);
7c673cae 4739 if (struct_v >= 11 && op == ERROR)
11fdf7f2
TL
4740 decode(return_code, bl);
4741 if (struct_v >= 12 && !extra_reqids.empty())
4742 decode(extra_reqid_return_codes, bl);
9f95a23c
TL
4743 if (struct_v >= 13)
4744 decode(clean_regions, bl);
4745 else
4746 clean_regions.mark_fully_dirty();
4747 if (struct_v >= 14) {
4748 if (op != ERROR) {
4749 decode(return_code, bl);
4750 }
4751 decode(op_returns, bl);
4752 }
7c673cae
FG
4753 DECODE_FINISH(bl);
4754}
4755
4756void pg_log_entry_t::dump(Formatter *f) const
4757{
4758 f->dump_string("op", get_op_name());
4759 f->dump_stream("object") << soid;
4760 f->dump_stream("version") << version;
4761 f->dump_stream("prior_version") << prior_version;
4762 f->dump_stream("reqid") << reqid;
4763 f->open_array_section("extra_reqids");
11fdf7f2 4764 uint32_t idx = 0;
31f18b77 4765 for (auto p = extra_reqids.begin();
7c673cae 4766 p != extra_reqids.end();
11fdf7f2 4767 ++idx, ++p) {
7c673cae
FG
4768 f->open_object_section("extra_reqid");
4769 f->dump_stream("reqid") << p->first;
4770 f->dump_stream("user_version") << p->second;
11fdf7f2
TL
4771 auto it = extra_reqid_return_codes.find(idx);
4772 if (it != extra_reqid_return_codes.end()) {
4773 f->dump_int("return_code", it->second);
4774 }
7c673cae
FG
4775 f->close_section();
4776 }
4777 f->close_section();
4778 f->dump_stream("mtime") << mtime;
4779 f->dump_int("return_code", return_code);
9f95a23c
TL
4780 if (!op_returns.empty()) {
4781 f->open_array_section("op_returns");
4782 for (auto& i : op_returns) {
4783 f->dump_object("op", i);
4784 }
4785 f->close_section();
4786 }
7c673cae
FG
4787 if (snaps.length() > 0) {
4788 vector<snapid_t> v;
9f95a23c 4789 ceph::buffer::list c = snaps;
11fdf7f2 4790 auto p = c.cbegin();
7c673cae 4791 try {
11fdf7f2
TL
4792 using ceph::decode;
4793 decode(v, p);
7c673cae
FG
4794 } catch (...) {
4795 v.clear();
4796 }
4797 f->open_object_section("snaps");
9f95a23c 4798 for (auto p = v.begin(); p != v.end(); ++p)
7c673cae
FG
4799 f->dump_unsigned("snap", *p);
4800 f->close_section();
4801 }
4802 {
4803 f->open_object_section("mod_desc");
4804 mod_desc.dump(f);
4805 f->close_section();
4806 }
9f95a23c
TL
4807 {
4808 f->open_object_section("clean_regions");
4809 clean_regions.dump(f);
4810 f->close_section();
4811 }
7c673cae
FG
4812}
4813
4814void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
4815{
4816 o.push_back(new pg_log_entry_t());
4817 hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
4818 o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
4819 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4820 utime_t(8,9), 0));
4821 o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
4822 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4823 utime_t(8,9), -ENOENT));
4824}
4825
4826ostream& operator<<(ostream& out, const pg_log_entry_t& e)
4827{
4828 out << e.version << " (" << e.prior_version << ") "
4829 << std::left << std::setw(8) << e.get_op_name() << ' '
4830 << e.soid << " by " << e.reqid << " " << e.mtime
4831 << " " << e.return_code;
9f95a23c
TL
4832 if (!e.op_returns.empty()) {
4833 out << " " << e.op_returns;
4834 }
7c673cae
FG
4835 if (e.snaps.length()) {
4836 vector<snapid_t> snaps;
9f95a23c 4837 ceph::buffer::list c = e.snaps;
11fdf7f2 4838 auto p = c.cbegin();
7c673cae 4839 try {
11fdf7f2 4840 decode(snaps, p);
7c673cae
FG
4841 } catch (...) {
4842 snaps.clear();
4843 }
4844 out << " snaps " << snaps;
4845 }
9f95a23c 4846 out << " ObjectCleanRegions " << e.clean_regions;
7c673cae
FG
4847 return out;
4848}
4849
c07f9fc5
FG
4850// -- pg_log_dup_t --
4851
11fdf7f2 4852std::string pg_log_dup_t::get_key_name() const
c07f9fc5 4853{
11fdf7f2
TL
4854 static const char prefix[] = "dup_";
4855 std::string key(36, ' ');
4856 memcpy(&key[0], prefix, 4);
4857 version.get_key_name(&key[4]);
4858 key.resize(35); // remove the null terminator
4859 return key;
c07f9fc5
FG
4860}
4861
9f95a23c 4862void pg_log_dup_t::encode(ceph::buffer::list &bl) const
c07f9fc5 4863{
9f95a23c 4864 ENCODE_START(2, 1, bl);
11fdf7f2
TL
4865 encode(reqid, bl);
4866 encode(version, bl);
4867 encode(user_version, bl);
4868 encode(return_code, bl);
9f95a23c 4869 encode(op_returns, bl);
c07f9fc5
FG
4870 ENCODE_FINISH(bl);
4871}
4872
9f95a23c 4873void pg_log_dup_t::decode(ceph::buffer::list::const_iterator &bl)
c07f9fc5 4874{
9f95a23c 4875 DECODE_START(2, bl);
11fdf7f2
TL
4876 decode(reqid, bl);
4877 decode(version, bl);
4878 decode(user_version, bl);
4879 decode(return_code, bl);
9f95a23c
TL
4880 if (struct_v >= 2) {
4881 decode(op_returns, bl);
4882 }
c07f9fc5
FG
4883 DECODE_FINISH(bl);
4884}
4885
4886void pg_log_dup_t::dump(Formatter *f) const
4887{
4888 f->dump_stream("reqid") << reqid;
4889 f->dump_stream("version") << version;
4890 f->dump_stream("user_version") << user_version;
4891 f->dump_stream("return_code") << return_code;
9f95a23c
TL
4892 if (!op_returns.empty()) {
4893 f->open_array_section("op_returns");
4894 for (auto& i : op_returns) {
4895 f->dump_object("op", i);
4896 }
4897 f->close_section();
4898 }
c07f9fc5
FG
4899}
4900
4901void pg_log_dup_t::generate_test_instances(list<pg_log_dup_t*>& o)
4902{
4903 o.push_back(new pg_log_dup_t());
4904 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4905 1,
4906 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4907 0));
4908 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4909 2,
4910 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4911 -ENOENT));
4912}
4913
4914
4915std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e) {
9f95a23c 4916 out << "log_dup(reqid=" << e.reqid <<
c07f9fc5 4917 " v=" << e.version << " uv=" << e.user_version <<
9f95a23c
TL
4918 " rc=" << e.return_code;
4919 if (!e.op_returns.empty()) {
4920 out << " " << e.op_returns;
4921 }
4922 return out << ")";
c07f9fc5
FG
4923}
4924
7c673cae
FG
4925
4926// -- pg_log_t --
4927
4928// out: pg_log_t that only has entries that apply to import_pgid using curmap
4929// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4930void pg_log_t::filter_log(spg_t import_pgid, const OSDMap &curmap,
4931 const string &hit_set_namespace, const pg_log_t &in,
4932 pg_log_t &out, pg_log_t &reject)
4933{
4934 out = in;
4935 out.log.clear();
4936 reject.log.clear();
4937
9f95a23c 4938 for (auto i = in.log.cbegin(); i != in.log.cend(); ++i) {
7c673cae
FG
4939
4940 // Reject pg log entries for temporary objects
4941 if (i->soid.is_temp()) {
4942 reject.log.push_back(*i);
4943 continue;
4944 }
4945
4946 if (i->soid.nspace != hit_set_namespace) {
4947 object_t oid = i->soid.oid;
4948 object_locator_t loc(i->soid);
4949 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
4950 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
4951
4952 if (import_pgid.pgid == pgid) {
4953 out.log.push_back(*i);
4954 } else {
4955 reject.log.push_back(*i);
4956 }
4957 } else {
4958 out.log.push_back(*i);
4959 }
4960 }
4961}
4962
9f95a23c 4963void pg_log_t::encode(ceph::buffer::list& bl) const
7c673cae 4964{
c07f9fc5 4965 ENCODE_START(7, 3, bl);
11fdf7f2
TL
4966 encode(head, bl);
4967 encode(tail, bl);
4968 encode(log, bl);
4969 encode(can_rollback_to, bl);
4970 encode(rollback_info_trimmed_to, bl);
4971 encode(dups, bl);
7c673cae
FG
4972 ENCODE_FINISH(bl);
4973}
4974
9f95a23c 4975void pg_log_t::decode(ceph::buffer::list::const_iterator &bl, int64_t pool)
7c673cae 4976{
c07f9fc5 4977 DECODE_START_LEGACY_COMPAT_LEN(7, 3, 3, bl);
11fdf7f2
TL
4978 decode(head, bl);
4979 decode(tail, bl);
7c673cae
FG
4980 if (struct_v < 2) {
4981 bool backlog;
11fdf7f2 4982 decode(backlog, bl);
7c673cae 4983 }
11fdf7f2 4984 decode(log, bl);
7c673cae 4985 if (struct_v >= 5)
11fdf7f2 4986 decode(can_rollback_to, bl);
7c673cae
FG
4987
4988 if (struct_v >= 6)
11fdf7f2 4989 decode(rollback_info_trimmed_to, bl);
7c673cae
FG
4990 else
4991 rollback_info_trimmed_to = tail;
c07f9fc5
FG
4992
4993 if (struct_v >= 7)
11fdf7f2 4994 decode(dups, bl);
c07f9fc5 4995
7c673cae
FG
4996 DECODE_FINISH(bl);
4997
4998 // handle hobject_t format change
4999 if (struct_v < 4) {
9f95a23c 5000 for (auto i = log.begin(); i != log.end(); ++i) {
7c673cae
FG
5001 if (!i->soid.is_max() && i->soid.pool == -1)
5002 i->soid.pool = pool;
5003 }
5004 }
5005}
5006
5007void pg_log_t::dump(Formatter *f) const
5008{
5009 f->dump_stream("head") << head;
5010 f->dump_stream("tail") << tail;
5011 f->open_array_section("log");
9f95a23c 5012 for (auto p = log.cbegin(); p != log.cend(); ++p) {
7c673cae
FG
5013 f->open_object_section("entry");
5014 p->dump(f);
5015 f->close_section();
5016 }
5017 f->close_section();
c07f9fc5
FG
5018 f->open_array_section("dups");
5019 for (const auto& entry : dups) {
5020 f->open_object_section("entry");
5021 entry.dump(f);
5022 f->close_section();
5023 }
5024 f->close_section();
7c673cae
FG
5025}
5026
5027void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
5028{
5029 o.push_back(new pg_log_t);
5030
5031 // this is nonsensical:
5032 o.push_back(new pg_log_t);
5033 o.back()->head = eversion_t(1,2);
5034 o.back()->tail = eversion_t(3,4);
5035 list<pg_log_entry_t*> e;
5036 pg_log_entry_t::generate_test_instances(e);
9f95a23c 5037 for (auto p = e.begin(); p != e.end(); ++p)
7c673cae
FG
5038 o.back()->log.push_back(**p);
5039}
5040
81eedcae
TL
5041static void _handle_dups(CephContext* cct, pg_log_t &target, const pg_log_t &other, unsigned maxdups)
5042{
5043 auto earliest_dup_version =
5044 target.head.version < maxdups ? 0u : target.head.version - maxdups + 1;
5045 lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl;
5046
5047 for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) {
5048 if (d->version.version >= earliest_dup_version) {
5049 lgeneric_subdout(cct, osd, 20)
5050 << "copy_up_to/copy_after copy dup version "
5051 << d->version << dendl;
5052 target.dups.push_back(pg_log_dup_t(*d));
5053 }
5054 }
5055
5056 for (auto i = other.log.cbegin(); i != other.log.cend(); ++i) {
5057 ceph_assert(i->version > other.tail);
5058 if (i->version > target.tail)
5059 break;
5060 if (i->version.version >= earliest_dup_version) {
5061 lgeneric_subdout(cct, osd, 20)
5062 << "copy_up_to/copy_after copy dup from log version "
5063 << i->version << dendl;
5064 target.dups.push_back(pg_log_dup_t(*i));
5065 }
5066 }
5067}
5068
5069
5070void pg_log_t::copy_after(CephContext* cct, const pg_log_t &other, eversion_t v)
7c673cae
FG
5071{
5072 can_rollback_to = other.can_rollback_to;
5073 head = other.head;
5074 tail = other.tail;
81eedcae 5075 lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl;
9f95a23c 5076 for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) {
11fdf7f2 5077 ceph_assert(i->version > other.tail);
7c673cae
FG
5078 if (i->version <= v) {
5079 // make tail accurate.
5080 tail = i->version;
5081 break;
5082 }
81eedcae 5083 lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
7c673cae
FG
5084 log.push_front(*i);
5085 }
81eedcae 5086 _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
7c673cae
FG
5087}
5088
81eedcae 5089void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max)
7c673cae
FG
5090{
5091 can_rollback_to = other.can_rollback_to;
5092 int n = 0;
5093 head = other.head;
5094 tail = other.tail;
81eedcae 5095 lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl;
9f95a23c 5096 for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) {
81eedcae 5097 ceph_assert(i->version > other.tail);
7c673cae
FG
5098 if (n++ >= max) {
5099 tail = i->version;
5100 break;
5101 }
81eedcae 5102 lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
7c673cae
FG
5103 log.push_front(*i);
5104 }
81eedcae 5105 _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
7c673cae
FG
5106}
5107
c07f9fc5 5108ostream& pg_log_t::print(ostream& out) const
7c673cae
FG
5109{
5110 out << *this << std::endl;
9f95a23c 5111 for (auto p = log.cbegin(); p != log.cend(); ++p)
7c673cae 5112 out << *p << std::endl;
c07f9fc5
FG
5113 for (const auto& entry : dups) {
5114 out << " dup entry: " << entry << std::endl;
5115 }
7c673cae
FG
5116 return out;
5117}
5118
5119// -- pg_missing_t --
5120
5121ostream& operator<<(ostream& out, const pg_missing_item& i)
5122{
5123 out << i.need;
5124 if (i.have != eversion_t())
5125 out << "(" << i.have << ")";
9f95a23c
TL
5126 out << " flags = " << i.flag_str()
5127 << " " << i.clean_regions;
7c673cae
FG
5128 return out;
5129}
5130
5131// -- object_copy_cursor_t --
5132
9f95a23c 5133void object_copy_cursor_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
5134{
5135 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5136 encode(attr_complete, bl);
5137 encode(data_offset, bl);
5138 encode(data_complete, bl);
5139 encode(omap_offset, bl);
5140 encode(omap_complete, bl);
7c673cae
FG
5141 ENCODE_FINISH(bl);
5142}
5143
9f95a23c 5144void object_copy_cursor_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
5145{
5146 DECODE_START(1, bl);
11fdf7f2
TL
5147 decode(attr_complete, bl);
5148 decode(data_offset, bl);
5149 decode(data_complete, bl);
5150 decode(omap_offset, bl);
5151 decode(omap_complete, bl);
7c673cae
FG
5152 DECODE_FINISH(bl);
5153}
5154
5155void object_copy_cursor_t::dump(Formatter *f) const
5156{
5157 f->dump_unsigned("attr_complete", (int)attr_complete);
5158 f->dump_unsigned("data_offset", data_offset);
5159 f->dump_unsigned("data_complete", (int)data_complete);
5160 f->dump_string("omap_offset", omap_offset);
5161 f->dump_unsigned("omap_complete", (int)omap_complete);
5162}
5163
5164void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
5165{
5166 o.push_back(new object_copy_cursor_t);
5167 o.push_back(new object_copy_cursor_t);
5168 o.back()->attr_complete = true;
5169 o.back()->data_offset = 123;
5170 o.push_back(new object_copy_cursor_t);
5171 o.back()->attr_complete = true;
5172 o.back()->data_complete = true;
5173 o.back()->omap_offset = "foo";
5174 o.push_back(new object_copy_cursor_t);
5175 o.back()->attr_complete = true;
5176 o.back()->data_complete = true;
5177 o.back()->omap_complete = true;
5178}
5179
5180// -- object_copy_data_t --
5181
9f95a23c 5182void object_copy_data_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 5183{
11fdf7f2
TL
5184 ENCODE_START(8, 5, bl);
5185 encode(size, bl);
5186 encode(mtime, bl);
5187 encode(attrs, bl);
5188 encode(data, bl);
5189 encode(omap_data, bl);
5190 encode(cursor, bl);
5191 encode(omap_header, bl);
5192 encode(snaps, bl);
5193 encode(snap_seq, bl);
5194 encode(flags, bl);
5195 encode(data_digest, bl);
5196 encode(omap_digest, bl);
5197 encode(reqids, bl);
5198 encode(truncate_seq, bl);
5199 encode(truncate_size, bl);
5200 encode(reqid_return_codes, bl);
7c673cae
FG
5201 ENCODE_FINISH(bl);
5202}
5203
9f95a23c 5204void object_copy_data_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 5205{
9f95a23c 5206 DECODE_START(8, bl);
7c673cae
FG
5207 if (struct_v < 5) {
5208 // old
11fdf7f2
TL
5209 decode(size, bl);
5210 decode(mtime, bl);
7c673cae
FG
5211 {
5212 string category;
11fdf7f2 5213 decode(category, bl); // no longer used
7c673cae 5214 }
11fdf7f2
TL
5215 decode(attrs, bl);
5216 decode(data, bl);
7c673cae 5217 {
9f95a23c 5218 map<string,ceph::buffer::list> omap;
11fdf7f2 5219 decode(omap, bl);
7c673cae 5220 omap_data.clear();
11fdf7f2
TL
5221 if (!omap.empty()) {
5222 using ceph::encode;
5223 encode(omap, omap_data);
5224 }
7c673cae 5225 }
11fdf7f2 5226 decode(cursor, bl);
7c673cae 5227 if (struct_v >= 2)
11fdf7f2 5228 decode(omap_header, bl);
7c673cae 5229 if (struct_v >= 3) {
11fdf7f2
TL
5230 decode(snaps, bl);
5231 decode(snap_seq, bl);
7c673cae
FG
5232 } else {
5233 snaps.clear();
5234 snap_seq = 0;
5235 }
5236 if (struct_v >= 4) {
11fdf7f2
TL
5237 decode(flags, bl);
5238 decode(data_digest, bl);
5239 decode(omap_digest, bl);
7c673cae
FG
5240 }
5241 } else {
5242 // current
11fdf7f2
TL
5243 decode(size, bl);
5244 decode(mtime, bl);
5245 decode(attrs, bl);
5246 decode(data, bl);
5247 decode(omap_data, bl);
5248 decode(cursor, bl);
5249 decode(omap_header, bl);
5250 decode(snaps, bl);
5251 decode(snap_seq, bl);
7c673cae 5252 if (struct_v >= 4) {
11fdf7f2
TL
5253 decode(flags, bl);
5254 decode(data_digest, bl);
5255 decode(omap_digest, bl);
7c673cae
FG
5256 }
5257 if (struct_v >= 6) {
11fdf7f2 5258 decode(reqids, bl);
7c673cae
FG
5259 }
5260 if (struct_v >= 7) {
11fdf7f2
TL
5261 decode(truncate_seq, bl);
5262 decode(truncate_size, bl);
5263 }
5264 if (struct_v >= 8) {
5265 decode(reqid_return_codes, bl);
7c673cae
FG
5266 }
5267 }
5268 DECODE_FINISH(bl);
5269}
5270
5271void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o)
5272{
5273 o.push_back(new object_copy_data_t());
5274
5275 list<object_copy_cursor_t*> cursors;
5276 object_copy_cursor_t::generate_test_instances(cursors);
9f95a23c 5277 auto ci = cursors.begin();
7c673cae
FG
5278 o.back()->cursor = **(ci++);
5279
5280 o.push_back(new object_copy_data_t());
5281 o.back()->cursor = **(ci++);
5282
5283 o.push_back(new object_copy_data_t());
5284 o.back()->size = 1234;
5285 o.back()->mtime.set_from_double(1234);
9f95a23c
TL
5286 ceph::buffer::ptr bp("there", 5);
5287 ceph::buffer::list bl;
7c673cae
FG
5288 bl.push_back(bp);
5289 o.back()->attrs["hello"] = bl;
9f95a23c
TL
5290 ceph::buffer::ptr bp2("not", 3);
5291 ceph::buffer::list bl2;
7c673cae 5292 bl2.push_back(bp2);
9f95a23c 5293 map<string,ceph::buffer::list> omap;
7c673cae 5294 omap["why"] = bl2;
11fdf7f2
TL
5295 using ceph::encode;
5296 encode(omap, o.back()->omap_data);
9f95a23c 5297 ceph::buffer::ptr databp("iamsomedatatocontain", 20);
7c673cae
FG
5298 o.back()->data.push_back(databp);
5299 o.back()->omap_header.append("this is an omap header");
5300 o.back()->snaps.push_back(123);
5301 o.back()->reqids.push_back(make_pair(osd_reqid_t(), version_t()));
5302}
5303
5304void object_copy_data_t::dump(Formatter *f) const
5305{
5306 f->open_object_section("cursor");
5307 cursor.dump(f);
5308 f->close_section(); // cursor
5309 f->dump_int("size", size);
5310 f->dump_stream("mtime") << mtime;
9f95a23c 5311 /* we should really print out the attrs here, but ceph::buffer::list
7c673cae
FG
5312 const-correctness prevents that */
5313 f->dump_int("attrs_size", attrs.size());
5314 f->dump_int("flags", flags);
5315 f->dump_unsigned("data_digest", data_digest);
5316 f->dump_unsigned("omap_digest", omap_digest);
5317 f->dump_int("omap_data_length", omap_data.length());
5318 f->dump_int("omap_header_length", omap_header.length());
5319 f->dump_int("data_length", data.length());
5320 f->open_array_section("snaps");
9f95a23c 5321 for (auto p = snaps.cbegin(); p != snaps.cend(); ++p)
7c673cae
FG
5322 f->dump_unsigned("snap", *p);
5323 f->close_section();
5324 f->open_array_section("reqids");
11fdf7f2 5325 uint32_t idx = 0;
31f18b77 5326 for (auto p = reqids.begin();
7c673cae 5327 p != reqids.end();
11fdf7f2 5328 ++idx, ++p) {
7c673cae
FG
5329 f->open_object_section("extra_reqid");
5330 f->dump_stream("reqid") << p->first;
5331 f->dump_stream("user_version") << p->second;
11fdf7f2
TL
5332 auto it = reqid_return_codes.find(idx);
5333 if (it != reqid_return_codes.end()) {
5334 f->dump_int("return_code", it->second);
5335 }
7c673cae
FG
5336 f->close_section();
5337 }
5338 f->close_section();
5339}
5340
5341// -- pg_create_t --
5342
9f95a23c 5343void pg_create_t::encode(ceph::buffer::list &bl) const
7c673cae
FG
5344{
5345 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5346 encode(created, bl);
5347 encode(parent, bl);
5348 encode(split_bits, bl);
7c673cae
FG
5349 ENCODE_FINISH(bl);
5350}
5351
9f95a23c 5352void pg_create_t::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
5353{
5354 DECODE_START(1, bl);
11fdf7f2
TL
5355 decode(created, bl);
5356 decode(parent, bl);
5357 decode(split_bits, bl);
7c673cae
FG
5358 DECODE_FINISH(bl);
5359}
5360
5361void pg_create_t::dump(Formatter *f) const
5362{
5363 f->dump_unsigned("created", created);
5364 f->dump_stream("parent") << parent;
5365 f->dump_int("split_bits", split_bits);
5366}
5367
5368void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
5369{
5370 o.push_back(new pg_create_t);
11fdf7f2 5371 o.push_back(new pg_create_t(1, pg_t(3, 4), 2));
7c673cae
FG
5372}
5373
5374
5375// -- pg_hit_set_info_t --
5376
9f95a23c 5377void pg_hit_set_info_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
5378{
5379 ENCODE_START(2, 1, bl);
11fdf7f2
TL
5380 encode(begin, bl);
5381 encode(end, bl);
5382 encode(version, bl);
5383 encode(using_gmt, bl);
7c673cae
FG
5384 ENCODE_FINISH(bl);
5385}
5386
9f95a23c 5387void pg_hit_set_info_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
5388{
5389 DECODE_START(2, p);
11fdf7f2
TL
5390 decode(begin, p);
5391 decode(end, p);
5392 decode(version, p);
7c673cae 5393 if (struct_v >= 2) {
11fdf7f2 5394 decode(using_gmt, p);
7c673cae
FG
5395 } else {
5396 using_gmt = false;
5397 }
5398 DECODE_FINISH(p);
5399}
5400
5401void pg_hit_set_info_t::dump(Formatter *f) const
5402{
5403 f->dump_stream("begin") << begin;
5404 f->dump_stream("end") << end;
5405 f->dump_stream("version") << version;
5406 f->dump_stream("using_gmt") << using_gmt;
5407}
5408
5409void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
5410{
5411 ls.push_back(new pg_hit_set_info_t);
5412 ls.push_back(new pg_hit_set_info_t);
5413 ls.back()->begin = utime_t(1, 2);
5414 ls.back()->end = utime_t(3, 4);
5415}
5416
5417
5418// -- pg_hit_set_history_t --
5419
9f95a23c 5420void pg_hit_set_history_t::encode(ceph::buffer::list& bl) const
7c673cae
FG
5421{
5422 ENCODE_START(1, 1, bl);
11fdf7f2 5423 encode(current_last_update, bl);
7c673cae
FG
5424 {
5425 utime_t dummy_stamp;
11fdf7f2 5426 encode(dummy_stamp, bl);
7c673cae
FG
5427 }
5428 {
5429 pg_hit_set_info_t dummy_info;
11fdf7f2 5430 encode(dummy_info, bl);
7c673cae 5431 }
11fdf7f2 5432 encode(history, bl);
7c673cae
FG
5433 ENCODE_FINISH(bl);
5434}
5435
9f95a23c 5436void pg_hit_set_history_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
5437{
5438 DECODE_START(1, p);
11fdf7f2 5439 decode(current_last_update, p);
7c673cae
FG
5440 {
5441 utime_t dummy_stamp;
11fdf7f2 5442 decode(dummy_stamp, p);
7c673cae
FG
5443 }
5444 {
5445 pg_hit_set_info_t dummy_info;
11fdf7f2 5446 decode(dummy_info, p);
7c673cae 5447 }
11fdf7f2 5448 decode(history, p);
7c673cae
FG
5449 DECODE_FINISH(p);
5450}
5451
5452void pg_hit_set_history_t::dump(Formatter *f) const
5453{
5454 f->dump_stream("current_last_update") << current_last_update;
5455 f->open_array_section("history");
9f95a23c 5456 for (auto p = history.cbegin(); p != history.cend(); ++p) {
7c673cae
FG
5457 f->open_object_section("info");
5458 p->dump(f);
5459 f->close_section();
5460 }
5461 f->close_section();
5462}
5463
5464void pg_hit_set_history_t::generate_test_instances(list<pg_hit_set_history_t*>& ls)
5465{
5466 ls.push_back(new pg_hit_set_history_t);
5467 ls.push_back(new pg_hit_set_history_t);
5468 ls.back()->current_last_update = eversion_t(1, 2);
5469 ls.back()->history.push_back(pg_hit_set_info_t());
5470}
5471
7c673cae
FG
5472// -- OSDSuperblock --
5473
9f95a23c 5474void OSDSuperblock::encode(ceph::buffer::list &bl) const
7c673cae 5475{
9f95a23c 5476 ENCODE_START(9, 5, bl);
11fdf7f2
TL
5477 encode(cluster_fsid, bl);
5478 encode(whoami, bl);
5479 encode(current_epoch, bl);
5480 encode(oldest_map, bl);
5481 encode(newest_map, bl);
5482 encode(weight, bl);
7c673cae 5483 compat_features.encode(bl);
11fdf7f2
TL
5484 encode(clean_thru, bl);
5485 encode(mounted, bl);
5486 encode(osd_fsid, bl);
5487 encode((epoch_t)0, bl); // epoch_t last_epoch_marked_full
5488 encode((uint32_t)0, bl); // map<int64_t,epoch_t> pool_last_epoch_marked_full
9f95a23c
TL
5489 encode(purged_snaps_last, bl);
5490 encode(last_purged_snaps_scrub, bl);
7c673cae
FG
5491 ENCODE_FINISH(bl);
5492}
5493
9f95a23c 5494void OSDSuperblock::decode(ceph::buffer::list::const_iterator &bl)
7c673cae 5495{
9f95a23c 5496 DECODE_START_LEGACY_COMPAT_LEN(9, 5, 5, bl);
7c673cae
FG
5497 if (struct_v < 3) {
5498 string magic;
11fdf7f2
TL
5499 decode(magic, bl);
5500 }
5501 decode(cluster_fsid, bl);
5502 decode(whoami, bl);
5503 decode(current_epoch, bl);
5504 decode(oldest_map, bl);
5505 decode(newest_map, bl);
5506 decode(weight, bl);
7c673cae
FG
5507 if (struct_v >= 2) {
5508 compat_features.decode(bl);
5509 } else { //upgrade it!
5510 compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
5511 }
11fdf7f2
TL
5512 decode(clean_thru, bl);
5513 decode(mounted, bl);
7c673cae 5514 if (struct_v >= 4)
11fdf7f2 5515 decode(osd_fsid, bl);
7c673cae
FG
5516 if (struct_v >= 6) {
5517 epoch_t last_map_marked_full;
11fdf7f2 5518 decode(last_map_marked_full, bl);
7c673cae
FG
5519 }
5520 if (struct_v >= 7) {
5521 map<int64_t,epoch_t> pool_last_map_marked_full;
11fdf7f2 5522 decode(pool_last_map_marked_full, bl);
7c673cae 5523 }
9f95a23c
TL
5524 if (struct_v >= 9) {
5525 decode(purged_snaps_last, bl);
5526 decode(last_purged_snaps_scrub, bl);
5527 } else {
5528 purged_snaps_last = 0;
5529 }
7c673cae
FG
5530 DECODE_FINISH(bl);
5531}
5532
5533void OSDSuperblock::dump(Formatter *f) const
5534{
5535 f->dump_stream("cluster_fsid") << cluster_fsid;
5536 f->dump_stream("osd_fsid") << osd_fsid;
5537 f->dump_int("whoami", whoami);
5538 f->dump_int("current_epoch", current_epoch);
5539 f->dump_int("oldest_map", oldest_map);
5540 f->dump_int("newest_map", newest_map);
5541 f->dump_float("weight", weight);
5542 f->open_object_section("compat");
5543 compat_features.dump(f);
5544 f->close_section();
5545 f->dump_int("clean_thru", clean_thru);
5546 f->dump_int("last_epoch_mounted", mounted);
9f95a23c
TL
5547 f->dump_unsigned("purged_snaps_last", purged_snaps_last);
5548 f->dump_stream("last_purged_snaps_scrub") << last_purged_snaps_scrub;
7c673cae
FG
5549}
5550
5551void OSDSuperblock::generate_test_instances(list<OSDSuperblock*>& o)
5552{
5553 OSDSuperblock z;
5554 o.push_back(new OSDSuperblock(z));
11fdf7f2
TL
5555 z.cluster_fsid.parse("01010101-0101-0101-0101-010101010101");
5556 z.osd_fsid.parse("02020202-0202-0202-0202-020202020202");
7c673cae
FG
5557 z.whoami = 3;
5558 z.current_epoch = 4;
5559 z.oldest_map = 5;
5560 z.newest_map = 9;
5561 z.mounted = 8;
5562 z.clean_thru = 7;
5563 o.push_back(new OSDSuperblock(z));
5564 o.push_back(new OSDSuperblock(z));
5565}
5566
5567// -- SnapSet --
5568
9f95a23c 5569void SnapSet::encode(ceph::buffer::list& bl) const
7c673cae
FG
5570{
5571 ENCODE_START(3, 2, bl);
11fdf7f2
TL
5572 encode(seq, bl);
5573 encode(true, bl); // head_exists
5574 encode(snaps, bl);
5575 encode(clones, bl);
5576 encode(clone_overlap, bl);
5577 encode(clone_size, bl);
5578 encode(clone_snaps, bl);
7c673cae
FG
5579 ENCODE_FINISH(bl);
5580}
5581
9f95a23c 5582void SnapSet::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
5583{
5584 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
11fdf7f2 5585 decode(seq, bl);
9f95a23c 5586 bl += 1u; // skip legacy head_exists (always true)
11fdf7f2
TL
5587 decode(snaps, bl);
5588 decode(clones, bl);
5589 decode(clone_overlap, bl);
5590 decode(clone_size, bl);
7c673cae 5591 if (struct_v >= 3) {
11fdf7f2 5592 decode(clone_snaps, bl);
7c673cae
FG
5593 } else {
5594 clone_snaps.clear();
5595 }
5596 DECODE_FINISH(bl);
5597}
5598
5599void SnapSet::dump(Formatter *f) const
5600{
9f95a23c 5601 f->dump_unsigned("seq", seq);
7c673cae 5602 f->open_array_section("clones");
9f95a23c 5603 for (auto p = clones.cbegin(); p != clones.cend(); ++p) {
7c673cae
FG
5604 f->open_object_section("clone");
5605 f->dump_unsigned("snap", *p);
94b18763
FG
5606 auto cs = clone_size.find(*p);
5607 if (cs != clone_size.end())
5608 f->dump_unsigned("size", cs->second);
5609 else
5610 f->dump_string("size", "????");
5611 auto co = clone_overlap.find(*p);
5612 if (co != clone_overlap.end())
5613 f->dump_stream("overlap") << co->second;
5614 else
5615 f->dump_stream("overlap") << "????";
7c673cae
FG
5616 auto q = clone_snaps.find(*p);
5617 if (q != clone_snaps.end()) {
5618 f->open_array_section("snaps");
5619 for (auto s : q->second) {
5620 f->dump_unsigned("snap", s);
5621 }
5622 f->close_section();
5623 }
5624 f->close_section();
5625 }
5626 f->close_section();
5627}
5628
5629void SnapSet::generate_test_instances(list<SnapSet*>& o)
5630{
5631 o.push_back(new SnapSet);
5632 o.push_back(new SnapSet);
7c673cae
FG
5633 o.back()->seq = 123;
5634 o.back()->snaps.push_back(123);
5635 o.back()->snaps.push_back(12);
5636 o.push_back(new SnapSet);
7c673cae
FG
5637 o.back()->seq = 123;
5638 o.back()->snaps.push_back(123);
5639 o.back()->snaps.push_back(12);
5640 o.back()->clones.push_back(12);
5641 o.back()->clone_size[12] = 12345;
5642 o.back()->clone_overlap[12];
5643 o.back()->clone_snaps[12] = {12, 10, 8};
5644}
5645
5646ostream& operator<<(ostream& out, const SnapSet& cs)
5647{
11fdf7f2
TL
5648 return out << cs.seq << "=" << cs.snaps << ":"
5649 << cs.clone_snaps;
7c673cae
FG
5650}
5651
5652void SnapSet::from_snap_set(const librados::snap_set_t& ss, bool legacy)
5653{
5654 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
5655 // correct: it will not include snaps that still logically exist
5656 // but for which there was no clone that is defined. For all
5657 // practical purposes this doesn't matter, since we only use that
5658 // information to clone on the OSD, and we have already moved
5659 // forward past that part of the object history.
5660
5661 seq = ss.seq;
5662 set<snapid_t> _snaps;
5663 set<snapid_t> _clones;
9f95a23c 5664 for (auto p = ss.clones.cbegin(); p != ss.clones.cend(); ++p) {
11fdf7f2 5665 if (p->cloneid != librados::SNAP_HEAD) {
7c673cae
FG
5666 _clones.insert(p->cloneid);
5667 _snaps.insert(p->snaps.begin(), p->snaps.end());
5668 clone_size[p->cloneid] = p->size;
5669 clone_overlap[p->cloneid]; // the entry must exist, even if it's empty.
9f95a23c 5670 for (auto q = p->overlap.cbegin(); q != p->overlap.cend(); ++q)
7c673cae
FG
5671 clone_overlap[p->cloneid].insert(q->first, q->second);
5672 if (!legacy) {
5673 // p->snaps is ascending; clone_snaps is descending
5674 vector<snapid_t>& v = clone_snaps[p->cloneid];
5675 for (auto q = p->snaps.rbegin(); q != p->snaps.rend(); ++q) {
5676 v.push_back(*q);
5677 }
5678 }
5679 }
5680 }
5681
5682 // ascending
5683 clones.clear();
5684 clones.reserve(_clones.size());
9f95a23c 5685 for (auto p = _clones.begin(); p != _clones.end(); ++p)
7c673cae
FG
5686 clones.push_back(*p);
5687
5688 // descending
5689 snaps.clear();
5690 snaps.reserve(_snaps.size());
9f95a23c 5691 for (auto p = _snaps.rbegin();
7c673cae
FG
5692 p != _snaps.rend(); ++p)
5693 snaps.push_back(*p);
5694}
5695
5696uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
5697{
11fdf7f2 5698 ceph_assert(clone_size.count(clone));
7c673cae 5699 uint64_t size = clone_size.find(clone)->second;
11fdf7f2 5700 ceph_assert(clone_overlap.count(clone));
7c673cae 5701 const interval_set<uint64_t> &overlap = clone_overlap.find(clone)->second;
11fdf7f2
TL
5702 ceph_assert(size >= (uint64_t)overlap.size());
5703 return size - overlap.size();
7c673cae
FG
5704}
5705
5706void SnapSet::filter(const pg_pool_t &pinfo)
5707{
5708 vector<snapid_t> oldsnaps;
5709 oldsnaps.swap(snaps);
9f95a23c 5710 for (auto i = oldsnaps.cbegin(); i != oldsnaps.cend(); ++i) {
7c673cae
FG
5711 if (!pinfo.is_removed_snap(*i))
5712 snaps.push_back(*i);
5713 }
5714}
5715
5716SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
5717{
5718 SnapSet ss = *this;
5719 ss.filter(pinfo);
5720 return ss;
5721}
5722
5723// -- watch_info_t --
5724
9f95a23c 5725void watch_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae
FG
5726{
5727 ENCODE_START(4, 3, bl);
11fdf7f2
TL
5728 encode(cookie, bl);
5729 encode(timeout_seconds, bl);
5730 encode(addr, bl, features);
7c673cae
FG
5731 ENCODE_FINISH(bl);
5732}
5733
9f95a23c 5734void watch_info_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
5735{
5736 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
11fdf7f2 5737 decode(cookie, bl);
7c673cae
FG
5738 if (struct_v < 2) {
5739 uint64_t ver;
11fdf7f2 5740 decode(ver, bl);
7c673cae 5741 }
11fdf7f2 5742 decode(timeout_seconds, bl);
7c673cae 5743 if (struct_v >= 4) {
11fdf7f2 5744 decode(addr, bl);
7c673cae
FG
5745 }
5746 DECODE_FINISH(bl);
5747}
5748
5749void watch_info_t::dump(Formatter *f) const
5750{
5751 f->dump_unsigned("cookie", cookie);
5752 f->dump_unsigned("timeout_seconds", timeout_seconds);
5753 f->open_object_section("addr");
5754 addr.dump(f);
5755 f->close_section();
5756}
5757
5758void watch_info_t::generate_test_instances(list<watch_info_t*>& o)
5759{
5760 o.push_back(new watch_info_t);
5761 o.push_back(new watch_info_t);
5762 o.back()->cookie = 123;
5763 o.back()->timeout_seconds = 99;
5764 entity_addr_t ea;
5765 ea.set_type(entity_addr_t::TYPE_LEGACY);
5766 ea.set_nonce(1);
5767 ea.set_family(AF_INET);
5768 ea.set_in4_quad(0, 127);
5769 ea.set_in4_quad(1, 0);
5770 ea.set_in4_quad(2, 1);
5771 ea.set_in4_quad(3, 2);
5772 ea.set_port(2);
5773 o.back()->addr = ea;
5774}
5775
11fdf7f2
TL
5776// -- chunk_info_t --
5777
9f95a23c 5778void chunk_info_t::encode(ceph::buffer::list& bl) const
11fdf7f2
TL
5779{
5780 ENCODE_START(1, 1, bl);
5781 encode(offset, bl);
5782 encode(length, bl);
5783 encode(oid, bl);
5784 __u32 _flags = flags;
5785 encode(_flags, bl);
5786 ENCODE_FINISH(bl);
5787}
5788
9f95a23c 5789void chunk_info_t::decode(ceph::buffer::list::const_iterator& bl)
11fdf7f2
TL
5790{
5791 DECODE_START(1, bl);
5792 decode(offset, bl);
5793 decode(length, bl);
5794 decode(oid, bl);
5795 __u32 _flags;
5796 decode(_flags, bl);
5797 flags = (cflag_t)_flags;
5798 DECODE_FINISH(bl);
5799}
5800
5801void chunk_info_t::dump(Formatter *f) const
5802{
5803 f->dump_unsigned("length", length);
5804 f->open_object_section("oid");
5805 oid.dump(f);
5806 f->close_section();
5807 f->dump_unsigned("flags", flags);
5808}
5809
5810ostream& operator<<(ostream& out, const chunk_info_t& ci)
5811{
5812 return out << "(len: " << ci.length << " oid: " << ci.oid
5813 << " offset: " << ci.offset
5814 << " flags: " << ci.get_flag_string(ci.flags) << ")";
5815}
5816
31f18b77
FG
5817// -- object_manifest_t --
5818
9f95a23c 5819void object_manifest_t::encode(ceph::buffer::list& bl) const
31f18b77
FG
5820{
5821 ENCODE_START(1, 1, bl);
11fdf7f2 5822 encode(type, bl);
31f18b77
FG
5823 switch (type) {
5824 case TYPE_NONE: break;
5825 case TYPE_REDIRECT:
11fdf7f2
TL
5826 encode(redirect_target, bl);
5827 break;
5828 case TYPE_CHUNKED:
9f95a23c 5829 encode(chunk_map, bl);
31f18b77
FG
5830 break;
5831 default:
5832 ceph_abort();
5833 }
5834 ENCODE_FINISH(bl);
5835}
5836
9f95a23c 5837void object_manifest_t::decode(ceph::buffer::list::const_iterator& bl)
31f18b77
FG
5838{
5839 DECODE_START(1, bl);
11fdf7f2 5840 decode(type, bl);
31f18b77
FG
5841 switch (type) {
5842 case TYPE_NONE: break;
5843 case TYPE_REDIRECT:
11fdf7f2
TL
5844 decode(redirect_target, bl);
5845 break;
5846 case TYPE_CHUNKED:
5847 decode(chunk_map, bl);
31f18b77
FG
5848 break;
5849 default:
5850 ceph_abort();
5851 }
5852 DECODE_FINISH(bl);
5853}
5854
5855void object_manifest_t::dump(Formatter *f) const
5856{
5857 f->dump_unsigned("type", type);
11fdf7f2
TL
5858 if (type == TYPE_REDIRECT) {
5859 f->open_object_section("redirect_target");
5860 redirect_target.dump(f);
5861 f->close_section();
5862 } else if (type == TYPE_CHUNKED) {
5863 f->open_array_section("chunk_map");
5864 for (auto& p : chunk_map) {
5865 f->open_object_section("chunk");
5866 f->dump_unsigned("offset", p.first);
5867 p.second.dump(f);
5868 f->close_section();
5869 }
5870 f->close_section();
5871 }
31f18b77
FG
5872}
5873
5874void object_manifest_t::generate_test_instances(list<object_manifest_t*>& o)
5875{
5876 o.push_back(new object_manifest_t());
5877 o.back()->type = TYPE_REDIRECT;
5878}
5879
5880ostream& operator<<(ostream& out, const object_manifest_t& om)
5881{
11fdf7f2
TL
5882 out << "manifest(" << om.get_type_name();
5883 if (om.is_redirect()) {
5884 out << " " << om.redirect_target;
5885 } else if (om.is_chunked()) {
5886 out << " " << om.chunk_map;
5887 }
5888 out << ")";
5889 return out;
31f18b77 5890}
7c673cae
FG
5891
5892// -- object_info_t --
5893
5894void object_info_t::copy_user_bits(const object_info_t& other)
5895{
5896 // these bits are copied from head->clone.
5897 size = other.size;
5898 mtime = other.mtime;
5899 local_mtime = other.local_mtime;
5900 last_reqid = other.last_reqid;
5901 truncate_seq = other.truncate_seq;
5902 truncate_size = other.truncate_size;
5903 flags = other.flags;
5904 user_version = other.user_version;
5905 data_digest = other.data_digest;
5906 omap_digest = other.omap_digest;
5907}
5908
9f95a23c 5909void object_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae
FG
5910{
5911 object_locator_t myoloc(soid);
5912 map<entity_name_t, watch_info_t> old_watchers;
9f95a23c 5913 for (auto i = watchers.cbegin(); i != watchers.cend(); ++i) {
7c673cae
FG
5914 old_watchers.insert(make_pair(i->first.second, i->second));
5915 }
31f18b77 5916 ENCODE_START(17, 8, bl);
11fdf7f2
TL
5917 encode(soid, bl);
5918 encode(myoloc, bl); //Retained for compatibility
5919 encode((__u32)0, bl); // was category, no longer used
5920 encode(version, bl);
5921 encode(prior_version, bl);
5922 encode(last_reqid, bl);
5923 encode(size, bl);
5924 encode(mtime, bl);
7c673cae 5925 if (soid.snap == CEPH_NOSNAP)
11fdf7f2 5926 encode(osd_reqid_t(), bl); // used to be wrlock_by
7c673cae 5927 else
11fdf7f2
TL
5928 encode((uint32_t)0, bl); // was legacy_snaps
5929 encode(truncate_seq, bl);
5930 encode(truncate_size, bl);
5931 encode(is_lost(), bl);
5932 encode(old_watchers, bl, features);
7c673cae
FG
5933 /* shenanigans to avoid breaking backwards compatibility in the disk format.
5934 * When we can, switch this out for simply putting the version_t on disk. */
5935 eversion_t user_eversion(0, user_version);
11fdf7f2
TL
5936 encode(user_eversion, bl);
5937 encode(test_flag(FLAG_USES_TMAP), bl);
5938 encode(watchers, bl, features);
7c673cae 5939 __u32 _flags = flags;
11fdf7f2
TL
5940 encode(_flags, bl);
5941 encode(local_mtime, bl);
5942 encode(data_digest, bl);
5943 encode(omap_digest, bl);
5944 encode(expected_object_size, bl);
5945 encode(expected_write_size, bl);
5946 encode(alloc_hint_flags, bl);
31f18b77 5947 if (has_manifest()) {
11fdf7f2 5948 encode(manifest, bl);
31f18b77 5949 }
7c673cae
FG
5950 ENCODE_FINISH(bl);
5951}
5952
9f95a23c 5953void object_info_t::decode(ceph::buffer::list::const_iterator& bl)
7c673cae
FG
5954{
5955 object_locator_t myoloc;
31f18b77 5956 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl);
7c673cae 5957 map<entity_name_t, watch_info_t> old_watchers;
11fdf7f2
TL
5958 decode(soid, bl);
5959 decode(myoloc, bl);
7c673cae
FG
5960 {
5961 string category;
11fdf7f2 5962 decode(category, bl); // no longer used
7c673cae 5963 }
11fdf7f2
TL
5964 decode(version, bl);
5965 decode(prior_version, bl);
5966 decode(last_reqid, bl);
5967 decode(size, bl);
5968 decode(mtime, bl);
7c673cae
FG
5969 if (soid.snap == CEPH_NOSNAP) {
5970 osd_reqid_t wrlock_by;
11fdf7f2 5971 decode(wrlock_by, bl);
7c673cae 5972 } else {
11fdf7f2
TL
5973 vector<snapid_t> legacy_snaps;
5974 decode(legacy_snaps, bl);
7c673cae 5975 }
11fdf7f2
TL
5976 decode(truncate_seq, bl);
5977 decode(truncate_size, bl);
7c673cae
FG
5978
5979 // if this is struct_v >= 13, we will overwrite this
5980 // below since this field is just here for backwards
5981 // compatibility
5982 __u8 lo;
11fdf7f2 5983 decode(lo, bl);
7c673cae
FG
5984 flags = (flag_t)lo;
5985
11fdf7f2 5986 decode(old_watchers, bl);
7c673cae 5987 eversion_t user_eversion;
11fdf7f2 5988 decode(user_eversion, bl);
7c673cae
FG
5989 user_version = user_eversion.version;
5990
5991 if (struct_v >= 9) {
5992 bool uses_tmap = false;
11fdf7f2 5993 decode(uses_tmap, bl);
7c673cae
FG
5994 if (uses_tmap)
5995 set_flag(FLAG_USES_TMAP);
5996 } else {
5997 set_flag(FLAG_USES_TMAP);
5998 }
5999 if (struct_v < 10)
6000 soid.pool = myoloc.pool;
6001 if (struct_v >= 11) {
11fdf7f2 6002 decode(watchers, bl);
7c673cae 6003 } else {
9f95a23c 6004 for (auto i = old_watchers.begin(); i != old_watchers.end(); ++i) {
7c673cae
FG
6005 watchers.insert(
6006 make_pair(
6007 make_pair(i->second.cookie, i->first), i->second));
6008 }
6009 }
6010 if (struct_v >= 13) {
6011 __u32 _flags;
11fdf7f2 6012 decode(_flags, bl);
7c673cae
FG
6013 flags = (flag_t)_flags;
6014 }
6015 if (struct_v >= 14) {
11fdf7f2 6016 decode(local_mtime, bl);
7c673cae
FG
6017 } else {
6018 local_mtime = utime_t();
6019 }
6020 if (struct_v >= 15) {
11fdf7f2
TL
6021 decode(data_digest, bl);
6022 decode(omap_digest, bl);
7c673cae
FG
6023 } else {
6024 data_digest = omap_digest = -1;
6025 clear_flag(FLAG_DATA_DIGEST);
6026 clear_flag(FLAG_OMAP_DIGEST);
6027 }
6028 if (struct_v >= 16) {
11fdf7f2
TL
6029 decode(expected_object_size, bl);
6030 decode(expected_write_size, bl);
6031 decode(alloc_hint_flags, bl);
7c673cae
FG
6032 } else {
6033 expected_object_size = 0;
6034 expected_write_size = 0;
6035 alloc_hint_flags = 0;
6036 }
31f18b77
FG
6037 if (struct_v >= 17) {
6038 if (has_manifest()) {
11fdf7f2 6039 decode(manifest, bl);
31f18b77
FG
6040 }
6041 }
7c673cae
FG
6042 DECODE_FINISH(bl);
6043}
6044
6045void object_info_t::dump(Formatter *f) const
6046{
6047 f->open_object_section("oid");
6048 soid.dump(f);
6049 f->close_section();
6050 f->dump_stream("version") << version;
6051 f->dump_stream("prior_version") << prior_version;
6052 f->dump_stream("last_reqid") << last_reqid;
6053 f->dump_unsigned("user_version", user_version);
6054 f->dump_unsigned("size", size);
6055 f->dump_stream("mtime") << mtime;
6056 f->dump_stream("local_mtime") << local_mtime;
6057 f->dump_unsigned("lost", (int)is_lost());
94b18763
FG
6058 vector<string> sv = get_flag_vector(flags);
6059 f->open_array_section("flags");
6060 for (auto str: sv)
6061 f->dump_string("flags", str);
6062 f->close_section();
7c673cae
FG
6063 f->dump_unsigned("truncate_seq", truncate_seq);
6064 f->dump_unsigned("truncate_size", truncate_size);
94b18763
FG
6065 f->dump_format("data_digest", "0x%08x", data_digest);
6066 f->dump_format("omap_digest", "0x%08x", omap_digest);
7c673cae
FG
6067 f->dump_unsigned("expected_object_size", expected_object_size);
6068 f->dump_unsigned("expected_write_size", expected_write_size);
6069 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
31f18b77 6070 f->dump_object("manifest", manifest);
7c673cae 6071 f->open_object_section("watchers");
9f95a23c 6072 for (auto p = watchers.cbegin(); p != watchers.cend(); ++p) {
7c673cae
FG
6073 stringstream ss;
6074 ss << p->first.second;
6075 f->open_object_section(ss.str().c_str());
6076 p->second.dump(f);
6077 f->close_section();
6078 }
6079 f->close_section();
6080}
6081
6082void object_info_t::generate_test_instances(list<object_info_t*>& o)
6083{
6084 o.push_back(new object_info_t());
6085
6086 // fixme
6087}
6088
6089
6090ostream& operator<<(ostream& out, const object_info_t& oi)
6091{
6092 out << oi.soid << "(" << oi.version
6093 << " " << oi.last_reqid;
7c673cae
FG
6094 if (oi.flags)
6095 out << " " << oi.get_flag_string();
6096 out << " s " << oi.size;
6097 out << " uv " << oi.user_version;
6098 if (oi.is_data_digest())
6099 out << " dd " << std::hex << oi.data_digest << std::dec;
6100 if (oi.is_omap_digest())
6101 out << " od " << std::hex << oi.omap_digest << std::dec;
6102 out << " alloc_hint [" << oi.expected_object_size
6103 << " " << oi.expected_write_size
6104 << " " << oi.alloc_hint_flags << "]";
31f18b77
FG
6105 if (oi.has_manifest())
6106 out << " " << oi.manifest;
7c673cae
FG
6107 out << ")";
6108 return out;
6109}
6110
6111// -- ObjectRecovery --
9f95a23c 6112void ObjectRecoveryProgress::encode(ceph::buffer::list &bl) const
7c673cae
FG
6113{
6114 ENCODE_START(1, 1, bl);
11fdf7f2
TL
6115 encode(first, bl);
6116 encode(data_complete, bl);
6117 encode(data_recovered_to, bl);
6118 encode(omap_recovered_to, bl);
6119 encode(omap_complete, bl);
7c673cae
FG
6120 ENCODE_FINISH(bl);
6121}
6122
9f95a23c 6123void ObjectRecoveryProgress::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6124{
6125 DECODE_START(1, bl);
11fdf7f2
TL
6126 decode(first, bl);
6127 decode(data_complete, bl);
6128 decode(data_recovered_to, bl);
6129 decode(omap_recovered_to, bl);
6130 decode(omap_complete, bl);
7c673cae
FG
6131 DECODE_FINISH(bl);
6132}
6133
6134ostream &operator<<(ostream &out, const ObjectRecoveryProgress &prog)
6135{
6136 return prog.print(out);
6137}
6138
6139void ObjectRecoveryProgress::generate_test_instances(
6140 list<ObjectRecoveryProgress*>& o)
6141{
6142 o.push_back(new ObjectRecoveryProgress);
6143 o.back()->first = false;
6144 o.back()->data_complete = true;
6145 o.back()->omap_complete = true;
6146 o.back()->data_recovered_to = 100;
6147
6148 o.push_back(new ObjectRecoveryProgress);
6149 o.back()->first = true;
6150 o.back()->data_complete = false;
6151 o.back()->omap_complete = false;
6152 o.back()->data_recovered_to = 0;
6153}
6154
6155ostream &ObjectRecoveryProgress::print(ostream &out) const
6156{
6157 return out << "ObjectRecoveryProgress("
6158 << ( first ? "" : "!" ) << "first, "
6159 << "data_recovered_to:" << data_recovered_to
6160 << ", data_complete:" << ( data_complete ? "true" : "false" )
6161 << ", omap_recovered_to:" << omap_recovered_to
6162 << ", omap_complete:" << ( omap_complete ? "true" : "false" )
224ce89b 6163 << ", error:" << ( error ? "true" : "false" )
7c673cae
FG
6164 << ")";
6165}
6166
6167void ObjectRecoveryProgress::dump(Formatter *f) const
6168{
6169 f->dump_int("first?", first);
6170 f->dump_int("data_complete?", data_complete);
6171 f->dump_unsigned("data_recovered_to", data_recovered_to);
6172 f->dump_int("omap_complete?", omap_complete);
6173 f->dump_string("omap_recovered_to", omap_recovered_to);
6174}
6175
9f95a23c 6176void ObjectRecoveryInfo::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae 6177{
9f95a23c 6178 ENCODE_START(3, 1, bl);
11fdf7f2
TL
6179 encode(soid, bl);
6180 encode(version, bl);
6181 encode(size, bl);
6182 encode(oi, bl, features);
6183 encode(ss, bl);
6184 encode(copy_subset, bl);
6185 encode(clone_subset, bl);
9f95a23c 6186 encode(object_exist, bl);
7c673cae
FG
6187 ENCODE_FINISH(bl);
6188}
6189
9f95a23c 6190void ObjectRecoveryInfo::decode(ceph::buffer::list::const_iterator &bl,
7c673cae
FG
6191 int64_t pool)
6192{
9f95a23c 6193 DECODE_START(3, bl);
11fdf7f2
TL
6194 decode(soid, bl);
6195 decode(version, bl);
6196 decode(size, bl);
6197 decode(oi, bl);
6198 decode(ss, bl);
6199 decode(copy_subset, bl);
6200 decode(clone_subset, bl);
9f95a23c
TL
6201 if (struct_v > 2)
6202 decode(object_exist, bl);
6203 else
6204 object_exist = false;
7c673cae 6205 DECODE_FINISH(bl);
7c673cae
FG
6206 if (struct_v < 2) {
6207 if (!soid.is_max() && soid.pool == -1)
6208 soid.pool = pool;
6209 map<hobject_t, interval_set<uint64_t>> tmp;
6210 tmp.swap(clone_subset);
9f95a23c 6211 for (auto i = tmp.begin(); i != tmp.end(); ++i) {
7c673cae
FG
6212 hobject_t first(i->first);
6213 if (!first.is_max() && first.pool == -1)
6214 first.pool = pool;
6215 clone_subset[first].swap(i->second);
6216 }
6217 }
6218}
6219
6220void ObjectRecoveryInfo::generate_test_instances(
6221 list<ObjectRecoveryInfo*>& o)
6222{
6223 o.push_back(new ObjectRecoveryInfo);
6224 o.back()->soid = hobject_t(sobject_t("key", CEPH_NOSNAP));
6225 o.back()->version = eversion_t(0,0);
6226 o.back()->size = 100;
9f95a23c 6227 o.back()->object_exist = false;
7c673cae
FG
6228}
6229
6230
6231void ObjectRecoveryInfo::dump(Formatter *f) const
6232{
6233 f->dump_stream("object") << soid;
6234 f->dump_stream("at_version") << version;
6235 f->dump_stream("size") << size;
6236 {
6237 f->open_object_section("object_info");
6238 oi.dump(f);
6239 f->close_section();
6240 }
6241 {
6242 f->open_object_section("snapset");
6243 ss.dump(f);
6244 f->close_section();
6245 }
6246 f->dump_stream("copy_subset") << copy_subset;
6247 f->dump_stream("clone_subset") << clone_subset;
9f95a23c 6248 f->dump_stream("object_exist") << object_exist;
7c673cae
FG
6249}
6250
6251ostream& operator<<(ostream& out, const ObjectRecoveryInfo &inf)
6252{
6253 return inf.print(out);
6254}
6255
6256ostream &ObjectRecoveryInfo::print(ostream &out) const
6257{
6258 return out << "ObjectRecoveryInfo("
6259 << soid << "@" << version
6260 << ", size: " << size
6261 << ", copy_subset: " << copy_subset
6262 << ", clone_subset: " << clone_subset
6263 << ", snapset: " << ss
9f95a23c 6264 << ", object_exist: " << object_exist
7c673cae
FG
6265 << ")";
6266}
6267
6268// -- PushReplyOp --
6269void PushReplyOp::generate_test_instances(list<PushReplyOp*> &o)
6270{
6271 o.push_back(new PushReplyOp);
6272 o.push_back(new PushReplyOp);
6273 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6274 o.push_back(new PushReplyOp);
6275 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6276}
6277
9f95a23c 6278void PushReplyOp::encode(ceph::buffer::list &bl) const
7c673cae
FG
6279{
6280 ENCODE_START(1, 1, bl);
11fdf7f2 6281 encode(soid, bl);
7c673cae
FG
6282 ENCODE_FINISH(bl);
6283}
6284
9f95a23c 6285void PushReplyOp::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6286{
6287 DECODE_START(1, bl);
11fdf7f2 6288 decode(soid, bl);
7c673cae
FG
6289 DECODE_FINISH(bl);
6290}
6291
6292void PushReplyOp::dump(Formatter *f) const
6293{
6294 f->dump_stream("soid") << soid;
6295}
6296
6297ostream &PushReplyOp::print(ostream &out) const
6298{
6299 return out
6300 << "PushReplyOp(" << soid
6301 << ")";
6302}
6303
6304ostream& operator<<(ostream& out, const PushReplyOp &op)
6305{
6306 return op.print(out);
6307}
6308
6309uint64_t PushReplyOp::cost(CephContext *cct) const
6310{
6311
6312 return cct->_conf->osd_push_per_object_cost +
6313 cct->_conf->osd_recovery_max_chunk;
6314}
6315
6316// -- PullOp --
6317void PullOp::generate_test_instances(list<PullOp*> &o)
6318{
6319 o.push_back(new PullOp);
6320 o.push_back(new PullOp);
6321 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6322 o.back()->recovery_info.version = eversion_t(3, 10);
6323 o.push_back(new PullOp);
6324 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6325 o.back()->recovery_info.version = eversion_t(0, 0);
6326}
6327
9f95a23c 6328void PullOp::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae
FG
6329{
6330 ENCODE_START(1, 1, bl);
11fdf7f2
TL
6331 encode(soid, bl);
6332 encode(recovery_info, bl, features);
6333 encode(recovery_progress, bl);
7c673cae
FG
6334 ENCODE_FINISH(bl);
6335}
6336
9f95a23c 6337void PullOp::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6338{
6339 DECODE_START(1, bl);
11fdf7f2
TL
6340 decode(soid, bl);
6341 decode(recovery_info, bl);
6342 decode(recovery_progress, bl);
7c673cae
FG
6343 DECODE_FINISH(bl);
6344}
6345
6346void PullOp::dump(Formatter *f) const
6347{
6348 f->dump_stream("soid") << soid;
6349 {
6350 f->open_object_section("recovery_info");
6351 recovery_info.dump(f);
6352 f->close_section();
6353 }
6354 {
6355 f->open_object_section("recovery_progress");
6356 recovery_progress.dump(f);
6357 f->close_section();
6358 }
6359}
6360
6361ostream &PullOp::print(ostream &out) const
6362{
6363 return out
6364 << "PullOp(" << soid
6365 << ", recovery_info: " << recovery_info
6366 << ", recovery_progress: " << recovery_progress
6367 << ")";
6368}
6369
6370ostream& operator<<(ostream& out, const PullOp &op)
6371{
6372 return op.print(out);
6373}
6374
6375uint64_t PullOp::cost(CephContext *cct) const
6376{
6377 return cct->_conf->osd_push_per_object_cost +
6378 cct->_conf->osd_recovery_max_chunk;
6379}
6380
6381// -- PushOp --
6382void PushOp::generate_test_instances(list<PushOp*> &o)
6383{
6384 o.push_back(new PushOp);
6385 o.push_back(new PushOp);
6386 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6387 o.back()->version = eversion_t(3, 10);
6388 o.push_back(new PushOp);
6389 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6390 o.back()->version = eversion_t(0, 0);
6391}
6392
9f95a23c 6393void PushOp::encode(ceph::buffer::list &bl, uint64_t features) const
7c673cae
FG
6394{
6395 ENCODE_START(1, 1, bl);
11fdf7f2
TL
6396 encode(soid, bl);
6397 encode(version, bl);
6398 encode(data, bl);
6399 encode(data_included, bl);
6400 encode(omap_header, bl);
6401 encode(omap_entries, bl);
6402 encode(attrset, bl);
6403 encode(recovery_info, bl, features);
6404 encode(after_progress, bl);
6405 encode(before_progress, bl);
7c673cae
FG
6406 ENCODE_FINISH(bl);
6407}
6408
9f95a23c 6409void PushOp::decode(ceph::buffer::list::const_iterator &bl)
7c673cae
FG
6410{
6411 DECODE_START(1, bl);
11fdf7f2
TL
6412 decode(soid, bl);
6413 decode(version, bl);
6414 decode(data, bl);
6415 decode(data_included, bl);
6416 decode(omap_header, bl);
6417 decode(omap_entries, bl);
6418 decode(attrset, bl);
6419 decode(recovery_info, bl);
6420 decode(after_progress, bl);
6421 decode(before_progress, bl);
7c673cae
FG
6422 DECODE_FINISH(bl);
6423}
6424
6425void PushOp::dump(Formatter *f) const
6426{
6427 f->dump_stream("soid") << soid;
6428 f->dump_stream("version") << version;
6429 f->dump_int("data_len", data.length());
6430 f->dump_stream("data_included") << data_included;
6431 f->dump_int("omap_header_len", omap_header.length());
6432 f->dump_int("omap_entries_len", omap_entries.size());
6433 f->dump_int("attrset_len", attrset.size());
6434 {
6435 f->open_object_section("recovery_info");
6436 recovery_info.dump(f);
6437 f->close_section();
6438 }
6439 {
6440 f->open_object_section("after_progress");
6441 after_progress.dump(f);
6442 f->close_section();
6443 }
6444 {
6445 f->open_object_section("before_progress");
6446 before_progress.dump(f);
6447 f->close_section();
6448 }
6449}
6450
6451ostream &PushOp::print(ostream &out) const
6452{
6453 return out
6454 << "PushOp(" << soid
6455 << ", version: " << version
6456 << ", data_included: " << data_included
6457 << ", data_size: " << data.length()
6458 << ", omap_header_size: " << omap_header.length()
6459 << ", omap_entries_size: " << omap_entries.size()
6460 << ", attrset_size: " << attrset.size()
6461 << ", recovery_info: " << recovery_info
6462 << ", after_progress: " << after_progress
6463 << ", before_progress: " << before_progress
6464 << ")";
6465}
6466
6467ostream& operator<<(ostream& out, const PushOp &op)
6468{
6469 return op.print(out);
6470}
6471
6472uint64_t PushOp::cost(CephContext *cct) const
6473{
6474 uint64_t cost = data_included.size();
9f95a23c 6475 for (auto i = omap_entries.cbegin(); i != omap_entries.cend(); ++i) {
7c673cae
FG
6476 cost += i->second.length();
6477 }
6478 cost += cct->_conf->osd_push_per_object_cost;
6479 return cost;
6480}
6481
6482// -- ScrubMap --
6483
6484void ScrubMap::merge_incr(const ScrubMap &l)
6485{
11fdf7f2 6486 ceph_assert(valid_through == l.incr_since);
7c673cae
FG
6487 valid_through = l.valid_through;
6488
9f95a23c 6489 for (auto p = l.objects.cbegin(); p != l.objects.cend(); ++p){
7c673cae 6490 if (p->second.negative) {
9f95a23c 6491 auto q = objects.find(p->first);
7c673cae
FG
6492 if (q != objects.end()) {
6493 objects.erase(q);
6494 }
6495 } else {
6496 objects[p->first] = p->second;
6497 }
6498 }
6499}
6500
9f95a23c 6501void ScrubMap::encode(ceph::buffer::list& bl) const
7c673cae
FG
6502{
6503 ENCODE_START(3, 2, bl);
11fdf7f2
TL
6504 encode(objects, bl);
6505 encode((__u32)0, bl); // used to be attrs; now deprecated
9f95a23c 6506 ceph::buffer::list old_logbl; // not used
11fdf7f2
TL
6507 encode(old_logbl, bl);
6508 encode(valid_through, bl);
6509 encode(incr_since, bl);
7c673cae
FG
6510 ENCODE_FINISH(bl);
6511}
6512
9f95a23c 6513void ScrubMap::decode(ceph::buffer::list::const_iterator& bl, int64_t pool)
7c673cae
FG
6514{
6515 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
11fdf7f2 6516 decode(objects, bl);
7c673cae
FG
6517 {
6518 map<string,string> attrs; // deprecated
11fdf7f2 6519 decode(attrs, bl);
7c673cae 6520 }
9f95a23c 6521 ceph::buffer::list old_logbl; // not used
11fdf7f2
TL
6522 decode(old_logbl, bl);
6523 decode(valid_through, bl);
6524 decode(incr_since, bl);
7c673cae
FG
6525 DECODE_FINISH(bl);
6526
6527 // handle hobject_t upgrade
6528 if (struct_v < 3) {
6529 map<hobject_t, object> tmp;
6530 tmp.swap(objects);
9f95a23c 6531 for (auto i = tmp.begin(); i != tmp.end(); ++i) {
7c673cae
FG
6532 hobject_t first(i->first);
6533 if (!first.is_max() && first.pool == -1)
6534 first.pool = pool;
6535 objects[first] = i->second;
6536 }
6537 }
6538}
6539
6540void ScrubMap::dump(Formatter *f) const
6541{
6542 f->dump_stream("valid_through") << valid_through;
6543 f->dump_stream("incremental_since") << incr_since;
6544 f->open_array_section("objects");
9f95a23c 6545 for (auto p = objects.cbegin(); p != objects.cend(); ++p) {
7c673cae
FG
6546 f->open_object_section("object");
6547 f->dump_string("name", p->first.oid.name);
6548 f->dump_unsigned("hash", p->first.get_hash());
6549 f->dump_string("key", p->first.get_key());
6550 f->dump_int("snapid", p->first.snap);
6551 p->second.dump(f);
6552 f->close_section();
6553 }
6554 f->close_section();
6555}
6556
6557void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
6558{
6559 o.push_back(new ScrubMap);
6560 o.push_back(new ScrubMap);
6561 o.back()->valid_through = eversion_t(1, 2);
6562 o.back()->incr_since = eversion_t(3, 4);
6563 list<object*> obj;
6564 object::generate_test_instances(obj);
6565 o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
6566 obj.pop_back();
6567 o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
6568}
6569
6570// -- ScrubMap::object --
6571
9f95a23c 6572void ScrubMap::object::encode(ceph::buffer::list& bl) const
7c673cae
FG
6573{
6574 bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
11fdf7f2
TL
6575 ENCODE_START(10, 7, bl);
6576 encode(size, bl);
6577 encode(negative, bl);
6578 encode(attrs, bl);
6579 encode(digest, bl);
6580 encode(digest_present, bl);
6581 encode((uint32_t)0, bl); // obsolete nlinks
6582 encode((uint32_t)0, bl); // snapcolls
6583 encode(omap_digest, bl);
6584 encode(omap_digest_present, bl);
6585 encode(compat_read_error, bl);
6586 encode(stat_error, bl);
6587 encode(read_error, bl);
6588 encode(ec_hash_mismatch, bl);
6589 encode(ec_size_mismatch, bl);
6590 encode(large_omap_object_found, bl);
6591 encode(large_omap_object_key_count, bl);
6592 encode(large_omap_object_value_size, bl);
6593 encode(object_omap_bytes, bl);
6594 encode(object_omap_keys, bl);
7c673cae
FG
6595 ENCODE_FINISH(bl);
6596}
6597
9f95a23c 6598void ScrubMap::object::decode(ceph::buffer::list::const_iterator& bl)
7c673cae 6599{
11fdf7f2
TL
6600 DECODE_START(10, bl);
6601 decode(size, bl);
7c673cae 6602 bool tmp, compat_read_error = false;
11fdf7f2 6603 decode(tmp, bl);
7c673cae 6604 negative = tmp;
11fdf7f2
TL
6605 decode(attrs, bl);
6606 decode(digest, bl);
6607 decode(tmp, bl);
7c673cae
FG
6608 digest_present = tmp;
6609 {
6610 uint32_t nlinks;
11fdf7f2 6611 decode(nlinks, bl);
7c673cae 6612 set<snapid_t> snapcolls;
11fdf7f2 6613 decode(snapcolls, bl);
7c673cae 6614 }
11fdf7f2
TL
6615 decode(omap_digest, bl);
6616 decode(tmp, bl);
7c673cae 6617 omap_digest_present = tmp;
11fdf7f2
TL
6618 decode(compat_read_error, bl);
6619 decode(tmp, bl);
7c673cae
FG
6620 stat_error = tmp;
6621 if (struct_v >= 8) {
11fdf7f2 6622 decode(tmp, bl);
7c673cae 6623 read_error = tmp;
11fdf7f2 6624 decode(tmp, bl);
7c673cae 6625 ec_hash_mismatch = tmp;
11fdf7f2 6626 decode(tmp, bl);
7c673cae
FG
6627 ec_size_mismatch = tmp;
6628 }
6629 // If older encoder found a read_error, set read_error
6630 if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
6631 read_error = true;
28e407b8 6632 if (struct_v >= 9) {
11fdf7f2 6633 decode(tmp, bl);
28e407b8 6634 large_omap_object_found = tmp;
11fdf7f2
TL
6635 decode(large_omap_object_key_count, bl);
6636 decode(large_omap_object_value_size, bl);
6637 }
6638 if (struct_v >= 10) {
6639 decode(object_omap_bytes, bl);
6640 decode(object_omap_keys, bl);
28e407b8 6641 }
7c673cae
FG
6642 DECODE_FINISH(bl);
6643}
6644
6645void ScrubMap::object::dump(Formatter *f) const
6646{
6647 f->dump_int("size", size);
6648 f->dump_int("negative", negative);
6649 f->open_array_section("attrs");
9f95a23c 6650 for (auto p = attrs.cbegin(); p != attrs.cend(); ++p) {
7c673cae
FG
6651 f->open_object_section("attr");
6652 f->dump_string("name", p->first);
6653 f->dump_int("length", p->second.length());
6654 f->close_section();
6655 }
6656 f->close_section();
6657}
6658
6659void ScrubMap::object::generate_test_instances(list<object*>& o)
6660{
6661 o.push_back(new object);
6662 o.push_back(new object);
6663 o.back()->negative = true;
6664 o.push_back(new object);
6665 o.back()->size = 123;
9f95a23c
TL
6666 o.back()->attrs["foo"] = ceph::buffer::copy("foo", 3);
6667 o.back()->attrs["bar"] = ceph::buffer::copy("barval", 6);
7c673cae
FG
6668}
6669
6670// -- OSDOp --
6671
6672ostream& operator<<(ostream& out, const OSDOp& op)
6673{
6674 out << ceph_osd_op_name(op.op.op);
6675 if (ceph_osd_op_type_data(op.op.op)) {
6676 // data extent
6677 switch (op.op.op) {
6678 case CEPH_OSD_OP_ASSERT_VER:
6679 out << " v" << op.op.assert_ver.ver;
6680 break;
6681 case CEPH_OSD_OP_TRUNCATE:
6682 out << " " << op.op.extent.offset;
6683 break;
6684 case CEPH_OSD_OP_MASKTRUNC:
6685 case CEPH_OSD_OP_TRIMTRUNC:
6686 out << " " << op.op.extent.truncate_seq << "@"
6687 << (int64_t)op.op.extent.truncate_size;
6688 break;
6689 case CEPH_OSD_OP_ROLLBACK:
6690 out << " " << snapid_t(op.op.snap.snapid);
6691 break;
6692 case CEPH_OSD_OP_WATCH:
6693 out << " " << ceph_osd_watch_op_name(op.op.watch.op)
6694 << " cookie " << op.op.watch.cookie;
6695 if (op.op.watch.gen)
6696 out << " gen " << op.op.watch.gen;
6697 break;
6698 case CEPH_OSD_OP_NOTIFY:
7c673cae
FG
6699 out << " cookie " << op.op.notify.cookie;
6700 break;
6701 case CEPH_OSD_OP_COPY_GET:
6702 out << " max " << op.op.copy_get.max;
6703 break;
6704 case CEPH_OSD_OP_COPY_FROM:
6705 out << " ver " << op.op.copy_from.src_version;
6706 break;
6707 case CEPH_OSD_OP_SETALLOCHINT:
6708 out << " object_size " << op.op.alloc_hint.expected_object_size
6709 << " write_size " << op.op.alloc_hint.expected_write_size;
6710 break;
6711 case CEPH_OSD_OP_READ:
6712 case CEPH_OSD_OP_SPARSE_READ:
6713 case CEPH_OSD_OP_SYNC_READ:
6714 case CEPH_OSD_OP_WRITE:
6715 case CEPH_OSD_OP_WRITEFULL:
6716 case CEPH_OSD_OP_ZERO:
6717 case CEPH_OSD_OP_APPEND:
6718 case CEPH_OSD_OP_MAPEXT:
11fdf7f2 6719 case CEPH_OSD_OP_CMPEXT:
7c673cae
FG
6720 out << " " << op.op.extent.offset << "~" << op.op.extent.length;
6721 if (op.op.extent.truncate_seq)
6722 out << " [" << op.op.extent.truncate_seq << "@"
6723 << (int64_t)op.op.extent.truncate_size << "]";
6724 if (op.op.flags)
6725 out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
6726 default:
6727 // don't show any arg info
6728 break;
6729 }
6730 } else if (ceph_osd_op_type_attr(op.op.op)) {
6731 // xattr name
6732 if (op.op.xattr.name_len && op.indata.length()) {
6733 out << " ";
6734 op.indata.write(0, op.op.xattr.name_len, out);
6735 }
6736 if (op.op.xattr.value_len)
6737 out << " (" << op.op.xattr.value_len << ")";
6738 if (op.op.op == CEPH_OSD_OP_CMPXATTR)
6739 out << " op " << (int)op.op.xattr.cmp_op
6740 << " mode " << (int)op.op.xattr.cmp_mode;
6741 } else if (ceph_osd_op_type_exec(op.op.op)) {
6742 // class.method
6743 if (op.op.cls.class_len && op.indata.length()) {
6744 out << " ";
6745 op.indata.write(0, op.op.cls.class_len, out);
6746 out << ".";
6747 op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
6748 }
6749 } else if (ceph_osd_op_type_pg(op.op.op)) {
6750 switch (op.op.op) {
6751 case CEPH_OSD_OP_PGLS:
6752 case CEPH_OSD_OP_PGLS_FILTER:
6753 case CEPH_OSD_OP_PGNLS:
6754 case CEPH_OSD_OP_PGNLS_FILTER:
6755 out << " start_epoch " << op.op.pgls.start_epoch;
6756 break;
6757 case CEPH_OSD_OP_PG_HITSET_LS:
6758 break;
6759 case CEPH_OSD_OP_PG_HITSET_GET:
6760 out << " " << utime_t(op.op.hit_set_get.stamp);
6761 break;
6762 case CEPH_OSD_OP_SCRUBLS:
6763 break;
6764 }
6765 }
9f95a23c
TL
6766 if (op.indata.length()) {
6767 out << " in=" << op.indata.length() << "b";
6768 }
6769 if (op.outdata.length()) {
6770 out << " out=" << op.outdata.length() << "b";
6771 }
7c673cae
FG
6772 return out;
6773}
6774
6775
9f95a23c 6776void OSDOp::split_osd_op_vector_in_data(vector<OSDOp>& ops, ceph::buffer::list& in)
7c673cae 6777{
9f95a23c 6778 ceph::buffer::list::iterator datap = in.begin();
7c673cae
FG
6779 for (unsigned i = 0; i < ops.size(); i++) {
6780 if (ops[i].op.payload_len) {
6781 datap.copy(ops[i].op.payload_len, ops[i].indata);
6782 }
6783 }
6784}
6785
9f95a23c 6786void OSDOp::merge_osd_op_vector_in_data(vector<OSDOp>& ops, ceph::buffer::list& out)
7c673cae
FG
6787{
6788 for (unsigned i = 0; i < ops.size(); i++) {
6789 if (ops[i].indata.length()) {
6790 ops[i].op.payload_len = ops[i].indata.length();
6791 out.append(ops[i].indata);
6792 }
6793 }
6794}
6795
9f95a23c 6796void OSDOp::split_osd_op_vector_out_data(vector<OSDOp>& ops, ceph::buffer::list& in)
7c673cae 6797{
9f95a23c 6798 auto datap = in.begin();
7c673cae
FG
6799 for (unsigned i = 0; i < ops.size(); i++) {
6800 if (ops[i].op.payload_len) {
6801 datap.copy(ops[i].op.payload_len, ops[i].outdata);
6802 }
6803 }
6804}
6805
9f95a23c 6806void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, ceph::buffer::list& out)
7c673cae
FG
6807{
6808 for (unsigned i = 0; i < ops.size(); i++) {
9f95a23c 6809 ops[i].op.payload_len = ops[i].outdata.length();
7c673cae 6810 if (ops[i].outdata.length()) {
7c673cae
FG
6811 out.append(ops[i].outdata);
6812 }
6813 }
6814}
6815
224ce89b
WB
6816void OSDOp::clear_data(vector<OSDOp>& ops)
6817{
6818 for (unsigned i = 0; i < ops.size(); i++) {
6819 OSDOp& op = ops[i];
6820 op.outdata.clear();
6821 if (ceph_osd_op_type_attr(op.op.op) &&
6822 op.op.xattr.name_len &&
6823 op.indata.length() >= op.op.xattr.name_len) {
9f95a23c
TL
6824 ceph::buffer::list bl;
6825 bl.push_back(ceph::buffer::ptr_node::create(op.op.xattr.name_len));
6826 bl.begin().copy_in(op.op.xattr.name_len, op.indata);
224ce89b
WB
6827 op.indata.claim(bl);
6828 } else if (ceph_osd_op_type_exec(op.op.op) &&
6829 op.op.cls.class_len &&
6830 op.indata.length() >
6831 (op.op.cls.class_len + op.op.cls.method_len)) {
6832 __u8 len = op.op.cls.class_len + op.op.cls.method_len;
9f95a23c
TL
6833 ceph::buffer::list bl;
6834 bl.push_back(ceph::buffer::ptr_node::create(len));
6835 bl.begin().copy_in(len, op.indata);
224ce89b
WB
6836 op.indata.claim(bl);
6837 } else {
6838 op.indata.clear();
6839 }
6840 }
6841}
9f95a23c
TL
6842
6843int prepare_info_keymap(
6844 CephContext* cct,
6845 map<string,bufferlist> *km,
6846 string *key_to_remove,
6847 epoch_t epoch,
6848 pg_info_t &info,
6849 pg_info_t &last_written_info,
6850 PastIntervals &past_intervals,
6851 bool dirty_big_info,
6852 bool dirty_epoch,
6853 bool try_fast_info,
6854 PerfCounters *logger,
6855 DoutPrefixProvider *dpp)
6856{
6857 if (dirty_epoch) {
6858 encode(epoch, (*km)[string(epoch_key)]);
6859 }
6860
6861 if (logger)
6862 logger->inc(l_osd_pg_info);
6863
6864 // try to do info efficiently?
6865 if (!dirty_big_info && try_fast_info &&
6866 info.last_update > last_written_info.last_update) {
6867 pg_fast_info_t fast;
6868 fast.populate_from(info);
6869 bool did = fast.try_apply_to(&last_written_info);
6870 ceph_assert(did); // we verified last_update increased above
6871 if (info == last_written_info) {
6872 encode(fast, (*km)[string(fastinfo_key)]);
6873 if (logger)
6874 logger->inc(l_osd_pg_fastinfo);
6875 return 0;
6876 }
6877 if (dpp) {
6878 ldpp_dout(dpp, 30) << __func__ << " fastinfo failed, info:\n";
6879 {
6880 JSONFormatter jf(true);
6881 jf.dump_object("info", info);
6882 jf.flush(*_dout);
6883 }
6884 {
6885 *_dout << "\nlast_written_info:\n";
6886 JSONFormatter jf(true);
6887 jf.dump_object("last_written_info", last_written_info);
6888 jf.flush(*_dout);
6889 }
6890 *_dout << dendl;
6891 }
6892 } else if (info.last_update <= last_written_info.last_update) {
6893 // clean up any potentially stale fastinfo key resulting from last_update
6894 // not moving forwards (e.g., a backwards jump during peering)
6895 *key_to_remove = fastinfo_key;
6896 }
6897
6898 last_written_info = info;
6899
6900 // info. store purged_snaps separately.
6901 interval_set<snapid_t> purged_snaps;
6902 purged_snaps.swap(info.purged_snaps);
6903 encode(info, (*km)[string(info_key)]);
6904 purged_snaps.swap(info.purged_snaps);
6905
6906 if (dirty_big_info) {
6907 // potentially big stuff
6908 bufferlist& bigbl = (*km)[string(biginfo_key)];
6909 encode(past_intervals, bigbl);
6910 encode(info.purged_snaps, bigbl);
6911 //dout(20) << "write_info bigbl " << bigbl.length() << dendl;
6912 if (logger)
6913 logger->inc(l_osd_pg_biginfo);
6914 }
6915
6916 return 0;
6917}
6918
6919void create_pg_collection(
6920 ceph::os::Transaction& t, spg_t pgid, int bits)
6921{
6922 coll_t coll(pgid);
6923 t.create_collection(coll, bits);
6924}
6925
6926void init_pg_ondisk(
6927 ceph::os::Transaction& t,
6928 spg_t pgid,
6929 const pg_pool_t *pool)
6930{
6931 coll_t coll(pgid);
6932 if (pool) {
6933 // Give a hint to the PG collection
6934 bufferlist hint;
6935 uint32_t pg_num = pool->get_pg_num();
6936 uint64_t expected_num_objects_pg = pool->expected_num_objects / pg_num;
6937 encode(pg_num, hint);
6938 encode(expected_num_objects_pg, hint);
6939 uint32_t hint_type = ceph::os::Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS;
6940 t.collection_hint(coll, hint_type, hint);
6941 }
6942
6943 ghobject_t pgmeta_oid(pgid.make_pgmeta_oid());
6944 t.touch(coll, pgmeta_oid);
6945 map<string,bufferlist> values;
6946 __u8 struct_v = pg_latest_struct_v;
6947 encode(struct_v, values[string(infover_key)]);
6948 t.omap_setkeys(coll, pgmeta_oid, values);
6949}
6950
6951PGLSFilter::PGLSFilter() : cct(nullptr)
6952{
6953}
6954
6955PGLSFilter::~PGLSFilter()
6956{
6957}
6958
6959int PGLSPlainFilter::init(ceph::bufferlist::const_iterator &params)
6960{
6961 try {
6962 decode(xattr, params);
6963 decode(val, params);
6964 } catch (buffer::error &e) {
6965 return -EINVAL;
6966 }
6967 return 0;
6968}
6969
6970bool PGLSPlainFilter::filter(const hobject_t& obj,
6971 const ceph::bufferlist& xattr_data) const
6972{
6973 return xattr_data.contents_equal(val.c_str(), val.size());
6974}