]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/osd_types.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / osd / osd_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18#include <boost/assign/list_of.hpp>
19
20#include "osd_types.h"
21#include "include/ceph_features.h"
11fdf7f2 22#include "include/stringify.h"
7c673cae
FG
23extern "C" {
24#include "crush/hash.h"
25}
7c673cae 26#include "OSDMap.h"
7c673cae
FG
27
28const char *ceph_osd_flag_name(unsigned flag)
29{
30 switch (flag) {
31 case CEPH_OSD_FLAG_ACK: return "ack";
32 case CEPH_OSD_FLAG_ONNVRAM: return "onnvram";
33 case CEPH_OSD_FLAG_ONDISK: return "ondisk";
34 case CEPH_OSD_FLAG_RETRY: return "retry";
35 case CEPH_OSD_FLAG_READ: return "read";
36 case CEPH_OSD_FLAG_WRITE: return "write";
37 case CEPH_OSD_FLAG_ORDERSNAP: return "ordersnap";
38 case CEPH_OSD_FLAG_PEERSTAT_OLD: return "peerstat_old";
39 case CEPH_OSD_FLAG_BALANCE_READS: return "balance_reads";
40 case CEPH_OSD_FLAG_PARALLELEXEC: return "parallelexec";
41 case CEPH_OSD_FLAG_PGOP: return "pgop";
42 case CEPH_OSD_FLAG_EXEC: return "exec";
43 case CEPH_OSD_FLAG_EXEC_PUBLIC: return "exec_public";
44 case CEPH_OSD_FLAG_LOCALIZE_READS: return "localize_reads";
45 case CEPH_OSD_FLAG_RWORDERED: return "rwordered";
46 case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache";
47 case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks";
48 case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay";
49 case CEPH_OSD_FLAG_FLUSH: return "flush";
50 case CEPH_OSD_FLAG_MAP_SNAP_CLONE: return "map_snap_clone";
51 case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
52 case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
53 case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
54 case CEPH_OSD_FLAG_FULL_TRY: return "full_try";
55 case CEPH_OSD_FLAG_FULL_FORCE: return "full_force";
224ce89b 56 case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect";
7c673cae
FG
57 default: return "???";
58 }
59}
60
61string ceph_osd_flag_string(unsigned flags)
62{
63 string s;
64 for (unsigned i=0; i<32; ++i) {
65 if (flags & (1u<<i)) {
66 if (s.length())
67 s += "+";
68 s += ceph_osd_flag_name(1u << i);
69 }
70 }
71 if (s.length())
72 return s;
73 return string("-");
74}
75
76const char * ceph_osd_op_flag_name(unsigned flag)
77{
78 const char *name;
79
80 switch(flag) {
81 case CEPH_OSD_OP_FLAG_EXCL:
82 name = "excl";
83 break;
84 case CEPH_OSD_OP_FLAG_FAILOK:
85 name = "failok";
86 break;
87 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM:
88 name = "fadvise_random";
89 break;
90 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL:
91 name = "fadvise_sequential";
92 break;
93 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED:
94 name = "favise_willneed";
95 break;
96 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED:
97 name = "fadvise_dontneed";
98 break;
99 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
100 name = "fadvise_nocache";
101 break;
11fdf7f2
TL
102 case CEPH_OSD_OP_FLAG_WITH_REFERENCE:
103 name = "with_reference";
104 break;
91327a77
AA
105 case CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE:
106 name = "bypass_clean_cache";
107 break;
7c673cae
FG
108 default:
109 name = "???";
110 };
111
112 return name;
113}
114
115string ceph_osd_op_flag_string(unsigned flags)
116{
117 string s;
118 for (unsigned i=0; i<32; ++i) {
119 if (flags & (1u<<i)) {
120 if (s.length())
121 s += "+";
122 s += ceph_osd_op_flag_name(1u << i);
123 }
124 }
125 if (s.length())
126 return s;
127 return string("-");
128}
129
130string ceph_osd_alloc_hint_flag_string(unsigned flags)
131{
132 string s;
133 for (unsigned i=0; i<32; ++i) {
134 if (flags & (1u<<i)) {
135 if (s.length())
136 s += "+";
137 s += ceph_osd_alloc_hint_flag_name(1u << i);
138 }
139 }
140 if (s.length())
141 return s;
142 return string("-");
143}
144
145void pg_shard_t::encode(bufferlist &bl) const
146{
147 ENCODE_START(1, 1, bl);
11fdf7f2
TL
148 encode(osd, bl);
149 encode(shard, bl);
7c673cae
FG
150 ENCODE_FINISH(bl);
151}
11fdf7f2 152void pg_shard_t::decode(bufferlist::const_iterator &bl)
7c673cae
FG
153{
154 DECODE_START(1, bl);
11fdf7f2
TL
155 decode(osd, bl);
156 decode(shard, bl);
7c673cae
FG
157 DECODE_FINISH(bl);
158}
159
160ostream &operator<<(ostream &lhs, const pg_shard_t &rhs)
161{
162 if (rhs.is_undefined())
163 return lhs << "?";
164 if (rhs.shard == shard_id_t::NO_SHARD)
b32b8144
FG
165 return lhs << rhs.get_osd();
166 return lhs << rhs.get_osd() << '(' << (unsigned)(rhs.shard) << ')';
7c673cae
FG
167}
168
11fdf7f2
TL
169void dump(Formatter* f, const osd_alerts_t& alerts)
170{
171 for (auto& a : alerts) {
172 string s0 = " osd: ";
173 s0 += stringify(a.first);
174 string s;
175 for (auto& aa : a.second) {
176 s = s0;
177 s += " ";
178 s += aa.first;
179 s += ":";
180 s += aa.second;
181 f->dump_string("alert", s);
182 }
183 }
184}
185
7c673cae
FG
186// -- osd_reqid_t --
187void osd_reqid_t::dump(Formatter *f) const
188{
189 f->dump_stream("name") << name;
190 f->dump_int("inc", inc);
191 f->dump_unsigned("tid", tid);
192}
193
194void osd_reqid_t::generate_test_instances(list<osd_reqid_t*>& o)
195{
196 o.push_back(new osd_reqid_t);
197 o.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
198}
199
200// -- object_locator_t --
201
202void object_locator_t::encode(bufferlist& bl) const
203{
204 // verify that nobody's corrupted the locator
11fdf7f2 205 ceph_assert(hash == -1 || key.empty());
7c673cae
FG
206 __u8 encode_compat = 3;
207 ENCODE_START(6, encode_compat, bl);
11fdf7f2 208 encode(pool, bl);
7c673cae 209 int32_t preferred = -1; // tell old code there is no preferred osd (-1).
11fdf7f2
TL
210 encode(preferred, bl);
211 encode(key, bl);
212 encode(nspace, bl);
213 encode(hash, bl);
7c673cae 214 if (hash != -1)
11fdf7f2 215 encode_compat = std::max<std::uint8_t>(encode_compat, 6); // need to interpret the hash
7c673cae
FG
216 ENCODE_FINISH_NEW_COMPAT(bl, encode_compat);
217}
218
11fdf7f2 219void object_locator_t::decode(bufferlist::const_iterator& p)
7c673cae
FG
220{
221 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p);
222 if (struct_v < 2) {
223 int32_t op;
11fdf7f2 224 decode(op, p);
7c673cae
FG
225 pool = op;
226 int16_t pref;
11fdf7f2 227 decode(pref, p);
7c673cae 228 } else {
11fdf7f2 229 decode(pool, p);
7c673cae 230 int32_t preferred;
11fdf7f2 231 decode(preferred, p);
7c673cae 232 }
11fdf7f2 233 decode(key, p);
7c673cae 234 if (struct_v >= 5)
11fdf7f2 235 decode(nspace, p);
7c673cae 236 if (struct_v >= 6)
11fdf7f2 237 decode(hash, p);
7c673cae
FG
238 else
239 hash = -1;
240 DECODE_FINISH(p);
241 // verify that nobody's corrupted the locator
11fdf7f2 242 ceph_assert(hash == -1 || key.empty());
7c673cae
FG
243}
244
245void object_locator_t::dump(Formatter *f) const
246{
247 f->dump_int("pool", pool);
248 f->dump_string("key", key);
249 f->dump_string("namespace", nspace);
250 f->dump_int("hash", hash);
251}
252
253void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
254{
255 o.push_back(new object_locator_t);
256 o.push_back(new object_locator_t(123));
257 o.push_back(new object_locator_t(123, 876));
258 o.push_back(new object_locator_t(1, "n2"));
259 o.push_back(new object_locator_t(1234, "", "key"));
260 o.push_back(new object_locator_t(12, "n1", "key2"));
261}
262
263// -- request_redirect_t --
264void request_redirect_t::encode(bufferlist& bl) const
265{
266 ENCODE_START(1, 1, bl);
11fdf7f2
TL
267 encode(redirect_locator, bl);
268 encode(redirect_object, bl);
269 // legacy of the removed osd_instructions member
270 encode((uint32_t)0, bl);
7c673cae
FG
271 ENCODE_FINISH(bl);
272}
273
11fdf7f2 274void request_redirect_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
275{
276 DECODE_START(1, bl);
11fdf7f2
TL
277 uint32_t legacy_osd_instructions_len;
278 decode(redirect_locator, bl);
279 decode(redirect_object, bl);
280 decode(legacy_osd_instructions_len, bl);
281 if (legacy_osd_instructions_len) {
282 bl.advance(legacy_osd_instructions_len);
283 }
7c673cae
FG
284 DECODE_FINISH(bl);
285}
286
287void request_redirect_t::dump(Formatter *f) const
288{
289 f->dump_string("object", redirect_object);
290 f->open_object_section("locator");
291 redirect_locator.dump(f);
292 f->close_section(); // locator
293}
294
295void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
296{
297 object_locator_t loc(1, "redir_obj");
298 o.push_back(new request_redirect_t());
299 o.push_back(new request_redirect_t(loc, 0));
300 o.push_back(new request_redirect_t(loc, "redir_obj"));
301 o.push_back(new request_redirect_t(loc));
302}
303
304void objectstore_perf_stat_t::dump(Formatter *f) const
305{
11fdf7f2
TL
306 // *_ms values just for compatibility.
307 f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0);
308 f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0);
309 f->dump_unsigned("commit_latency_ns", os_commit_latency_ns);
310 f->dump_unsigned("apply_latency_ns", os_apply_latency_ns);
7c673cae
FG
311}
312
11fdf7f2 313void objectstore_perf_stat_t::encode(bufferlist &bl, uint64_t features) const
7c673cae 314{
11fdf7f2
TL
315 uint8_t target_v = 2;
316 if (!HAVE_FEATURE(features, OS_PERF_STAT_NS)) {
317 target_v = 1;
318 }
319 ENCODE_START(target_v, target_v, bl);
320 if (target_v >= 2) {
321 encode(os_commit_latency_ns, bl);
322 encode(os_apply_latency_ns, bl);
323 } else {
324 constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
325 uint32_t commit_latency_ms = os_commit_latency_ns / NS_PER_MS;
326 uint32_t apply_latency_ms = os_apply_latency_ns / NS_PER_MS;
327 encode(commit_latency_ms, bl); // for compatibility with older monitor.
328 encode(apply_latency_ms, bl); // for compatibility with older monitor.
329 }
7c673cae
FG
330 ENCODE_FINISH(bl);
331}
332
11fdf7f2 333void objectstore_perf_stat_t::decode(bufferlist::const_iterator &bl)
7c673cae 334{
11fdf7f2
TL
335 DECODE_START(2, bl);
336 if (struct_v >= 2) {
337 decode(os_commit_latency_ns, bl);
338 decode(os_apply_latency_ns, bl);
339 } else {
340 uint32_t commit_latency_ms;
341 uint32_t apply_latency_ms;
342 decode(commit_latency_ms, bl);
343 decode(apply_latency_ms, bl);
344 constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
345 os_commit_latency_ns = commit_latency_ms * NS_PER_MS;
346 os_apply_latency_ns = apply_latency_ms * NS_PER_MS;
347 }
7c673cae
FG
348 DECODE_FINISH(bl);
349}
350
351void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf_stat_t*>& o)
352{
353 o.push_back(new objectstore_perf_stat_t());
354 o.push_back(new objectstore_perf_stat_t());
11fdf7f2
TL
355 o.back()->os_commit_latency_ns = 20000000;
356 o.back()->os_apply_latency_ns = 30000000;
7c673cae
FG
357}
358
359// -- osd_stat_t --
360void osd_stat_t::dump(Formatter *f) const
361{
31f18b77
FG
362 f->dump_unsigned("up_from", up_from);
363 f->dump_unsigned("seq", seq);
35e4c445 364 f->dump_unsigned("num_pgs", num_pgs);
11fdf7f2
TL
365
366 /// dump legacy stats fields to ensure backward compatibility.
367 f->dump_unsigned("kb", statfs.kb());
368 f->dump_unsigned("kb_used", statfs.kb_used_raw());
369 f->dump_unsigned("kb_used_data", statfs.kb_used_data());
370 f->dump_unsigned("kb_used_omap", statfs.kb_used_omap());
371 f->dump_unsigned("kb_used_meta", statfs.kb_used_internal_metadata());
372 f->dump_unsigned("kb_avail", statfs.kb_avail());
373 ////////////////////
374
375 f->open_object_section("statfs");
376 statfs.dump(f);
377 f->close_section();
7c673cae
FG
378 f->open_array_section("hb_peers");
379 for (auto p : hb_peers)
380 f->dump_int("osd", p);
381 f->close_section();
382 f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
383 f->dump_int("num_snap_trimming", num_snap_trimming);
11fdf7f2 384 f->dump_int("num_shards_repaired", num_shards_repaired);
7c673cae
FG
385 f->open_object_section("op_queue_age_hist");
386 op_queue_age_hist.dump(f);
387 f->close_section();
388 f->open_object_section("perf_stat");
389 os_perf_stat.dump(f);
390 f->close_section();
11fdf7f2
TL
391 f->open_array_section("alerts");
392 ::dump(f, os_alerts);
393 f->close_section();
7c673cae
FG
394}
395
11fdf7f2
TL
396void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
397{
398 ENCODE_START(11, 2, bl);
399
400 //////// for compatibility ////////
401 int64_t kb = statfs.kb();
402 int64_t kb_used = statfs.kb_used_raw();
403 int64_t kb_avail = statfs.kb_avail();
404 encode(kb, bl);
405 encode(kb_used, bl);
406 encode(kb_avail, bl);
407 ///////////////////////////////////
408
409 encode(snap_trim_queue_len, bl);
410 encode(num_snap_trimming, bl);
411 encode(hb_peers, bl);
412 encode((uint32_t)0, bl);
413 encode(op_queue_age_hist, bl);
414 encode(os_perf_stat, bl, features);
415 encode(up_from, bl);
416 encode(seq, bl);
417 encode(num_pgs, bl);
418
419 //////// for compatibility ////////
420 int64_t kb_used_data = statfs.kb_used_data();
421 int64_t kb_used_omap = statfs.kb_used_omap();
422 int64_t kb_used_meta = statfs.kb_used_internal_metadata();
423 encode(kb_used_data, bl);
424 encode(kb_used_omap, bl);
425 encode(kb_used_meta, bl);
426 encode(statfs, bl);
427 ///////////////////////////////////
428 encode(os_alerts, bl);
429 encode(num_shards_repaired, bl);
7c673cae
FG
430 ENCODE_FINISH(bl);
431}
432
11fdf7f2 433void osd_stat_t::decode(bufferlist::const_iterator &bl)
7c673cae 434{
11fdf7f2
TL
435 int64_t kb, kb_used,kb_avail;
436 int64_t kb_used_data, kb_used_omap, kb_used_meta;
437 DECODE_START_LEGACY_COMPAT_LEN(11, 2, 2, bl);
438 decode(kb, bl);
439 decode(kb_used, bl);
440 decode(kb_avail, bl);
441 decode(snap_trim_queue_len, bl);
442 decode(num_snap_trimming, bl);
443 decode(hb_peers, bl);
7c673cae 444 vector<int> num_hb_out;
11fdf7f2 445 decode(num_hb_out, bl);
7c673cae 446 if (struct_v >= 3)
11fdf7f2 447 decode(op_queue_age_hist, bl);
7c673cae 448 if (struct_v >= 4)
11fdf7f2 449 decode(os_perf_stat, bl);
31f18b77 450 if (struct_v >= 6) {
11fdf7f2
TL
451 decode(up_from, bl);
452 decode(seq, bl);
31f18b77 453 }
35e4c445 454 if (struct_v >= 7) {
11fdf7f2
TL
455 decode(num_pgs, bl);
456 }
457 if (struct_v >= 8) {
458 decode(kb_used_data, bl);
459 decode(kb_used_omap, bl);
460 decode(kb_used_meta, bl);
461 } else {
462 kb_used_data = kb_used;
463 kb_used_omap = 0;
464 kb_used_meta = 0;
465 }
466 if (struct_v >= 9) {
467 decode(statfs, bl);
468 } else {
469 statfs.reset();
470 statfs.total = kb << 10;
471 statfs.available = kb_avail << 10;
472 // actually it's totally unexpected to have ststfs.total < statfs.available
473 // here but unfortunately legacy generate_test_instances produced such a
474 // case hence inserting some handling rather than assert
475 statfs.internally_reserved =
476 statfs.total > statfs.available ? statfs.total - statfs.available : 0;
477 kb_used <<= 10;
478 if ((int64_t)statfs.internally_reserved > kb_used) {
479 statfs.internally_reserved -= kb_used;
480 } else {
481 statfs.internally_reserved = 0;
482 }
483 statfs.allocated = kb_used_data << 10;
484 statfs.omap_allocated = kb_used_omap << 10;
485 statfs.internal_metadata = kb_used_meta << 10;
486 }
487 if (struct_v >= 10) {
488 decode(os_alerts, bl);
489 } else {
490 os_alerts.clear();
491 }
492 if (struct_v >= 11) {
493 decode(num_shards_repaired, bl);
494 } else {
495 num_shards_repaired = 0;
35e4c445 496 }
7c673cae
FG
497 DECODE_FINISH(bl);
498}
499
500void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
501{
502 o.push_back(new osd_stat_t);
503
504 o.push_back(new osd_stat_t);
11fdf7f2
TL
505 list<store_statfs_t*> ll;
506 store_statfs_t::generate_test_instances(ll);
507 o.back()->statfs = *ll.back();
7c673cae
FG
508 o.back()->hb_peers.push_back(7);
509 o.back()->snap_trim_queue_len = 8;
510 o.back()->num_snap_trimming = 99;
11fdf7f2
TL
511 o.back()->num_shards_repaired = 101;
512 o.back()->os_alerts[0].emplace(
513 "some alert", "some alert details");
514 o.back()->os_alerts[1].emplace(
515 "some alert2", "some alert2 details");
7c673cae
FG
516}
517
518// -- pg_t --
519
520int pg_t::print(char *o, int maxlen) const
521{
11fdf7f2 522 return snprintf(o, maxlen, "%llu.%x", (unsigned long long)pool(), ps());
7c673cae
FG
523}
524
525bool pg_t::parse(const char *s)
526{
527 uint64_t ppool;
528 uint32_t pseed;
11fdf7f2 529 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
7c673cae
FG
530 if (r < 2)
531 return false;
532 m_pool = ppool;
533 m_seed = pseed;
7c673cae
FG
534 return true;
535}
536
537bool spg_t::parse(const char *s)
538{
7c673cae
FG
539 shard = shard_id_t::NO_SHARD;
540 uint64_t ppool;
541 uint32_t pseed;
7c673cae
FG
542 uint32_t pshard;
543 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
544 if (r < 2)
545 return false;
546 pgid.set_pool(ppool);
547 pgid.set_ps(pseed);
548
11fdf7f2 549 const char *p = strchr(s, 's');
7c673cae 550 if (p) {
11fdf7f2 551 r = sscanf(p, "s%u", &pshard);
7c673cae
FG
552 if (r == 1) {
553 shard = shard_id_t(pshard);
554 } else {
555 return false;
556 }
557 }
558 return true;
559}
560
561char *spg_t::calc_name(char *buf, const char *suffix_backwords) const
562{
563 while (*suffix_backwords)
564 *--buf = *suffix_backwords++;
565
566 if (!is_no_shard()) {
567 buf = ritoa<uint8_t, 10>((uint8_t)shard.id, buf);
568 *--buf = 's';
569 }
570
571 return pgid.calc_name(buf, "");
572}
573
574ostream& operator<<(ostream& out, const spg_t &pg)
575{
576 char buf[spg_t::calc_name_buf_size];
577 buf[spg_t::calc_name_buf_size - 1] = '\0';
578 out << pg.calc_name(buf + spg_t::calc_name_buf_size - 1, "");
579 return out;
580}
581
582pg_t pg_t::get_ancestor(unsigned old_pg_num) const
583{
584 int old_bits = cbits(old_pg_num);
585 int old_mask = (1 << old_bits) - 1;
586 pg_t ret = *this;
587 ret.m_seed = ceph_stable_mod(m_seed, old_pg_num, old_mask);
588 return ret;
589}
590
591bool pg_t::is_split(unsigned old_pg_num, unsigned new_pg_num, set<pg_t> *children) const
592{
11fdf7f2
TL
593 //ceph_assert(m_seed < old_pg_num);
594 if (m_seed >= old_pg_num) {
595 // degenerate case
596 return false;
597 }
7c673cae
FG
598 if (new_pg_num <= old_pg_num)
599 return false;
600
601 bool split = false;
602 if (true) {
603 unsigned old_bits = cbits(old_pg_num);
604 unsigned old_mask = (1 << old_bits) - 1;
605 for (unsigned n = 1; ; n++) {
606 unsigned next_bit = (n << (old_bits-1));
607 unsigned s = next_bit | m_seed;
608
609 if (s < old_pg_num || s == m_seed)
610 continue;
611 if (s >= new_pg_num)
612 break;
613 if ((unsigned)ceph_stable_mod(s, old_pg_num, old_mask) == m_seed) {
614 split = true;
615 if (children)
11fdf7f2 616 children->insert(pg_t(s, m_pool));
7c673cae
FG
617 }
618 }
619 }
620 if (false) {
621 // brute force
622 int old_bits = cbits(old_pg_num);
623 int old_mask = (1 << old_bits) - 1;
624 for (unsigned x = old_pg_num; x < new_pg_num; ++x) {
625 unsigned o = ceph_stable_mod(x, old_pg_num, old_mask);
626 if (o == m_seed) {
627 split = true;
11fdf7f2 628 children->insert(pg_t(x, m_pool));
7c673cae
FG
629 }
630 }
631 }
632 return split;
633}
634
635unsigned pg_t::get_split_bits(unsigned pg_num) const {
636 if (pg_num == 1)
637 return 0;
11fdf7f2 638 ceph_assert(pg_num > 1);
7c673cae
FG
639
640 // Find unique p such that pg_num \in [2^(p-1), 2^p)
641 unsigned p = cbits(pg_num);
11fdf7f2 642 ceph_assert(p); // silence coverity #751330
7c673cae
FG
643
644 if ((m_seed % (1<<(p-1))) < (pg_num % (1<<(p-1))))
645 return p;
646 else
647 return p - 1;
648}
649
11fdf7f2
TL
650bool pg_t::is_merge_source(
651 unsigned old_pg_num,
652 unsigned new_pg_num,
653 pg_t *parent) const
654{
655 if (m_seed < old_pg_num &&
656 m_seed >= new_pg_num) {
657 if (parent) {
658 pg_t t = *this;
659 while (t.m_seed >= new_pg_num) {
660 t = t.get_parent();
661 }
662 *parent = t;
663 }
664 return true;
665 }
666 return false;
667}
668
7c673cae
FG
669pg_t pg_t::get_parent() const
670{
671 unsigned bits = cbits(m_seed);
11fdf7f2 672 ceph_assert(bits);
7c673cae
FG
673 pg_t retval = *this;
674 retval.m_seed &= ~((~0)<<(bits - 1));
675 return retval;
676}
677
678hobject_t pg_t::get_hobj_start() const
679{
11fdf7f2 680 return hobject_t(object_t(), string(), 0, m_seed, m_pool,
7c673cae
FG
681 string());
682}
683
684hobject_t pg_t::get_hobj_end(unsigned pg_num) const
685{
686 // note: this assumes a bitwise sort; with the legacy nibblewise
687 // sort a PG did not always cover a single contiguous range of the
688 // (bit-reversed) hash range.
689 unsigned bits = get_split_bits(pg_num);
690 uint64_t rev_start = hobject_t::_reverse_bits(m_seed);
691 uint64_t rev_end = (rev_start | (0xffffffff >> bits)) + 1;
692 if (rev_end >= 0x100000000) {
11fdf7f2 693 ceph_assert(rev_end == 0x100000000);
7c673cae
FG
694 return hobject_t::get_max();
695 } else {
696 return hobject_t(object_t(), string(), CEPH_NOSNAP,
697 hobject_t::_reverse_bits(rev_end), m_pool,
698 string());
699 }
700}
701
702void pg_t::dump(Formatter *f) const
703{
704 f->dump_unsigned("pool", m_pool);
705 f->dump_unsigned("seed", m_seed);
7c673cae
FG
706}
707
708void pg_t::generate_test_instances(list<pg_t*>& o)
709{
710 o.push_back(new pg_t);
11fdf7f2
TL
711 o.push_back(new pg_t(1, 2));
712 o.push_back(new pg_t(13123, 3));
713 o.push_back(new pg_t(131223, 4));
7c673cae
FG
714}
715
716char *pg_t::calc_name(char *buf, const char *suffix_backwords) const
717{
718 while (*suffix_backwords)
719 *--buf = *suffix_backwords++;
720
7c673cae
FG
721 buf = ritoa<uint32_t, 16>(m_seed, buf);
722
723 *--buf = '.';
724
725 return ritoa<uint64_t, 10>(m_pool, buf);
726}
727
728ostream& operator<<(ostream& out, const pg_t &pg)
729{
730 char buf[pg_t::calc_name_buf_size];
731 buf[pg_t::calc_name_buf_size - 1] = '\0';
732 out << pg.calc_name(buf + pg_t::calc_name_buf_size - 1, "");
733 return out;
734}
735
736
737// -- coll_t --
738
739void coll_t::calc_str()
740{
741 switch (type) {
742 case TYPE_META:
743 strcpy(_str_buff, "meta");
744 _str = _str_buff;
745 break;
746 case TYPE_PG:
747 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
748 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "daeh_");
749 break;
750 case TYPE_PG_TEMP:
751 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
752 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "PMET_");
753 break;
754 default:
11fdf7f2 755 ceph_abort_msg("unknown collection type");
7c673cae
FG
756 }
757}
758
759bool coll_t::parse(const std::string& s)
760{
761 if (s == "meta") {
762 type = TYPE_META;
763 pgid = spg_t();
764 removal_seq = 0;
765 calc_str();
11fdf7f2 766 ceph_assert(s == _str);
7c673cae
FG
767 return true;
768 }
769 if (s.find("_head") == s.length() - 5 &&
770 pgid.parse(s.substr(0, s.length() - 5))) {
771 type = TYPE_PG;
772 removal_seq = 0;
773 calc_str();
11fdf7f2 774 ceph_assert(s == _str);
7c673cae
FG
775 return true;
776 }
777 if (s.find("_TEMP") == s.length() - 5 &&
778 pgid.parse(s.substr(0, s.length() - 5))) {
779 type = TYPE_PG_TEMP;
780 removal_seq = 0;
781 calc_str();
11fdf7f2 782 ceph_assert(s == _str);
7c673cae
FG
783 return true;
784 }
785 return false;
786}
787
788void coll_t::encode(bufferlist& bl) const
789{
11fdf7f2 790 using ceph::encode;
7c673cae
FG
791 // when changing this, remember to update encoded_size() too.
792 if (is_temp()) {
793 // can't express this as v2...
794 __u8 struct_v = 3;
11fdf7f2
TL
795 encode(struct_v, bl);
796 encode(to_str(), bl);
7c673cae
FG
797 } else {
798 __u8 struct_v = 2;
11fdf7f2
TL
799 encode(struct_v, bl);
800 encode((__u8)type, bl);
801 encode(pgid, bl);
7c673cae 802 snapid_t snap = CEPH_NOSNAP;
11fdf7f2 803 encode(snap, bl);
7c673cae
FG
804 }
805}
806
807size_t coll_t::encoded_size() const
808{
809 size_t r = sizeof(__u8);
810 if (is_temp()) {
811 // v3
812 r += sizeof(__u32);
813 if (_str) {
814 r += strlen(_str);
815 }
816 } else {
817 // v2
818 // 1. type
819 r += sizeof(__u8);
820 // 2. pgid
821 // - encoding header
822 r += sizeof(ceph_le32) + 2 * sizeof(__u8);
823 // - pg_t
824 r += sizeof(__u8) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
825 // - shard_id_t
826 r += sizeof(int8_t);
827 // 3. snapid_t
828 r += sizeof(uint64_t);
829 }
830
831 return r;
832}
833
11fdf7f2 834void coll_t::decode(bufferlist::const_iterator& bl)
7c673cae 835{
11fdf7f2 836 using ceph::decode;
7c673cae 837 __u8 struct_v;
11fdf7f2 838 decode(struct_v, bl);
7c673cae
FG
839 switch (struct_v) {
840 case 1:
841 {
842 snapid_t snap;
11fdf7f2
TL
843 decode(pgid, bl);
844 decode(snap, bl);
7c673cae
FG
845
846 // infer the type
847 if (pgid == spg_t() && snap == 0) {
848 type = TYPE_META;
849 } else {
850 type = TYPE_PG;
851 }
852 removal_seq = 0;
853 }
854 break;
855
856 case 2:
857 {
858 __u8 _type;
859 snapid_t snap;
11fdf7f2
TL
860 decode(_type, bl);
861 decode(pgid, bl);
862 decode(snap, bl);
7c673cae
FG
863 type = (type_t)_type;
864 removal_seq = 0;
865 }
866 break;
867
868 case 3:
869 {
870 string str;
11fdf7f2 871 decode(str, bl);
7c673cae
FG
872 bool ok = parse(str);
873 if (!ok)
874 throw std::domain_error(std::string("unable to parse pg ") + str);
875 }
876 break;
877
878 default:
879 {
880 ostringstream oss;
881 oss << "coll_t::decode(): don't know how to decode version "
882 << struct_v;
883 throw std::domain_error(oss.str());
884 }
885 }
886}
887
888void coll_t::dump(Formatter *f) const
889{
890 f->dump_unsigned("type_id", (unsigned)type);
891 if (type != TYPE_META)
892 f->dump_stream("pgid") << pgid;
893 f->dump_string("name", to_str());
894}
895
896void coll_t::generate_test_instances(list<coll_t*>& o)
897{
898 o.push_back(new coll_t());
899 o.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD)));
900 o.push_back(new coll_t(o.back()->get_temp()));
901 o.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
902 o.push_back(new coll_t(o.back()->get_temp()));
903 o.push_back(new coll_t());
904}
905
906// ---
907
908std::string pg_vector_string(const vector<int32_t> &a)
909{
910 ostringstream oss;
911 oss << "[";
912 for (vector<int32_t>::const_iterator i = a.begin(); i != a.end(); ++i) {
913 if (i != a.begin())
914 oss << ",";
915 if (*i != CRUSH_ITEM_NONE)
916 oss << *i;
917 else
918 oss << "NONE";
919 }
920 oss << "]";
921 return oss.str();
922}
923
11fdf7f2 924std::string pg_state_string(uint64_t state)
7c673cae
FG
925{
926 ostringstream oss;
927 if (state & PG_STATE_STALE)
928 oss << "stale+";
929 if (state & PG_STATE_CREATING)
930 oss << "creating+";
931 if (state & PG_STATE_ACTIVE)
932 oss << "active+";
933 if (state & PG_STATE_ACTIVATING)
934 oss << "activating+";
935 if (state & PG_STATE_CLEAN)
936 oss << "clean+";
937 if (state & PG_STATE_RECOVERY_WAIT)
938 oss << "recovery_wait+";
939 if (state & PG_STATE_RECOVERY_TOOFULL)
940 oss << "recovery_toofull+";
941 if (state & PG_STATE_RECOVERING)
942 oss << "recovering+";
c07f9fc5
FG
943 if (state & PG_STATE_FORCED_RECOVERY)
944 oss << "forced_recovery+";
7c673cae
FG
945 if (state & PG_STATE_DOWN)
946 oss << "down+";
b32b8144
FG
947 if (state & PG_STATE_RECOVERY_UNFOUND)
948 oss << "recovery_unfound+";
949 if (state & PG_STATE_BACKFILL_UNFOUND)
950 oss << "backfill_unfound+";
7c673cae
FG
951 if (state & PG_STATE_UNDERSIZED)
952 oss << "undersized+";
953 if (state & PG_STATE_DEGRADED)
954 oss << "degraded+";
955 if (state & PG_STATE_REMAPPED)
956 oss << "remapped+";
11fdf7f2
TL
957 if (state & PG_STATE_PREMERGE)
958 oss << "premerge+";
7c673cae
FG
959 if (state & PG_STATE_SCRUBBING)
960 oss << "scrubbing+";
961 if (state & PG_STATE_DEEP_SCRUB)
962 oss << "deep+";
963 if (state & PG_STATE_INCONSISTENT)
964 oss << "inconsistent+";
965 if (state & PG_STATE_PEERING)
966 oss << "peering+";
967 if (state & PG_STATE_REPAIR)
968 oss << "repair+";
3efd9988 969 if (state & PG_STATE_BACKFILL_WAIT)
7c673cae 970 oss << "backfill_wait+";
3efd9988 971 if (state & PG_STATE_BACKFILLING)
7c673cae 972 oss << "backfilling+";
c07f9fc5
FG
973 if (state & PG_STATE_FORCED_BACKFILL)
974 oss << "forced_backfill+";
7c673cae
FG
975 if (state & PG_STATE_BACKFILL_TOOFULL)
976 oss << "backfill_toofull+";
977 if (state & PG_STATE_INCOMPLETE)
978 oss << "incomplete+";
979 if (state & PG_STATE_PEERED)
980 oss << "peered+";
981 if (state & PG_STATE_SNAPTRIM)
982 oss << "snaptrim+";
983 if (state & PG_STATE_SNAPTRIM_WAIT)
984 oss << "snaptrim_wait+";
224ce89b
WB
985 if (state & PG_STATE_SNAPTRIM_ERROR)
986 oss << "snaptrim_error+";
11fdf7f2
TL
987 if (state & PG_STATE_FAILED_REPAIR)
988 oss << "failed_repair+";
7c673cae
FG
989 string ret(oss.str());
990 if (ret.length() > 0)
991 ret.resize(ret.length() - 1);
992 else
31f18b77 993 ret = "unknown";
7c673cae
FG
994 return ret;
995}
996
3efd9988 997boost::optional<uint64_t> pg_string_state(const std::string& state)
7c673cae 998{
3efd9988 999 boost::optional<uint64_t> type;
7c673cae
FG
1000 if (state == "active")
1001 type = PG_STATE_ACTIVE;
1002 else if (state == "clean")
1003 type = PG_STATE_CLEAN;
1004 else if (state == "down")
1005 type = PG_STATE_DOWN;
b32b8144
FG
1006 else if (state == "recovery_unfound")
1007 type = PG_STATE_RECOVERY_UNFOUND;
1008 else if (state == "backfill_unfound")
1009 type = PG_STATE_BACKFILL_UNFOUND;
11fdf7f2
TL
1010 else if (state == "premerge")
1011 type = PG_STATE_PREMERGE;
7c673cae
FG
1012 else if (state == "scrubbing")
1013 type = PG_STATE_SCRUBBING;
1014 else if (state == "degraded")
1015 type = PG_STATE_DEGRADED;
1016 else if (state == "inconsistent")
1017 type = PG_STATE_INCONSISTENT;
1018 else if (state == "peering")
1019 type = PG_STATE_PEERING;
1020 else if (state == "repair")
1021 type = PG_STATE_REPAIR;
1022 else if (state == "recovering")
1023 type = PG_STATE_RECOVERING;
c07f9fc5
FG
1024 else if (state == "forced_recovery")
1025 type = PG_STATE_FORCED_RECOVERY;
7c673cae
FG
1026 else if (state == "backfill_wait")
1027 type = PG_STATE_BACKFILL_WAIT;
1028 else if (state == "incomplete")
1029 type = PG_STATE_INCOMPLETE;
1030 else if (state == "stale")
1031 type = PG_STATE_STALE;
1032 else if (state == "remapped")
1033 type = PG_STATE_REMAPPED;
94b18763 1034 else if (state == "deep")
7c673cae 1035 type = PG_STATE_DEEP_SCRUB;
3efd9988
FG
1036 else if (state == "backfilling")
1037 type = PG_STATE_BACKFILLING;
c07f9fc5
FG
1038 else if (state == "forced_backfill")
1039 type = PG_STATE_FORCED_BACKFILL;
7c673cae
FG
1040 else if (state == "backfill_toofull")
1041 type = PG_STATE_BACKFILL_TOOFULL;
1042 else if (state == "recovery_wait")
1043 type = PG_STATE_RECOVERY_WAIT;
1044 else if (state == "recovery_toofull")
1045 type = PG_STATE_RECOVERY_TOOFULL;
1046 else if (state == "undersized")
1047 type = PG_STATE_UNDERSIZED;
1048 else if (state == "activating")
1049 type = PG_STATE_ACTIVATING;
1050 else if (state == "peered")
1051 type = PG_STATE_PEERED;
1052 else if (state == "snaptrim")
1053 type = PG_STATE_SNAPTRIM;
1054 else if (state == "snaptrim_wait")
1055 type = PG_STATE_SNAPTRIM_WAIT;
224ce89b
WB
1056 else if (state == "snaptrim_error")
1057 type = PG_STATE_SNAPTRIM_ERROR;
91327a77
AA
1058 else if (state == "creating")
1059 type = PG_STATE_CREATING;
11fdf7f2
TL
1060 else if (state == "failed_repair")
1061 type = PG_STATE_FAILED_REPAIR;
1062 else if (state == "unknown")
1063 type = 0;
7c673cae 1064 else
3efd9988 1065 type = boost::none;
7c673cae
FG
1066 return type;
1067}
1068
1069// -- eversion_t --
1070string eversion_t::get_key_name() const
1071{
11fdf7f2
TL
1072 std::string key(32, ' ');
1073 get_key_name(&key[0]);
1074 key.resize(31); // remove the null terminator
1075 return key;
7c673cae
FG
1076}
1077
7c673cae
FG
1078// -- pool_snap_info_t --
1079void pool_snap_info_t::dump(Formatter *f) const
1080{
1081 f->dump_unsigned("snapid", snapid);
1082 f->dump_stream("stamp") << stamp;
1083 f->dump_string("name", name);
1084}
1085
1086void pool_snap_info_t::encode(bufferlist& bl, uint64_t features) const
1087{
11fdf7f2 1088 using ceph::encode;
7c673cae
FG
1089 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1090 __u8 struct_v = 1;
11fdf7f2
TL
1091 encode(struct_v, bl);
1092 encode(snapid, bl);
1093 encode(stamp, bl);
1094 encode(name, bl);
7c673cae
FG
1095 return;
1096 }
1097 ENCODE_START(2, 2, bl);
11fdf7f2
TL
1098 encode(snapid, bl);
1099 encode(stamp, bl);
1100 encode(name, bl);
7c673cae
FG
1101 ENCODE_FINISH(bl);
1102}
1103
11fdf7f2 1104void pool_snap_info_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
1105{
1106 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2
TL
1107 decode(snapid, bl);
1108 decode(stamp, bl);
1109 decode(name, bl);
7c673cae
FG
1110 DECODE_FINISH(bl);
1111}
1112
1113void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
1114{
1115 o.push_back(new pool_snap_info_t);
1116 o.push_back(new pool_snap_info_t);
1117 o.back()->snapid = 1;
1118 o.back()->stamp = utime_t(1, 2);
1119 o.back()->name = "foo";
1120}
1121
1122// -- pool_opts_t --
1123
1124typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
1125static opt_mapping_t opt_mapping = boost::assign::map_list_of
1126 ("scrub_min_interval", pool_opts_t::opt_desc_t(
1127 pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
1128 ("scrub_max_interval", pool_opts_t::opt_desc_t(
1129 pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
1130 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
1131 pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
1132 ("recovery_priority", pool_opts_t::opt_desc_t(
1133 pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
1134 ("recovery_op_priority", pool_opts_t::opt_desc_t(
1135 pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
1136 ("scrub_priority", pool_opts_t::opt_desc_t(
1137 pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
1138 ("compression_mode", pool_opts_t::opt_desc_t(
1139 pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
1140 ("compression_algorithm", pool_opts_t::opt_desc_t(
1141 pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
1142 ("compression_required_ratio", pool_opts_t::opt_desc_t(
1143 pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
1144 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
1145 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
1146 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
1147 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
1148 ("csum_type", pool_opts_t::opt_desc_t(
1149 pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
1150 ("csum_max_block", pool_opts_t::opt_desc_t(
1151 pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
1152 ("csum_min_block", pool_opts_t::opt_desc_t(
11fdf7f2
TL
1153 pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT))
1154 ("fingerprint_algorithm", pool_opts_t::opt_desc_t(
1155 pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
1156 ("pg_num_min", pool_opts_t::opt_desc_t(
1157 pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
1158 ("target_size_bytes", pool_opts_t::opt_desc_t(
1159 pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
1160 ("target_size_ratio", pool_opts_t::opt_desc_t(
1161 pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE))
1162 ("pg_autoscale_bias", pool_opts_t::opt_desc_t(
1163 pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE));
7c673cae 1164
11fdf7f2
TL
1165bool pool_opts_t::is_opt_name(const std::string& name)
1166{
1167 return opt_mapping.count(name);
7c673cae
FG
1168}
1169
11fdf7f2
TL
1170pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name)
1171{
1172 opt_mapping_t::iterator i = opt_mapping.find(name);
1173 ceph_assert(i != opt_mapping.end());
1174 return i->second;
7c673cae
FG
1175}
1176
11fdf7f2
TL
1177bool pool_opts_t::is_set(pool_opts_t::key_t key) const
1178{
1179 return opts.count(key);
7c673cae
FG
1180}
1181
11fdf7f2
TL
1182const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const
1183{
7c673cae 1184 opts_t::const_iterator i = opts.find(key);
11fdf7f2 1185 ceph_assert(i != opts.end());
7c673cae
FG
1186 return i->second;
1187}
1188
1189bool pool_opts_t::unset(pool_opts_t::key_t key) {
1190 return opts.erase(key) > 0;
1191}
1192
11fdf7f2 1193class pool_opts_dumper_t : public boost::static_visitor<> {
7c673cae
FG
1194public:
1195 pool_opts_dumper_t(const std::string& name_, Formatter* f_) :
1196 name(name_.c_str()), f(f_) {}
1197
1198 void operator()(std::string s) const {
1199 f->dump_string(name, s);
1200 }
11fdf7f2 1201 void operator()(int64_t i) const {
7c673cae
FG
1202 f->dump_int(name, i);
1203 }
1204 void operator()(double d) const {
1205 f->dump_float(name, d);
1206 }
1207
1208private:
1209 const char* name;
1210 Formatter* f;
1211};
1212
1213void pool_opts_t::dump(const std::string& name, Formatter* f) const
1214{
1215 const opt_desc_t& desc = get_opt_desc(name);
1216 opts_t::const_iterator i = opts.find(desc.key);
1217 if (i == opts.end()) {
1218 return;
1219 }
1220 boost::apply_visitor(pool_opts_dumper_t(name, f), i->second);
1221}
1222
1223void pool_opts_t::dump(Formatter* f) const
1224{
1225 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1226 ++i) {
1227 const std::string& name = i->first;
1228 const opt_desc_t& desc = i->second;
1229 opts_t::const_iterator j = opts.find(desc.key);
1230 if (j == opts.end()) {
1231 continue;
1232 }
1233 boost::apply_visitor(pool_opts_dumper_t(name, f), j->second);
1234 }
1235}
1236
11fdf7f2 1237class pool_opts_encoder_t : public boost::static_visitor<> {
7c673cae 1238public:
11fdf7f2
TL
1239 explicit pool_opts_encoder_t(bufferlist& bl_, uint64_t features)
1240 : bl(bl_),
1241 features(features) {}
1242
1243 void operator()(const std::string &s) const {
1244 encode(static_cast<int32_t>(pool_opts_t::STR), bl);
1245 encode(s, bl);
1246 }
1247 void operator()(int64_t i) const {
1248 encode(static_cast<int32_t>(pool_opts_t::INT), bl);
1249 if (HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1250 encode(i, bl);
1251 } else {
1252 encode(static_cast<int32_t>(i), bl);
1253 }
7c673cae
FG
1254 }
1255 void operator()(double d) const {
11fdf7f2
TL
1256 encode(static_cast<int32_t>(pool_opts_t::DOUBLE), bl);
1257 encode(d, bl);
7c673cae
FG
1258 }
1259
1260private:
1261 bufferlist& bl;
11fdf7f2 1262 uint64_t features;
7c673cae
FG
1263};
1264
11fdf7f2
TL
1265void pool_opts_t::encode(bufferlist& bl, uint64_t features) const
1266{
1267 unsigned v = 2;
1268 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1269 v = 1;
1270 }
1271 ENCODE_START(v, 1, bl);
7c673cae 1272 uint32_t n = static_cast<uint32_t>(opts.size());
11fdf7f2 1273 encode(n, bl);
7c673cae 1274 for (opts_t::const_iterator i = opts.begin(); i != opts.end(); ++i) {
11fdf7f2
TL
1275 encode(static_cast<int32_t>(i->first), bl);
1276 boost::apply_visitor(pool_opts_encoder_t(bl, features), i->second);
7c673cae
FG
1277 }
1278 ENCODE_FINISH(bl);
1279}
1280
11fdf7f2
TL
1281void pool_opts_t::decode(bufferlist::const_iterator& bl)
1282{
7c673cae
FG
1283 DECODE_START(1, bl);
1284 __u32 n;
11fdf7f2 1285 decode(n, bl);
7c673cae
FG
1286 opts.clear();
1287 while (n--) {
1288 int32_t k, t;
11fdf7f2
TL
1289 decode(k, bl);
1290 decode(t, bl);
7c673cae
FG
1291 if (t == STR) {
1292 std::string s;
11fdf7f2 1293 decode(s, bl);
7c673cae
FG
1294 opts[static_cast<key_t>(k)] = s;
1295 } else if (t == INT) {
11fdf7f2
TL
1296 int64_t i;
1297 if (struct_v >= 2) {
1298 decode(i, bl);
1299 } else {
1300 int ii;
1301 decode(ii, bl);
1302 i = ii;
1303 }
7c673cae
FG
1304 opts[static_cast<key_t>(k)] = i;
1305 } else if (t == DOUBLE) {
1306 double d;
11fdf7f2 1307 decode(d, bl);
7c673cae
FG
1308 opts[static_cast<key_t>(k)] = d;
1309 } else {
11fdf7f2 1310 ceph_assert(!"invalid type");
7c673cae
FG
1311 }
1312 }
1313 DECODE_FINISH(bl);
1314}
1315
1316ostream& operator<<(ostream& out, const pool_opts_t& opts)
1317{
1318 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1319 ++i) {
1320 const std::string& name = i->first;
1321 const pool_opts_t::opt_desc_t& desc = i->second;
1322 pool_opts_t::opts_t::const_iterator j = opts.opts.find(desc.key);
1323 if (j == opts.opts.end()) {
1324 continue;
1325 }
1326 out << " " << name << " " << j->second;
1327 }
1328 return out;
1329}
1330
1331// -- pg_pool_t --
1332
c07f9fc5
FG
1333const char *pg_pool_t::APPLICATION_NAME_CEPHFS("cephfs");
1334const char *pg_pool_t::APPLICATION_NAME_RBD("rbd");
1335const char *pg_pool_t::APPLICATION_NAME_RGW("rgw");
1336
7c673cae
FG
1337void pg_pool_t::dump(Formatter *f) const
1338{
11fdf7f2 1339 f->dump_stream("create_time") << get_create_time();
7c673cae
FG
1340 f->dump_unsigned("flags", get_flags());
1341 f->dump_string("flags_names", get_flags_string());
1342 f->dump_int("type", get_type());
1343 f->dump_int("size", get_size());
1344 f->dump_int("min_size", get_min_size());
31f18b77 1345 f->dump_int("crush_rule", get_crush_rule());
7c673cae 1346 f->dump_int("object_hash", get_object_hash());
11fdf7f2
TL
1347 f->dump_string("pg_autoscale_mode",
1348 get_pg_autoscale_mode_name(pg_autoscale_mode));
7c673cae
FG
1349 f->dump_unsigned("pg_num", get_pg_num());
1350 f->dump_unsigned("pg_placement_num", get_pgp_num());
11fdf7f2
TL
1351 f->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
1352 f->dump_unsigned("pg_num_target", get_pg_num_target());
1353 f->dump_unsigned("pg_num_pending", get_pg_num_pending());
1354 f->dump_object("last_pg_merge_meta", last_pg_merge_meta);
7c673cae
FG
1355 f->dump_stream("last_change") << get_last_change();
1356 f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
11fdf7f2
TL
1357 f->dump_stream("last_force_op_resend_prenautilus")
1358 << get_last_force_op_resend_prenautilus();
7c673cae
FG
1359 f->dump_stream("last_force_op_resend_preluminous")
1360 << get_last_force_op_resend_preluminous();
1361 f->dump_unsigned("auid", get_auid());
1362 f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1363 f->dump_unsigned("snap_seq", get_snap_seq());
1364 f->dump_unsigned("snap_epoch", get_snap_epoch());
1365 f->open_array_section("pool_snaps");
1366 for (map<snapid_t, pool_snap_info_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
1367 f->open_object_section("pool_snap_info");
1368 p->second.dump(f);
1369 f->close_section();
1370 }
1371 f->close_section();
1372 f->dump_stream("removed_snaps") << removed_snaps;
1373 f->dump_unsigned("quota_max_bytes", quota_max_bytes);
1374 f->dump_unsigned("quota_max_objects", quota_max_objects);
1375 f->open_array_section("tiers");
1376 for (set<uint64_t>::const_iterator p = tiers.begin(); p != tiers.end(); ++p)
1377 f->dump_unsigned("pool_id", *p);
1378 f->close_section();
1379 f->dump_int("tier_of", tier_of);
1380 f->dump_int("read_tier", read_tier);
1381 f->dump_int("write_tier", write_tier);
1382 f->dump_string("cache_mode", get_cache_mode_name());
1383 f->dump_unsigned("target_max_bytes", target_max_bytes);
1384 f->dump_unsigned("target_max_objects", target_max_objects);
1385 f->dump_unsigned("cache_target_dirty_ratio_micro",
1386 cache_target_dirty_ratio_micro);
1387 f->dump_unsigned("cache_target_dirty_high_ratio_micro",
1388 cache_target_dirty_high_ratio_micro);
1389 f->dump_unsigned("cache_target_full_ratio_micro",
1390 cache_target_full_ratio_micro);
1391 f->dump_unsigned("cache_min_flush_age", cache_min_flush_age);
1392 f->dump_unsigned("cache_min_evict_age", cache_min_evict_age);
1393 f->dump_string("erasure_code_profile", erasure_code_profile);
1394 f->open_object_section("hit_set_params");
1395 hit_set_params.dump(f);
1396 f->close_section(); // hit_set_params
1397 f->dump_unsigned("hit_set_period", hit_set_period);
1398 f->dump_unsigned("hit_set_count", hit_set_count);
1399 f->dump_bool("use_gmt_hitset", use_gmt_hitset);
1400 f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
1401 f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote);
1402 f->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate);
1403 f->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n);
1404 f->open_array_section("grade_table");
1405 for (unsigned i = 0; i < hit_set_count; ++i)
1406 f->dump_unsigned("value", get_grade(i));
1407 f->close_section();
1408 f->dump_unsigned("stripe_width", get_stripe_width());
1409 f->dump_unsigned("expected_num_objects", expected_num_objects);
1410 f->dump_bool("fast_read", fast_read);
1411 f->open_object_section("options");
1412 opts.dump(f);
1413 f->close_section(); // options
c07f9fc5
FG
1414 f->open_object_section("application_metadata");
1415 for (auto &app_pair : application_metadata) {
1416 f->open_object_section(app_pair.first.c_str());
1417 for (auto &kv_pair : app_pair.second) {
1418 f->dump_string(kv_pair.first.c_str(), kv_pair.second);
1419 }
1420 f->close_section(); // application
1421 }
1422 f->close_section(); // application_metadata
7c673cae
FG
1423}
1424
1425void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
1426 for (size_t i = 0; i < from.size(); ++i) {
1427 if (from[i] != CRUSH_ITEM_NONE) {
1428 to->insert(
1429 pg_shard_t(
1430 from[i],
11fdf7f2 1431 is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
7c673cae
FG
1432 }
1433 }
1434}
1435
1436void pg_pool_t::calc_pg_masks()
1437{
1438 pg_num_mask = (1 << cbits(pg_num-1)) - 1;
1439 pgp_num_mask = (1 << cbits(pgp_num-1)) - 1;
1440}
1441
1442unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid) const
1443{
1444 if (pg_num == pg_num_mask + 1)
1445 return pg_num; // power-of-2 split
1446 unsigned mask = pg_num_mask >> 1;
1447 if ((pgid.ps() & mask) < (pg_num & mask))
1448 return pg_num_mask + 1; // smaller bin size (already split)
1449 else
1450 return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
1451}
1452
11fdf7f2
TL
1453bool pg_pool_t::is_pending_merge(pg_t pgid, bool *target) const
1454{
1455 if (pg_num_pending >= pg_num) {
1456 return false;
1457 }
1458 if (pgid.ps() >= pg_num_pending && pgid.ps() < pg_num) {
1459 if (target) {
1460 *target = false;
1461 }
1462 return true;
1463 }
1464 for (unsigned ps = pg_num_pending; ps < pg_num; ++ps) {
1465 if (pg_t(ps, pgid.pool()).get_parent() == pgid) {
1466 if (target) {
1467 *target = true;
1468 }
1469 return true;
1470 }
1471 }
1472 return false;
1473}
1474
7c673cae
FG
1475/*
1476 * we have two snap modes:
11fdf7f2 1477 * - pool snaps
7c673cae
FG
1478 * - snap existence/non-existence defined by snaps[] and snap_seq
1479 * - user managed snaps
11fdf7f2 1480 * - existence tracked by librados user
7c673cae
FG
1481 */
1482bool pg_pool_t::is_pool_snaps_mode() const
1483{
11fdf7f2 1484 return has_flag(FLAG_POOL_SNAPS);
7c673cae
FG
1485}
1486
1487bool pg_pool_t::is_unmanaged_snaps_mode() const
1488{
11fdf7f2 1489 return has_flag(FLAG_SELFMANAGED_SNAPS);
7c673cae
FG
1490}
1491
1492bool pg_pool_t::is_removed_snap(snapid_t s) const
1493{
1494 if (is_pool_snaps_mode())
1495 return s <= get_snap_seq() && snaps.count(s) == 0;
1496 else
1497 return removed_snaps.contains(s);
1498}
1499
1500/*
1501 * build set of known-removed sets from either pool snaps or
1502 * explicit removed_snaps set.
1503 */
1504void pg_pool_t::build_removed_snaps(interval_set<snapid_t>& rs) const
1505{
1506 if (is_pool_snaps_mode()) {
1507 rs.clear();
1508 for (snapid_t s = 1; s <= get_snap_seq(); s = s + 1)
1509 if (snaps.count(s) == 0)
1510 rs.insert(s);
1511 } else {
1512 rs = removed_snaps;
1513 }
1514}
1515
91327a77
AA
1516bool pg_pool_t::maybe_updated_removed_snaps(const interval_set<snapid_t>& cached) const
1517{
1518 if (is_unmanaged_snaps_mode()) { // remove_unmanaged_snap increments range_end
1519 if (removed_snaps.empty() || cached.empty()) // range_end is undefined
1520 return removed_snaps.empty() != cached.empty();
1521 return removed_snaps.range_end() != cached.range_end();
1522 }
1523 return true;
1524}
1525
7c673cae
FG
1526snapid_t pg_pool_t::snap_exists(const char *s) const
1527{
1528 for (map<snapid_t,pool_snap_info_t>::const_iterator p = snaps.begin();
1529 p != snaps.end();
1530 ++p)
1531 if (p->second.name == s)
1532 return p->second.snapid;
1533 return 0;
1534}
1535
1536void pg_pool_t::add_snap(const char *n, utime_t stamp)
1537{
11fdf7f2
TL
1538 ceph_assert(!is_unmanaged_snaps_mode());
1539 flags |= FLAG_POOL_SNAPS;
7c673cae
FG
1540 snapid_t s = get_snap_seq() + 1;
1541 snap_seq = s;
1542 snaps[s].snapid = s;
1543 snaps[s].name = n;
1544 snaps[s].stamp = stamp;
1545}
1546
1547void pg_pool_t::add_unmanaged_snap(uint64_t& snapid)
1548{
11fdf7f2
TL
1549 ceph_assert(!is_pool_snaps_mode());
1550 if (snap_seq == 0) {
1551 // kludge for pre-mimic tracking of pool vs selfmanaged snaps. after
1552 // mimic this field is not decoded but our flag is set; pre-mimic, we
1553 // have a non-empty removed_snaps to signifiy a non-pool-snaps pool.
7c673cae
FG
1554 removed_snaps.insert(snapid_t(1));
1555 snap_seq = 1;
1556 }
11fdf7f2 1557 flags |= FLAG_SELFMANAGED_SNAPS;
7c673cae
FG
1558 snapid = snap_seq = snap_seq + 1;
1559}
1560
1561void pg_pool_t::remove_snap(snapid_t s)
1562{
11fdf7f2 1563 ceph_assert(snaps.count(s));
7c673cae
FG
1564 snaps.erase(s);
1565 snap_seq = snap_seq + 1;
1566}
1567
1568void pg_pool_t::remove_unmanaged_snap(snapid_t s)
1569{
11fdf7f2 1570 ceph_assert(is_unmanaged_snaps_mode());
7c673cae
FG
1571 removed_snaps.insert(s);
1572 snap_seq = snap_seq + 1;
28e407b8
AA
1573 // try to add in the new seq, just to try to keep the interval_set contiguous
1574 if (!removed_snaps.contains(get_snap_seq())) {
1575 removed_snaps.insert(get_snap_seq());
1576 }
7c673cae
FG
1577}
1578
1579SnapContext pg_pool_t::get_snap_context() const
1580{
1581 vector<snapid_t> s(snaps.size());
1582 unsigned i = 0;
1583 for (map<snapid_t, pool_snap_info_t>::const_reverse_iterator p = snaps.rbegin();
1584 p != snaps.rend();
1585 ++p)
1586 s[i++] = p->first;
1587 return SnapContext(get_snap_seq(), s);
1588}
1589
1590uint32_t pg_pool_t::hash_key(const string& key, const string& ns) const
1591{
1592 if (ns.empty())
1593 return ceph_str_hash(object_hash, key.data(), key.length());
1594 int nsl = ns.length();
1595 int len = key.length() + nsl + 1;
1596 char buf[len];
1597 memcpy(&buf[0], ns.data(), nsl);
1598 buf[nsl] = '\037';
1599 memcpy(&buf[nsl+1], key.data(), key.length());
1600 return ceph_str_hash(object_hash, &buf[0], len);
1601}
1602
1603uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v) const
1604{
1605 return ceph_stable_mod(v, pg_num, pg_num_mask);
1606}
1607
1608/*
1609 * map a raw pg (with full precision ps) into an actual pg, for storage
1610 */
1611pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
1612{
1613 pg.set_ps(ceph_stable_mod(pg.ps(), pg_num, pg_num_mask));
1614 return pg;
1615}
1616
1617/*
1618 * map raw pg (full precision ps) into a placement seed. include
1619 * pool id in that value so that different pools don't use the same
1620 * seeds.
1621 */
1622ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
1623{
1624 if (flags & FLAG_HASHPSPOOL) {
1625 // Hash the pool id so that pool PGs do not overlap.
1626 return
1627 crush_hash32_2(CRUSH_HASH_RJENKINS1,
1628 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
1629 pg.pool());
1630 } else {
1631 // Legacy behavior; add ps and pool together. This is not a great
1632 // idea because the PGs from each pool will essentially overlap on
1633 // top of each other: 0.5 == 1.4 == 2.3 == ...
1634 return
1635 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
1636 pg.pool();
1637 }
1638}
1639
1640uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
1641{
1642 uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
1643 if (pg_num == pg_num_mask + 1) {
1644 r &= ~pg_num_mask;
1645 } else {
1646 unsigned smaller_mask = pg_num_mask >> 1;
1647 if ((pg.ps() & smaller_mask) < (pg_num & smaller_mask)) {
1648 r &= ~pg_num_mask;
1649 } else {
1650 r &= ~smaller_mask;
1651 }
1652 }
1653 r |= pg.ps();
1654 return r;
1655}
1656
// Encode this pool into bl for a peer with the given feature bits.
//
// One of four layouts is written, chosen from the peer's features:
//  - no CEPH_FEATURE_PGPOOL3:        bare struct_v 2, counts first, then
//                                    the headerless snaps / removed_snaps
//  - no CEPH_FEATURE_OSDENC:         bare struct_v 4
//  - no CEPH_FEATURE_OSD_POOLRESEND: ENCODE_START(14, 5)
//  - otherwise:                      ENCODE_START(v, 5) with v = 21, 24,
//                                    26, 27 or 29 depending on features
// The order of the encode() calls is the wire format and must match
// pg_pool_t::decode; new fields may only be appended behind a version check.
1657void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
1658{
11fdf7f2 1659 using ceph::encode;
7c673cae
FG
1660 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1661 // this encoding matches the old struct ceph_pg_pool
1662 __u8 struct_v = 2;
11fdf7f2
TL
1663 encode(struct_v, bl);
1664 encode(type, bl);
1665 encode(size, bl);
1666 encode(crush_rule, bl);
1667 encode(object_hash, bl);
1668 encode(pg_num, bl);
1669 encode(pgp_num, bl);
7c673cae 1670 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1671 encode(lpg_num, bl);
1672 encode(lpgp_num, bl);
1673 encode(last_change, bl);
1674 encode(snap_seq, bl);
1675 encode(snap_epoch, bl);
7c673cae
FG
1676
// element counts of snaps and removed_snaps go first; the containers
// themselves follow auid, encoded without their own length header
1677 __u32 n = snaps.size();
11fdf7f2 1678 encode(n, bl);
7c673cae 1679 n = removed_snaps.num_intervals();
11fdf7f2 1680 encode(n, bl);
7c673cae 1681
11fdf7f2 1682 encode(auid, bl);
7c673cae 1683
11fdf7f2
TL
1684 encode_nohead(snaps, bl, features);
1685 encode_nohead(removed_snaps, bl);
7c673cae
FG
1686 return;
1687 }
1688
1689 if ((features & CEPH_FEATURE_OSDENC) == 0) {
1690 __u8 struct_v = 4;
11fdf7f2
TL
1691 encode(struct_v, bl);
1692 encode(type, bl);
1693 encode(size, bl);
1694 encode(crush_rule, bl);
1695 encode(object_hash, bl);
1696 encode(pg_num, bl);
1697 encode(pgp_num, bl);
7c673cae 1698 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1699 encode(lpg_num, bl);
1700 encode(lpgp_num, bl);
1701 encode(last_change, bl);
1702 encode(snap_seq, bl);
1703 encode(snap_epoch, bl);
1704 encode(snaps, bl, features);
1705 encode(removed_snaps, bl);
1706 encode(auid, bl);
1707 encode(flags, bl);
1708 encode((uint32_t)0, bl); // crash_replay_interval
7c673cae
FG
1709 return;
1710 }
1711
1712 if ((features & CEPH_FEATURE_OSD_POOLRESEND) == 0) {
1713 // we simply added last_force_op_resend here, which is a fully
1714 // backward compatible change. however, encoding the same map
1715 // differently between monitors triggers scrub noise (even though
1716 // they are decodable without the feature), so let's be pedantic
1717 // about it.
1718 ENCODE_START(14, 5, bl);
11fdf7f2
TL
1719 encode(type, bl);
1720 encode(size, bl);
1721 encode(crush_rule, bl);
1722 encode(object_hash, bl);
1723 encode(pg_num, bl);
1724 encode(pgp_num, bl);
7c673cae 1725 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1726 encode(lpg_num, bl);
1727 encode(lpgp_num, bl);
1728 encode(last_change, bl);
1729 encode(snap_seq, bl);
1730 encode(snap_epoch, bl);
1731 encode(snaps, bl, features);
1732 encode(removed_snaps, bl);
1733 encode(auid, bl);
1734 encode(flags, bl);
1735 encode((uint32_t)0, bl); // crash_replay_interval
1736 encode(min_size, bl);
1737 encode(quota_max_bytes, bl);
1738 encode(quota_max_objects, bl);
1739 encode(tiers, bl);
1740 encode(tier_of, bl);
7c673cae 1741 __u8 c = cache_mode;
11fdf7f2
TL
1742 encode(c, bl);
1743 encode(read_tier, bl);
1744 encode(write_tier, bl);
1745 encode(properties, bl);
1746 encode(hit_set_params, bl);
1747 encode(hit_set_period, bl);
1748 encode(hit_set_count, bl);
1749 encode(stripe_width, bl);
1750 encode(target_max_bytes, bl);
1751 encode(target_max_objects, bl);
1752 encode(cache_target_dirty_ratio_micro, bl);
1753 encode(cache_target_full_ratio_micro, bl);
1754 encode(cache_min_flush_age, bl);
1755 encode(cache_min_evict_age, bl);
1756 encode(erasure_code_profile, bl);
7c673cae
FG
1757 ENCODE_FINISH(bl);
1758 return;
1759 }
1760
// pick the newest struct version the peer's features allow; 29 is the
// version written when none of the checks below lowers it
11fdf7f2 1761 uint8_t v = 29;
28e407b8
AA
1762 // NOTE: any new encoding dependencies must be reflected by
1763 // SIGNIFICANT_FEATURES
7c673cae
FG
1764 if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
1765 // this was the first post-hammer thing we added; if it's missing, encode
1766 // like hammer.
1767 v = 21;
94b18763 1768 } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
7c673cae 1769 v = 24;
11fdf7f2
TL
1770 } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
1771 v = 26;
1772 } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1773 v = 27;
7c673cae
FG
1774 }
1775
1776 ENCODE_START(v, 5, bl);
11fdf7f2
TL
1777 encode(type, bl);
1778 encode(size, bl);
1779 encode(crush_rule, bl);
1780 encode(object_hash, bl);
1781 encode(pg_num, bl);
1782 encode(pgp_num, bl);
7c673cae 1783 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
11fdf7f2
TL
1784 encode(lpg_num, bl);
1785 encode(lpgp_num, bl);
1786 encode(last_change, bl);
1787 encode(snap_seq, bl);
1788 encode(snap_epoch, bl);
1789 encode(snaps, bl, features);
1790 encode(removed_snaps, bl);
1791 encode(auid, bl);
// versions below 27 get the flags with FLAG_SELFMANAGED_SNAPS,
// FLAG_POOL_SNAPS and FLAG_CREATING masked out
1792 if (v >= 27) {
1793 encode(flags, bl);
1794 } else {
1795 auto tmp = flags;
1796 tmp &= ~(FLAG_SELFMANAGED_SNAPS | FLAG_POOL_SNAPS | FLAG_CREATING);
1797 encode(tmp, bl);
1798 }
1799 encode((uint32_t)0, bl); // crash_replay_interval
1800 encode(min_size, bl);
1801 encode(quota_max_bytes, bl);
1802 encode(quota_max_objects, bl);
1803 encode(tiers, bl);
1804 encode(tier_of, bl);
7c673cae 1805 __u8 c = cache_mode;
11fdf7f2
TL
1806 encode(c, bl);
1807 encode(read_tier, bl);
1808 encode(write_tier, bl);
1809 encode(properties, bl);
1810 encode(hit_set_params, bl);
1811 encode(hit_set_period, bl);
1812 encode(hit_set_count, bl);
1813 encode(stripe_width, bl);
1814 encode(target_max_bytes, bl);
1815 encode(target_max_objects, bl);
1816 encode(cache_target_dirty_ratio_micro, bl);
1817 encode(cache_target_full_ratio_micro, bl);
1818 encode(cache_min_flush_age, bl);
1819 encode(cache_min_evict_age, bl);
1820 encode(erasure_code_profile, bl);
1821 encode(last_force_op_resend_preluminous, bl);
1822 encode(min_read_recency_for_promote, bl);
1823 encode(expected_num_objects, bl);
// from here on each field is written only when the chosen version
// includes it
7c673cae 1824 if (v >= 19) {
11fdf7f2 1825 encode(cache_target_dirty_high_ratio_micro, bl);
7c673cae
FG
1826 }
1827 if (v >= 20) {
11fdf7f2 1828 encode(min_write_recency_for_promote, bl);
7c673cae
FG
1829 }
1830 if (v >= 21) {
11fdf7f2 1831 encode(use_gmt_hitset, bl);
7c673cae
FG
1832 }
1833 if (v >= 22) {
11fdf7f2 1834 encode(fast_read, bl);
7c673cae
FG
1835 }
1836 if (v >= 23) {
11fdf7f2
TL
1837 encode(hit_set_grade_decay_rate, bl);
1838 encode(hit_set_search_last_n, bl);
7c673cae
FG
1839 }
1840 if (v >= 24) {
11fdf7f2 1841 encode(opts, bl, features);
7c673cae
FG
1842 }
1843 if (v >= 25) {
11fdf7f2 1844 encode(last_force_op_resend_prenautilus, bl);
7c673cae 1845 }
c07f9fc5 1846 if (v >= 26) {
11fdf7f2
TL
1847 encode(application_metadata, bl);
1848 }
1849 if (v >= 27) {
1850 encode(create_time, bl);
1851 }
1852 if (v >= 28) {
1853 encode(pg_num_target, bl);
1854 encode(pgp_num_target, bl);
1855 encode(pg_num_pending, bl);
1856 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_started from 14.1.[01]
1857 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_clean from 14.1.[01]
1858 encode(last_force_op_resend, bl);
1859 encode(pg_autoscale_mode, bl);
1860 }
1861 if (v >= 29) {
1862 encode(last_pg_merge_meta, bl);
c07f9fc5 1863 }
7c673cae
FG
1864 ENCODE_FINISH(bl);
1865}
1866
// Decode a pool from bl.
//
// Fields are read in the order pg_pool_t::encode writes them.  A field that
// was added in a later struct version is only read when struct_v is new
// enough; otherwise it gets the fallback assigned in the matching else
// branch (where there is one).  Once everything is read, calc_pg_masks()
// and calc_grade_table() are called to refresh the derived state.
11fdf7f2 1867void pg_pool_t::decode(bufferlist::const_iterator& bl)
7c673cae 1868{
11fdf7f2
TL
1869 DECODE_START_LEGACY_COMPAT_LEN(29, 5, 5, bl);
1870 decode(type, bl);
1871 decode(size, bl);
1872 decode(crush_rule, bl);
1873 decode(object_hash, bl);
1874 decode(pg_num, bl);
1875 decode(pgp_num, bl);
7c673cae
FG
1876 {
// the two localized-pg counts are read and thrown away
1877 __u32 lpg_num, lpgp_num;
11fdf7f2
TL
1878 decode(lpg_num, bl);
1879 decode(lpgp_num, bl);
7c673cae 1880 }
11fdf7f2
TL
1881 decode(last_change, bl);
1882 decode(snap_seq, bl);
1883 decode(snap_epoch, bl);
7c673cae
FG
1884
1885 if (struct_v >= 3) {
11fdf7f2
TL
1886 decode(snaps, bl);
1887 decode(removed_snaps, bl);
1888 decode(auid, bl);
7c673cae
FG
1889 } else {
// older layout: both element counts first, then auid, then the two
// containers without their own length header
1890 __u32 n, m;
11fdf7f2
TL
1891 decode(n, bl);
1892 decode(m, bl);
1893 decode(auid, bl);
1894 decode_nohead(n, snaps, bl);
1895 decode_nohead(m, removed_snaps, bl);
7c673cae
FG
1896 }
1897
1898 if (struct_v >= 4) {
11fdf7f2
TL
1899 decode(flags, bl);
// crash_replay_interval is read and discarded
1900 uint32_t crash_replay_interval;
1901 decode(crash_replay_interval, bl);
7c673cae
FG
1902 } else {
1903 flags = 0;
11fdf7f2
TL
1904 }
1905 // upgrade path for selfmanaged vs pool snaps
1906 if (snap_seq > 0 && (flags & (FLAG_SELFMANAGED_SNAPS|FLAG_POOL_SNAPS)) == 0) {
1907 if (!removed_snaps.empty()) {
1908 flags |= FLAG_SELFMANAGED_SNAPS;
1909 } else {
1910 flags |= FLAG_POOL_SNAPS;
1911 }
7c673cae
FG
1912 }
1913 if (struct_v >= 7) {
11fdf7f2 1914 decode(min_size, bl);
7c673cae
FG
1915 } else {
1916 min_size = size - size/2;
1917 }
1918 if (struct_v >= 8) {
11fdf7f2
TL
1919 decode(quota_max_bytes, bl);
1920 decode(quota_max_objects, bl);
7c673cae
FG
1921 }
1922 if (struct_v >= 9) {
11fdf7f2
TL
1923 decode(tiers, bl);
1924 decode(tier_of, bl);
// cache_mode travels as a single byte
7c673cae 1925 __u8 v;
11fdf7f2 1926 decode(v, bl);
7c673cae 1927 cache_mode = (cache_mode_t)v;
11fdf7f2
TL
1928 decode(read_tier, bl);
1929 decode(write_tier, bl);
7c673cae
FG
1930 }
1931 if (struct_v >= 10) {
11fdf7f2 1932 decode(properties, bl);
7c673cae
FG
1933 }
1934 if (struct_v >= 11) {
11fdf7f2
TL
1935 decode(hit_set_params, bl);
1936 decode(hit_set_period, bl);
1937 decode(hit_set_count, bl);
7c673cae
FG
1938 } else {
// take the two hit_set values from a default-constructed pool
1939 pg_pool_t def;
1940 hit_set_period = def.hit_set_period;
1941 hit_set_count = def.hit_set_count;
1942 }
1943 if (struct_v >= 12) {
11fdf7f2 1944 decode(stripe_width, bl);
7c673cae
FG
1945 } else {
1946 set_stripe_width(0);
1947 }
1948 if (struct_v >= 13) {
11fdf7f2
TL
1949 decode(target_max_bytes, bl);
1950 decode(target_max_objects, bl);
1951 decode(cache_target_dirty_ratio_micro, bl);
1952 decode(cache_target_full_ratio_micro, bl);
1953 decode(cache_min_flush_age, bl);
1954 decode(cache_min_evict_age, bl);
7c673cae
FG
1955 } else {
1956 target_max_bytes = 0;
1957 target_max_objects = 0;
1958 cache_target_dirty_ratio_micro = 0;
1959 cache_target_full_ratio_micro = 0;
1960 cache_min_flush_age = 0;
1961 cache_min_evict_age = 0;
1962 }
1963 if (struct_v >= 14) {
11fdf7f2 1964 decode(erasure_code_profile, bl);
7c673cae
FG
1965 }
1966 if (struct_v >= 15) {
11fdf7f2 1967 decode(last_force_op_resend_preluminous, bl);
7c673cae
FG
1968 } else {
1969 last_force_op_resend_preluminous = 0;
1970 }
1971 if (struct_v >= 16) {
11fdf7f2 1972 decode(min_read_recency_for_promote, bl);
7c673cae
FG
1973 } else {
1974 min_read_recency_for_promote = 1;
1975 }
1976 if (struct_v >= 17) {
11fdf7f2 1977 decode(expected_num_objects, bl);
7c673cae
FG
1978 } else {
1979 expected_num_objects = 0;
1980 }
1981 if (struct_v >= 19) {
11fdf7f2 1982 decode(cache_target_dirty_high_ratio_micro, bl);
7c673cae
FG
1983 } else {
1984 cache_target_dirty_high_ratio_micro = cache_target_dirty_ratio_micro;
1985 }
1986 if (struct_v >= 20) {
11fdf7f2 1987 decode(min_write_recency_for_promote, bl);
7c673cae
FG
1988 } else {
1989 min_write_recency_for_promote = 1;
1990 }
1991 if (struct_v >= 21) {
11fdf7f2 1992 decode(use_gmt_hitset, bl);
7c673cae
FG
1993 } else {
1994 use_gmt_hitset = false;
1995 }
1996 if (struct_v >= 22) {
11fdf7f2 1997 decode(fast_read, bl);
7c673cae
FG
1998 } else {
1999 fast_read = false;
2000 }
2001 if (struct_v >= 23) {
11fdf7f2
TL
2002 decode(hit_set_grade_decay_rate, bl);
2003 decode(hit_set_search_last_n, bl);
7c673cae
FG
2004 } else {
2005 hit_set_grade_decay_rate = 0;
2006 hit_set_search_last_n = 1;
2007 }
2008 if (struct_v >= 24) {
11fdf7f2 2009 decode(opts, bl);
7c673cae
FG
2010 }
2011 if (struct_v >= 25) {
11fdf7f2 2012 decode(last_force_op_resend_prenautilus, bl);
7c673cae 2013 } else {
11fdf7f2 2014 last_force_op_resend_prenautilus = last_force_op_resend_preluminous;
7c673cae 2015 }
c07f9fc5 2016 if (struct_v >= 26) {
11fdf7f2
TL
2017 decode(application_metadata, bl);
2018 }
2019 if (struct_v >= 27) {
2020 decode(create_time, bl);
2021 }
2022 if (struct_v >= 28) {
2023 decode(pg_num_target, bl);
2024 decode(pgp_num_target, bl);
2025 decode(pg_num_pending, bl);
// the two epochs are always read at v28+; they are only used to fill
// last_pg_merge_meta when the encoding is older than v29
2026 epoch_t old_merge_last_epoch_clean, old_merge_last_epoch_started;
2027 decode(old_merge_last_epoch_started, bl);
2028 decode(old_merge_last_epoch_clean, bl);
2029 decode(last_force_op_resend, bl);
2030 decode(pg_autoscale_mode, bl);
2031 if (struct_v >= 29) {
2032 decode(last_pg_merge_meta, bl);
2033 } else {
2034 last_pg_merge_meta.last_epoch_clean = old_merge_last_epoch_clean;
2035 last_pg_merge_meta.last_epoch_started = old_merge_last_epoch_started;
2036 }
2037 } else {
2038 pg_num_target = pg_num;
2039 pgp_num_target = pgp_num;
2040 pg_num_pending = pg_num;
2041 last_force_op_resend = last_force_op_resend_prenautilus;
2042 pg_autoscale_mode = PG_AUTOSCALE_MODE_WARN; // default to warn on upgrade
c07f9fc5 2043 }
7c673cae
FG
2044 DECODE_FINISH(bl);
// refresh state derived from the decoded fields
2045 calc_pg_masks();
2046 calc_grade_table();
2047}
2048
2049void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
2050{
2051 pg_pool_t a;
2052 o.push_back(new pg_pool_t(a));
2053
11fdf7f2 2054 a.create_time = utime_t(4,5);
7c673cae
FG
2055 a.type = TYPE_REPLICATED;
2056 a.size = 2;
31f18b77 2057 a.crush_rule = 3;
7c673cae
FG
2058 a.object_hash = 4;
2059 a.pg_num = 6;
11fdf7f2
TL
2060 a.pgp_num = 4;
2061 a.pgp_num_target = 4;
2062 a.pg_num_target = 5;
2063 a.pg_num_pending = 5;
2064 a.last_pg_merge_meta.last_epoch_started = 2;
2065 a.last_pg_merge_meta.last_epoch_clean = 2;
7c673cae
FG
2066 a.last_change = 9;
2067 a.last_force_op_resend = 123823;
2068 a.last_force_op_resend_preluminous = 123824;
2069 a.snap_seq = 10;
2070 a.snap_epoch = 11;
11fdf7f2 2071 a.flags = FLAG_POOL_SNAPS;
7c673cae 2072 a.auid = 12;
7c673cae
FG
2073 a.quota_max_bytes = 473;
2074 a.quota_max_objects = 474;
2075 o.push_back(new pg_pool_t(a));
2076
2077 a.snaps[3].name = "asdf";
2078 a.snaps[3].snapid = 3;
2079 a.snaps[3].stamp = utime_t(123, 4);
2080 a.snaps[6].name = "qwer";
2081 a.snaps[6].snapid = 6;
2082 a.snaps[6].stamp = utime_t(23423, 4);
2083 o.push_back(new pg_pool_t(a));
2084
11fdf7f2
TL
2085 a.flags = FLAG_SELFMANAGED_SNAPS;
2086 a.snaps.clear();
2087 a.removed_snaps.insert(2);
7c673cae
FG
2088 a.quota_max_bytes = 2473;
2089 a.quota_max_objects = 4374;
2090 a.tiers.insert(0);
2091 a.tiers.insert(1);
2092 a.tier_of = 2;
2093 a.cache_mode = CACHEMODE_WRITEBACK;
2094 a.read_tier = 1;
2095 a.write_tier = 1;
2096 a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
2097 a.hit_set_period = 3600;
2098 a.hit_set_count = 8;
2099 a.min_read_recency_for_promote = 1;
2100 a.min_write_recency_for_promote = 1;
2101 a.hit_set_grade_decay_rate = 50;
2102 a.hit_set_search_last_n = 1;
2103 a.calc_grade_table();
2104 a.set_stripe_width(12345);
2105 a.target_max_bytes = 1238132132;
2106 a.target_max_objects = 1232132;
2107 a.cache_target_dirty_ratio_micro = 187232;
2108 a.cache_target_dirty_high_ratio_micro = 309856;
2109 a.cache_target_full_ratio_micro = 987222;
2110 a.cache_min_flush_age = 231;
2111 a.cache_min_evict_age = 2321;
2112 a.erasure_code_profile = "profile in osdmap";
2113 a.expected_num_objects = 123456;
2114 a.fast_read = false;
c07f9fc5 2115 a.application_metadata = {{"rbd", {{"key", "value"}}}};
7c673cae
FG
2116 o.push_back(new pg_pool_t(a));
2117}
2118
2119ostream& operator<<(ostream& out, const pg_pool_t& p)
2120{
2121 out << p.get_type_name()
2122 << " size " << p.get_size()
2123 << " min_size " << p.get_min_size()
31f18b77 2124 << " crush_rule " << p.get_crush_rule()
7c673cae
FG
2125 << " object_hash " << p.get_object_hash_name()
2126 << " pg_num " << p.get_pg_num()
11fdf7f2
TL
2127 << " pgp_num " << p.get_pgp_num();
2128 if (p.get_pg_num_target() != p.get_pg_num()) {
2129 out << " pg_num_target " << p.get_pg_num_target();
2130 }
2131 if (p.get_pgp_num_target() != p.get_pgp_num()) {
2132 out << " pgp_num_target " << p.get_pgp_num_target();
2133 }
2134 if (p.get_pg_num_pending() != p.get_pg_num()) {
2135 out << " pg_num_pending " << p.get_pg_num_pending();
2136 }
2137 if (p.pg_autoscale_mode) {
2138 out << " autoscale_mode " << p.get_pg_autoscale_mode_name(p.pg_autoscale_mode);
2139 }
2140 out << " last_change " << p.get_last_change();
7c673cae 2141 if (p.get_last_force_op_resend() ||
11fdf7f2 2142 p.get_last_force_op_resend_prenautilus() ||
7c673cae
FG
2143 p.get_last_force_op_resend_preluminous())
2144 out << " lfor " << p.get_last_force_op_resend() << "/"
11fdf7f2 2145 << p.get_last_force_op_resend_prenautilus() << "/"
7c673cae
FG
2146 << p.get_last_force_op_resend_preluminous();
2147 if (p.get_auid())
2148 out << " owner " << p.get_auid();
2149 if (p.flags)
2150 out << " flags " << p.get_flags_string();
7c673cae
FG
2151 if (p.quota_max_bytes)
2152 out << " max_bytes " << p.quota_max_bytes;
2153 if (p.quota_max_objects)
2154 out << " max_objects " << p.quota_max_objects;
2155 if (!p.tiers.empty())
2156 out << " tiers " << p.tiers;
2157 if (p.is_tier())
2158 out << " tier_of " << p.tier_of;
2159 if (p.has_read_tier())
2160 out << " read_tier " << p.read_tier;
2161 if (p.has_write_tier())
2162 out << " write_tier " << p.write_tier;
2163 if (p.cache_mode)
2164 out << " cache_mode " << p.get_cache_mode_name();
2165 if (p.target_max_bytes)
2166 out << " target_bytes " << p.target_max_bytes;
2167 if (p.target_max_objects)
2168 out << " target_objects " << p.target_max_objects;
2169 if (p.hit_set_params.get_type() != HitSet::TYPE_NONE) {
2170 out << " hit_set " << p.hit_set_params
2171 << " " << p.hit_set_period << "s"
2172 << " x" << p.hit_set_count << " decay_rate "
2173 << p.hit_set_grade_decay_rate
2174 << " search_last_n " << p.hit_set_search_last_n;
2175 }
2176 if (p.min_read_recency_for_promote)
2177 out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
2178 if (p.min_write_recency_for_promote)
2179 out << " min_write_recency_for_promote " << p.min_write_recency_for_promote;
2180 out << " stripe_width " << p.get_stripe_width();
2181 if (p.expected_num_objects)
2182 out << " expected_num_objects " << p.expected_num_objects;
2183 if (p.fast_read)
2184 out << " fast_read " << p.fast_read;
2185 out << p.opts;
c07f9fc5
FG
2186 if (!p.application_metadata.empty()) {
2187 out << " application ";
2188 for (auto it = p.application_metadata.begin();
2189 it != p.application_metadata.end(); ++it) {
2190 if (it != p.application_metadata.begin())
2191 out << ",";
2192 out << it->first;
2193 }
2194 }
7c673cae
FG
2195 return out;
2196}
2197
2198
2199// -- object_stat_sum_t --
2200
2201void object_stat_sum_t::dump(Formatter *f) const
2202{
2203 f->dump_int("num_bytes", num_bytes);
2204 f->dump_int("num_objects", num_objects);
2205 f->dump_int("num_object_clones", num_object_clones);
2206 f->dump_int("num_object_copies", num_object_copies);
2207 f->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary);
2208 f->dump_int("num_objects_missing", num_objects_missing);
2209 f->dump_int("num_objects_degraded", num_objects_degraded);
2210 f->dump_int("num_objects_misplaced", num_objects_misplaced);
2211 f->dump_int("num_objects_unfound", num_objects_unfound);
2212 f->dump_int("num_objects_dirty", num_objects_dirty);
2213 f->dump_int("num_whiteouts", num_whiteouts);
2214 f->dump_int("num_read", num_rd);
2215 f->dump_int("num_read_kb", num_rd_kb);
2216 f->dump_int("num_write", num_wr);
2217 f->dump_int("num_write_kb", num_wr_kb);
2218 f->dump_int("num_scrub_errors", num_scrub_errors);
2219 f->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors);
2220 f->dump_int("num_deep_scrub_errors", num_deep_scrub_errors);
2221 f->dump_int("num_objects_recovered", num_objects_recovered);
2222 f->dump_int("num_bytes_recovered", num_bytes_recovered);
2223 f->dump_int("num_keys_recovered", num_keys_recovered);
2224 f->dump_int("num_objects_omap", num_objects_omap);
2225 f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
2226 f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
2227 f->dump_int("num_flush", num_flush);
2228 f->dump_int("num_flush_kb", num_flush_kb);
2229 f->dump_int("num_evict", num_evict);
2230 f->dump_int("num_evict_kb", num_evict_kb);
2231 f->dump_int("num_promote", num_promote);
2232 f->dump_int("num_flush_mode_high", num_flush_mode_high);
2233 f->dump_int("num_flush_mode_low", num_flush_mode_low);
2234 f->dump_int("num_evict_mode_some", num_evict_mode_some);
2235 f->dump_int("num_evict_mode_full", num_evict_mode_full);
2236 f->dump_int("num_objects_pinned", num_objects_pinned);
2237 f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
28e407b8 2238 f->dump_int("num_large_omap_objects", num_large_omap_objects);
11fdf7f2
TL
2239 f->dump_int("num_objects_manifest", num_objects_manifest);
2240 f->dump_int("num_omap_bytes", num_omap_bytes);
2241 f->dump_int("num_omap_keys", num_omap_keys);
2242 f->dump_int("num_objects_repaired", num_objects_repaired);
7c673cae
FG
2243}
2244
2245void object_stat_sum_t::encode(bufferlist& bl) const
2246{
11fdf7f2 2247 ENCODE_START(20, 14, bl);
7c673cae
FG
2248#if defined(CEPH_LITTLE_ENDIAN)
2249 bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
2250#else
11fdf7f2
TL
2251 encode(num_bytes, bl);
2252 encode(num_objects, bl);
2253 encode(num_object_clones, bl);
2254 encode(num_object_copies, bl);
2255 encode(num_objects_missing_on_primary, bl);
2256 encode(num_objects_degraded, bl);
2257 encode(num_objects_unfound, bl);
2258 encode(num_rd, bl);
2259 encode(num_rd_kb, bl);
2260 encode(num_wr, bl);
2261 encode(num_wr_kb, bl);
2262 encode(num_scrub_errors, bl);
2263 encode(num_objects_recovered, bl);
2264 encode(num_bytes_recovered, bl);
2265 encode(num_keys_recovered, bl);
2266 encode(num_shallow_scrub_errors, bl);
2267 encode(num_deep_scrub_errors, bl);
2268 encode(num_objects_dirty, bl);
2269 encode(num_whiteouts, bl);
2270 encode(num_objects_omap, bl);
2271 encode(num_objects_hit_set_archive, bl);
2272 encode(num_objects_misplaced, bl);
2273 encode(num_bytes_hit_set_archive, bl);
2274 encode(num_flush, bl);
2275 encode(num_flush_kb, bl);
2276 encode(num_evict, bl);
2277 encode(num_evict_kb, bl);
2278 encode(num_promote, bl);
2279 encode(num_flush_mode_high, bl);
2280 encode(num_flush_mode_low, bl);
2281 encode(num_evict_mode_some, bl);
2282 encode(num_evict_mode_full, bl);
2283 encode(num_objects_pinned, bl);
2284 encode(num_objects_missing, bl);
2285 encode(num_legacy_snapsets, bl);
2286 encode(num_large_omap_objects, bl);
2287 encode(num_objects_manifest, bl);
2288 encode(num_omap_bytes, bl);
2289 encode(num_omap_keys, bl);
2290 encode(num_objects_repaired, bl);
7c673cae
FG
2291#endif
2292 ENCODE_FINISH(bl);
2293}
2294
11fdf7f2 2295void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
2296{
2297 bool decode_finish = false;
11fdf7f2
TL
2298 static const int STAT_SUM_DECODE_VERSION = 20;
2299 DECODE_START(STAT_SUM_DECODE_VERSION, bl);
7c673cae 2300#if defined(CEPH_LITTLE_ENDIAN)
11fdf7f2 2301 if (struct_v == STAT_SUM_DECODE_VERSION) {
7c673cae
FG
2302 bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
2303 decode_finish = true;
2304 }
2305#endif
2306 if (!decode_finish) {
11fdf7f2
TL
2307 decode(num_bytes, bl);
2308 decode(num_objects, bl);
2309 decode(num_object_clones, bl);
2310 decode(num_object_copies, bl);
2311 decode(num_objects_missing_on_primary, bl);
2312 decode(num_objects_degraded, bl);
2313 decode(num_objects_unfound, bl);
2314 decode(num_rd, bl);
2315 decode(num_rd_kb, bl);
2316 decode(num_wr, bl);
2317 decode(num_wr_kb, bl);
2318 decode(num_scrub_errors, bl);
2319 decode(num_objects_recovered, bl);
2320 decode(num_bytes_recovered, bl);
2321 decode(num_keys_recovered, bl);
2322 decode(num_shallow_scrub_errors, bl);
2323 decode(num_deep_scrub_errors, bl);
2324 decode(num_objects_dirty, bl);
2325 decode(num_whiteouts, bl);
2326 decode(num_objects_omap, bl);
2327 decode(num_objects_hit_set_archive, bl);
2328 decode(num_objects_misplaced, bl);
2329 decode(num_bytes_hit_set_archive, bl);
2330 decode(num_flush, bl);
2331 decode(num_flush_kb, bl);
2332 decode(num_evict, bl);
2333 decode(num_evict_kb, bl);
2334 decode(num_promote, bl);
2335 decode(num_flush_mode_high, bl);
2336 decode(num_flush_mode_low, bl);
2337 decode(num_evict_mode_some, bl);
2338 decode(num_evict_mode_full, bl);
2339 decode(num_objects_pinned, bl);
2340 decode(num_objects_missing, bl);
7c673cae 2341 if (struct_v >= 16) {
11fdf7f2 2342 decode(num_legacy_snapsets, bl);
7c673cae
FG
2343 } else {
2344 num_legacy_snapsets = num_object_clones; // upper bound
2345 }
28e407b8 2346 if (struct_v >= 17) {
11fdf7f2
TL
2347 decode(num_large_omap_objects, bl);
2348 }
2349 if (struct_v >= 18) {
2350 decode(num_objects_manifest, bl);
2351 }
2352 if (struct_v >= 19) {
2353 decode(num_omap_bytes, bl);
2354 decode(num_omap_keys, bl);
2355 }
2356 if (struct_v >= 20) {
2357 decode(num_objects_repaired, bl);
28e407b8 2358 }
7c673cae
FG
2359 }
2360 DECODE_FINISH(bl);
2361}
2362
2363void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
2364{
2365 object_stat_sum_t a;
2366
2367 a.num_bytes = 1;
2368 a.num_objects = 3;
2369 a.num_object_clones = 4;
2370 a.num_object_copies = 5;
2371 a.num_objects_missing_on_primary = 6;
2372 a.num_objects_missing = 123;
2373 a.num_objects_degraded = 7;
2374 a.num_objects_unfound = 8;
2375 a.num_rd = 9; a.num_rd_kb = 10;
2376 a.num_wr = 11; a.num_wr_kb = 12;
2377 a.num_objects_recovered = 14;
2378 a.num_bytes_recovered = 15;
2379 a.num_keys_recovered = 16;
2380 a.num_deep_scrub_errors = 17;
2381 a.num_shallow_scrub_errors = 18;
2382 a.num_scrub_errors = a.num_deep_scrub_errors + a.num_shallow_scrub_errors;
2383 a.num_objects_dirty = 21;
2384 a.num_whiteouts = 22;
2385 a.num_objects_misplaced = 1232;
2386 a.num_objects_hit_set_archive = 2;
2387 a.num_bytes_hit_set_archive = 27;
2388 a.num_flush = 5;
2389 a.num_flush_kb = 6;
2390 a.num_evict = 7;
2391 a.num_evict_kb = 8;
2392 a.num_promote = 9;
2393 a.num_flush_mode_high = 0;
2394 a.num_flush_mode_low = 1;
2395 a.num_evict_mode_some = 1;
2396 a.num_evict_mode_full = 0;
2397 a.num_objects_pinned = 20;
28e407b8 2398 a.num_large_omap_objects = 5;
11fdf7f2
TL
2399 a.num_objects_manifest = 2;
2400 a.num_omap_bytes = 20000;
2401 a.num_omap_keys = 200;
2402 a.num_objects_repaired = 300;
7c673cae
FG
2403 o.push_back(new object_stat_sum_t(a));
2404}
2405
2406void object_stat_sum_t::add(const object_stat_sum_t& o)
2407{
2408 num_bytes += o.num_bytes;
2409 num_objects += o.num_objects;
2410 num_object_clones += o.num_object_clones;
2411 num_object_copies += o.num_object_copies;
2412 num_objects_missing_on_primary += o.num_objects_missing_on_primary;
2413 num_objects_missing += o.num_objects_missing;
2414 num_objects_degraded += o.num_objects_degraded;
2415 num_objects_misplaced += o.num_objects_misplaced;
2416 num_rd += o.num_rd;
2417 num_rd_kb += o.num_rd_kb;
2418 num_wr += o.num_wr;
2419 num_wr_kb += o.num_wr_kb;
2420 num_objects_unfound += o.num_objects_unfound;
2421 num_scrub_errors += o.num_scrub_errors;
2422 num_shallow_scrub_errors += o.num_shallow_scrub_errors;
2423 num_deep_scrub_errors += o.num_deep_scrub_errors;
2424 num_objects_recovered += o.num_objects_recovered;
2425 num_bytes_recovered += o.num_bytes_recovered;
2426 num_keys_recovered += o.num_keys_recovered;
2427 num_objects_dirty += o.num_objects_dirty;
2428 num_whiteouts += o.num_whiteouts;
2429 num_objects_omap += o.num_objects_omap;
2430 num_objects_hit_set_archive += o.num_objects_hit_set_archive;
2431 num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
2432 num_flush += o.num_flush;
2433 num_flush_kb += o.num_flush_kb;
2434 num_evict += o.num_evict;
2435 num_evict_kb += o.num_evict_kb;
2436 num_promote += o.num_promote;
2437 num_flush_mode_high += o.num_flush_mode_high;
2438 num_flush_mode_low += o.num_flush_mode_low;
2439 num_evict_mode_some += o.num_evict_mode_some;
2440 num_evict_mode_full += o.num_evict_mode_full;
2441 num_objects_pinned += o.num_objects_pinned;
2442 num_legacy_snapsets += o.num_legacy_snapsets;
28e407b8 2443 num_large_omap_objects += o.num_large_omap_objects;
11fdf7f2
TL
2444 num_objects_manifest += o.num_objects_manifest;
2445 num_omap_bytes += o.num_omap_bytes;
2446 num_omap_keys += o.num_omap_keys;
2447 num_objects_repaired += o.num_objects_repaired;
7c673cae
FG
2448}
2449
2450void object_stat_sum_t::sub(const object_stat_sum_t& o)
2451{
2452 num_bytes -= o.num_bytes;
2453 num_objects -= o.num_objects;
2454 num_object_clones -= o.num_object_clones;
2455 num_object_copies -= o.num_object_copies;
2456 num_objects_missing_on_primary -= o.num_objects_missing_on_primary;
2457 num_objects_missing -= o.num_objects_missing;
2458 num_objects_degraded -= o.num_objects_degraded;
2459 num_objects_misplaced -= o.num_objects_misplaced;
2460 num_rd -= o.num_rd;
2461 num_rd_kb -= o.num_rd_kb;
2462 num_wr -= o.num_wr;
2463 num_wr_kb -= o.num_wr_kb;
2464 num_objects_unfound -= o.num_objects_unfound;
2465 num_scrub_errors -= o.num_scrub_errors;
2466 num_shallow_scrub_errors -= o.num_shallow_scrub_errors;
2467 num_deep_scrub_errors -= o.num_deep_scrub_errors;
2468 num_objects_recovered -= o.num_objects_recovered;
2469 num_bytes_recovered -= o.num_bytes_recovered;
2470 num_keys_recovered -= o.num_keys_recovered;
2471 num_objects_dirty -= o.num_objects_dirty;
2472 num_whiteouts -= o.num_whiteouts;
2473 num_objects_omap -= o.num_objects_omap;
2474 num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
2475 num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
2476 num_flush -= o.num_flush;
2477 num_flush_kb -= o.num_flush_kb;
2478 num_evict -= o.num_evict;
2479 num_evict_kb -= o.num_evict_kb;
2480 num_promote -= o.num_promote;
2481 num_flush_mode_high -= o.num_flush_mode_high;
2482 num_flush_mode_low -= o.num_flush_mode_low;
2483 num_evict_mode_some -= o.num_evict_mode_some;
2484 num_evict_mode_full -= o.num_evict_mode_full;
2485 num_objects_pinned -= o.num_objects_pinned;
2486 num_legacy_snapsets -= o.num_legacy_snapsets;
28e407b8 2487 num_large_omap_objects -= o.num_large_omap_objects;
11fdf7f2
TL
2488 num_objects_manifest -= o.num_objects_manifest;
2489 num_omap_bytes -= o.num_omap_bytes;
2490 num_omap_keys -= o.num_omap_keys;
2491 num_objects_repaired -= o.num_objects_repaired;
7c673cae
FG
2492}
2493
2494bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
2495{
2496 return
2497 l.num_bytes == r.num_bytes &&
2498 l.num_objects == r.num_objects &&
2499 l.num_object_clones == r.num_object_clones &&
2500 l.num_object_copies == r.num_object_copies &&
2501 l.num_objects_missing_on_primary == r.num_objects_missing_on_primary &&
2502 l.num_objects_missing == r.num_objects_missing &&
2503 l.num_objects_degraded == r.num_objects_degraded &&
2504 l.num_objects_misplaced == r.num_objects_misplaced &&
2505 l.num_objects_unfound == r.num_objects_unfound &&
2506 l.num_rd == r.num_rd &&
2507 l.num_rd_kb == r.num_rd_kb &&
2508 l.num_wr == r.num_wr &&
2509 l.num_wr_kb == r.num_wr_kb &&
2510 l.num_scrub_errors == r.num_scrub_errors &&
2511 l.num_shallow_scrub_errors == r.num_shallow_scrub_errors &&
2512 l.num_deep_scrub_errors == r.num_deep_scrub_errors &&
2513 l.num_objects_recovered == r.num_objects_recovered &&
2514 l.num_bytes_recovered == r.num_bytes_recovered &&
2515 l.num_keys_recovered == r.num_keys_recovered &&
2516 l.num_objects_dirty == r.num_objects_dirty &&
2517 l.num_whiteouts == r.num_whiteouts &&
2518 l.num_objects_omap == r.num_objects_omap &&
2519 l.num_objects_hit_set_archive == r.num_objects_hit_set_archive &&
2520 l.num_bytes_hit_set_archive == r.num_bytes_hit_set_archive &&
2521 l.num_flush == r.num_flush &&
2522 l.num_flush_kb == r.num_flush_kb &&
2523 l.num_evict == r.num_evict &&
2524 l.num_evict_kb == r.num_evict_kb &&
2525 l.num_promote == r.num_promote &&
2526 l.num_flush_mode_high == r.num_flush_mode_high &&
2527 l.num_flush_mode_low == r.num_flush_mode_low &&
2528 l.num_evict_mode_some == r.num_evict_mode_some &&
2529 l.num_evict_mode_full == r.num_evict_mode_full &&
2530 l.num_objects_pinned == r.num_objects_pinned &&
28e407b8 2531 l.num_legacy_snapsets == r.num_legacy_snapsets &&
11fdf7f2
TL
2532 l.num_large_omap_objects == r.num_large_omap_objects &&
2533 l.num_objects_manifest == r.num_objects_manifest &&
2534 l.num_omap_bytes == r.num_omap_bytes &&
2535 l.num_omap_keys == r.num_omap_keys &&
2536 l.num_objects_repaired == r.num_objects_repaired;
7c673cae
FG
2537}
2538
2539// -- object_stat_collection_t --
2540
2541void object_stat_collection_t::dump(Formatter *f) const
2542{
2543 f->open_object_section("stat_sum");
2544 sum.dump(f);
2545 f->close_section();
2546}
2547
2548void object_stat_collection_t::encode(bufferlist& bl) const
2549{
2550 ENCODE_START(2, 2, bl);
11fdf7f2
TL
2551 encode(sum, bl);
2552 encode((__u32)0, bl);
7c673cae
FG
2553 ENCODE_FINISH(bl);
2554}
2555
11fdf7f2 2556void object_stat_collection_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
2557{
2558 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2 2559 decode(sum, bl);
7c673cae
FG
2560 {
2561 map<string,object_stat_sum_t> cat_sum;
11fdf7f2 2562 decode(cat_sum, bl);
7c673cae
FG
2563 }
2564 DECODE_FINISH(bl);
2565}
2566
2567void object_stat_collection_t::generate_test_instances(list<object_stat_collection_t*>& o)
2568{
2569 object_stat_collection_t a;
2570 o.push_back(new object_stat_collection_t(a));
2571 list<object_stat_sum_t*> l;
2572 object_stat_sum_t::generate_test_instances(l);
2573 for (list<object_stat_sum_t*>::iterator p = l.begin(); p != l.end(); ++p) {
2574 a.add(**p);
2575 o.push_back(new object_stat_collection_t(a));
2576 }
2577}
2578
2579
2580// -- pg_stat_t --
2581
2582bool pg_stat_t::is_acting_osd(int32_t osd, bool primary) const
2583{
2584 if (primary && osd == acting_primary) {
2585 return true;
2586 } else if (!primary) {
2587 for(vector<int32_t>::const_iterator it = acting.begin();
2588 it != acting.end(); ++it)
2589 {
2590 if (*it == osd)
2591 return true;
2592 }
2593 }
2594 return false;
2595}
2596
2597void pg_stat_t::dump(Formatter *f) const
2598{
2599 f->dump_stream("version") << version;
2600 f->dump_stream("reported_seq") << reported_seq;
2601 f->dump_stream("reported_epoch") << reported_epoch;
2602 f->dump_string("state", pg_state_string(state));
2603 f->dump_stream("last_fresh") << last_fresh;
2604 f->dump_stream("last_change") << last_change;
2605 f->dump_stream("last_active") << last_active;
2606 f->dump_stream("last_peered") << last_peered;
2607 f->dump_stream("last_clean") << last_clean;
2608 f->dump_stream("last_became_active") << last_became_active;
2609 f->dump_stream("last_became_peered") << last_became_peered;
2610 f->dump_stream("last_unstale") << last_unstale;
2611 f->dump_stream("last_undegraded") << last_undegraded;
2612 f->dump_stream("last_fullsized") << last_fullsized;
2613 f->dump_unsigned("mapping_epoch", mapping_epoch);
2614 f->dump_stream("log_start") << log_start;
2615 f->dump_stream("ondisk_log_start") << ondisk_log_start;
2616 f->dump_unsigned("created", created);
2617 f->dump_unsigned("last_epoch_clean", last_epoch_clean);
2618 f->dump_stream("parent") << parent;
2619 f->dump_unsigned("parent_split_bits", parent_split_bits);
2620 f->dump_stream("last_scrub") << last_scrub;
2621 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2622 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2623 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2624 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2625 f->dump_int("log_size", log_size);
2626 f->dump_int("ondisk_log_size", ondisk_log_size);
2627 f->dump_bool("stats_invalid", stats_invalid);
2628 f->dump_bool("dirty_stats_invalid", dirty_stats_invalid);
2629 f->dump_bool("omap_stats_invalid", omap_stats_invalid);
2630 f->dump_bool("hitset_stats_invalid", hitset_stats_invalid);
2631 f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid);
2632 f->dump_bool("pin_stats_invalid", pin_stats_invalid);
11fdf7f2 2633 f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
b32b8144 2634 f->dump_unsigned("snaptrimq_len", snaptrimq_len);
7c673cae
FG
2635 stats.dump(f);
2636 f->open_array_section("up");
2637 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2638 f->dump_int("osd", *p);
2639 f->close_section();
2640 f->open_array_section("acting");
2641 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2642 f->dump_int("osd", *p);
2643 f->close_section();
2644 f->open_array_section("blocked_by");
2645 for (vector<int32_t>::const_iterator p = blocked_by.begin();
2646 p != blocked_by.end(); ++p)
2647 f->dump_int("osd", *p);
2648 f->close_section();
2649 f->dump_int("up_primary", up_primary);
2650 f->dump_int("acting_primary", acting_primary);
11fdf7f2
TL
2651 f->open_array_section("purged_snaps");
2652 for (interval_set<snapid_t>::const_iterator i = purged_snaps.begin();
2653 i != purged_snaps.end();
2654 ++i) {
2655 f->open_object_section("interval");
2656 f->dump_stream("start") << i.get_start();
2657 f->dump_stream("length") << i.get_len();
2658 f->close_section();
2659 }
2660 f->close_section();
7c673cae
FG
2661}
2662
2663void pg_stat_t::dump_brief(Formatter *f) const
2664{
2665 f->dump_string("state", pg_state_string(state));
2666 f->open_array_section("up");
2667 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2668 f->dump_int("osd", *p);
2669 f->close_section();
2670 f->open_array_section("acting");
2671 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2672 f->dump_int("osd", *p);
2673 f->close_section();
2674 f->dump_int("up_primary", up_primary);
2675 f->dump_int("acting_primary", acting_primary);
2676}
2677
2678void pg_stat_t::encode(bufferlist &bl) const
2679{
11fdf7f2
TL
2680 ENCODE_START(25, 22, bl);
2681 encode(version, bl);
2682 encode(reported_seq, bl);
2683 encode(reported_epoch, bl);
2684 encode((__u32)state, bl); // for older peers
2685 encode(log_start, bl);
2686 encode(ondisk_log_start, bl);
2687 encode(created, bl);
2688 encode(last_epoch_clean, bl);
2689 encode(parent, bl);
2690 encode(parent_split_bits, bl);
2691 encode(last_scrub, bl);
2692 encode(last_scrub_stamp, bl);
2693 encode(stats, bl);
2694 encode(log_size, bl);
2695 encode(ondisk_log_size, bl);
2696 encode(up, bl);
2697 encode(acting, bl);
2698 encode(last_fresh, bl);
2699 encode(last_change, bl);
2700 encode(last_active, bl);
2701 encode(last_clean, bl);
2702 encode(last_unstale, bl);
2703 encode(mapping_epoch, bl);
2704 encode(last_deep_scrub, bl);
2705 encode(last_deep_scrub_stamp, bl);
2706 encode(stats_invalid, bl);
2707 encode(last_clean_scrub_stamp, bl);
2708 encode(last_became_active, bl);
2709 encode(dirty_stats_invalid, bl);
2710 encode(up_primary, bl);
2711 encode(acting_primary, bl);
2712 encode(omap_stats_invalid, bl);
2713 encode(hitset_stats_invalid, bl);
2714 encode(blocked_by, bl);
2715 encode(last_undegraded, bl);
2716 encode(last_fullsized, bl);
2717 encode(hitset_bytes_stats_invalid, bl);
2718 encode(last_peered, bl);
2719 encode(last_became_peered, bl);
2720 encode(pin_stats_invalid, bl);
2721 encode(snaptrimq_len, bl);
2722 __u32 top_state = (state >> 32);
2723 encode(top_state, bl);
2724 encode(purged_snaps, bl);
2725 encode(manifest_stats_invalid, bl);
7c673cae
FG
2726 ENCODE_FINISH(bl);
2727}
2728
11fdf7f2 2729void pg_stat_t::decode(bufferlist::const_iterator &bl)
7c673cae
FG
2730{
2731 bool tmp;
11fdf7f2
TL
2732 uint32_t old_state;
2733 DECODE_START(25, bl);
2734 decode(version, bl);
2735 decode(reported_seq, bl);
2736 decode(reported_epoch, bl);
2737 decode(old_state, bl);
2738 decode(log_start, bl);
2739 decode(ondisk_log_start, bl);
2740 decode(created, bl);
2741 decode(last_epoch_clean, bl);
2742 decode(parent, bl);
2743 decode(parent_split_bits, bl);
2744 decode(last_scrub, bl);
2745 decode(last_scrub_stamp, bl);
2746 decode(stats, bl);
2747 decode(log_size, bl);
2748 decode(ondisk_log_size, bl);
2749 decode(up, bl);
2750 decode(acting, bl);
2751 decode(last_fresh, bl);
2752 decode(last_change, bl);
2753 decode(last_active, bl);
2754 decode(last_clean, bl);
2755 decode(last_unstale, bl);
2756 decode(mapping_epoch, bl);
2757 decode(last_deep_scrub, bl);
2758 decode(last_deep_scrub_stamp, bl);
2759 decode(tmp, bl);
7c673cae 2760 stats_invalid = tmp;
11fdf7f2
TL
2761 decode(last_clean_scrub_stamp, bl);
2762 decode(last_became_active, bl);
2763 decode(tmp, bl);
7c673cae 2764 dirty_stats_invalid = tmp;
11fdf7f2
TL
2765 decode(up_primary, bl);
2766 decode(acting_primary, bl);
2767 decode(tmp, bl);
7c673cae 2768 omap_stats_invalid = tmp;
11fdf7f2 2769 decode(tmp, bl);
7c673cae 2770 hitset_stats_invalid = tmp;
11fdf7f2
TL
2771 decode(blocked_by, bl);
2772 decode(last_undegraded, bl);
2773 decode(last_fullsized, bl);
2774 decode(tmp, bl);
7c673cae 2775 hitset_bytes_stats_invalid = tmp;
11fdf7f2
TL
2776 decode(last_peered, bl);
2777 decode(last_became_peered, bl);
2778 decode(tmp, bl);
7c673cae 2779 pin_stats_invalid = tmp;
b32b8144 2780 if (struct_v >= 23) {
11fdf7f2
TL
2781 decode(snaptrimq_len, bl);
2782 if (struct_v >= 24) {
2783 __u32 top_state;
2784 decode(top_state, bl);
2785 state = (uint64_t)old_state | ((uint64_t)top_state << 32);
2786 decode(purged_snaps, bl);
2787 } else {
2788 state = old_state;
2789 }
2790 if (struct_v >= 25) {
2791 decode(tmp, bl);
2792 manifest_stats_invalid = tmp;
2793 } else {
2794 manifest_stats_invalid = true;
2795 }
b32b8144 2796 }
7c673cae
FG
2797 DECODE_FINISH(bl);
2798}
2799
2800void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
2801{
2802 pg_stat_t a;
2803 o.push_back(new pg_stat_t(a));
2804
2805 a.version = eversion_t(1, 3);
2806 a.reported_epoch = 1;
2807 a.reported_seq = 2;
2808 a.state = 123;
2809 a.mapping_epoch = 998;
2810 a.last_fresh = utime_t(1002, 1);
2811 a.last_change = utime_t(1002, 2);
2812 a.last_active = utime_t(1002, 3);
2813 a.last_clean = utime_t(1002, 4);
2814 a.last_unstale = utime_t(1002, 5);
2815 a.last_undegraded = utime_t(1002, 7);
2816 a.last_fullsized = utime_t(1002, 8);
2817 a.log_start = eversion_t(1, 4);
2818 a.ondisk_log_start = eversion_t(1, 5);
2819 a.created = 6;
2820 a.last_epoch_clean = 7;
11fdf7f2 2821 a.parent = pg_t(1, 2);
7c673cae
FG
2822 a.parent_split_bits = 12;
2823 a.last_scrub = eversion_t(9, 10);
2824 a.last_scrub_stamp = utime_t(11, 12);
2825 a.last_deep_scrub = eversion_t(13, 14);
2826 a.last_deep_scrub_stamp = utime_t(15, 16);
2827 a.last_clean_scrub_stamp = utime_t(17, 18);
b32b8144 2828 a.snaptrimq_len = 1048576;
7c673cae
FG
2829 list<object_stat_collection_t*> l;
2830 object_stat_collection_t::generate_test_instances(l);
2831 a.stats = *l.back();
2832 a.log_size = 99;
2833 a.ondisk_log_size = 88;
2834 a.up.push_back(123);
2835 a.up_primary = 123;
2836 a.acting.push_back(456);
2837 a.acting_primary = 456;
2838 o.push_back(new pg_stat_t(a));
2839
2840 a.up.push_back(124);
2841 a.up_primary = 124;
2842 a.acting.push_back(124);
2843 a.acting_primary = 124;
2844 a.blocked_by.push_back(155);
2845 a.blocked_by.push_back(156);
2846 o.push_back(new pg_stat_t(a));
2847}
2848
2849bool operator==(const pg_stat_t& l, const pg_stat_t& r)
2850{
2851 return
2852 l.version == r.version &&
2853 l.reported_seq == r.reported_seq &&
2854 l.reported_epoch == r.reported_epoch &&
2855 l.state == r.state &&
2856 l.last_fresh == r.last_fresh &&
2857 l.last_change == r.last_change &&
2858 l.last_active == r.last_active &&
2859 l.last_peered == r.last_peered &&
2860 l.last_clean == r.last_clean &&
2861 l.last_unstale == r.last_unstale &&
2862 l.last_undegraded == r.last_undegraded &&
2863 l.last_fullsized == r.last_fullsized &&
2864 l.log_start == r.log_start &&
2865 l.ondisk_log_start == r.ondisk_log_start &&
2866 l.created == r.created &&
2867 l.last_epoch_clean == r.last_epoch_clean &&
2868 l.parent == r.parent &&
2869 l.parent_split_bits == r.parent_split_bits &&
2870 l.last_scrub == r.last_scrub &&
2871 l.last_deep_scrub == r.last_deep_scrub &&
2872 l.last_scrub_stamp == r.last_scrub_stamp &&
2873 l.last_deep_scrub_stamp == r.last_deep_scrub_stamp &&
2874 l.last_clean_scrub_stamp == r.last_clean_scrub_stamp &&
2875 l.stats == r.stats &&
2876 l.stats_invalid == r.stats_invalid &&
2877 l.log_size == r.log_size &&
2878 l.ondisk_log_size == r.ondisk_log_size &&
2879 l.up == r.up &&
2880 l.acting == r.acting &&
2881 l.mapping_epoch == r.mapping_epoch &&
2882 l.blocked_by == r.blocked_by &&
2883 l.last_became_active == r.last_became_active &&
2884 l.last_became_peered == r.last_became_peered &&
2885 l.dirty_stats_invalid == r.dirty_stats_invalid &&
2886 l.omap_stats_invalid == r.omap_stats_invalid &&
2887 l.hitset_stats_invalid == r.hitset_stats_invalid &&
2888 l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid &&
2889 l.up_primary == r.up_primary &&
2890 l.acting_primary == r.acting_primary &&
b32b8144 2891 l.pin_stats_invalid == r.pin_stats_invalid &&
11fdf7f2
TL
2892 l.manifest_stats_invalid == r.manifest_stats_invalid &&
2893 l.purged_snaps == r.purged_snaps &&
b32b8144 2894 l.snaptrimq_len == r.snaptrimq_len;
7c673cae
FG
2895}
2896
11fdf7f2
TL
2897// -- store_statfs_t --
2898
2899bool store_statfs_t::operator==(const store_statfs_t& other) const
2900{
2901 return total == other.total
2902 && available == other.available
2903 && allocated == other.allocated
2904 && internally_reserved == other.internally_reserved
2905 && data_stored == other.data_stored
2906 && data_compressed == other.data_compressed
2907 && data_compressed_allocated == other.data_compressed_allocated
2908 && data_compressed_original == other.data_compressed_original
2909 && omap_allocated == other.omap_allocated
2910 && internal_metadata == other.internal_metadata;
2911}
2912
2913void store_statfs_t::dump(Formatter *f) const
2914{
2915 f->dump_int("total", total);
2916 f->dump_int("available", available);
2917 f->dump_int("internally_reserved", internally_reserved);
2918 f->dump_int("allocated", allocated);
2919 f->dump_int("data_stored", data_stored);
2920 f->dump_int("data_compressed", data_compressed);
2921 f->dump_int("data_compressed_allocated", data_compressed_allocated);
2922 f->dump_int("data_compressed_original", data_compressed_original);
2923 f->dump_int("omap_allocated", omap_allocated);
2924 f->dump_int("internal_metadata", internal_metadata);
2925}
2926
2927ostream& operator<<(ostream& out, const store_statfs_t &s)
2928{
2929 out << std::hex
2930 << "store_statfs(0x" << s.available
2931 << "/0x" << s.internally_reserved
2932 << "/0x" << s.total
2933 << ", data 0x" << s.data_stored
2934 << "/0x" << s.allocated
2935 << ", compress 0x" << s.data_compressed
2936 << "/0x" << s.data_compressed_allocated
2937 << "/0x" << s.data_compressed_original
2938 << ", omap 0x" << s.omap_allocated
2939 << ", meta 0x" << s.internal_metadata
2940 << std::dec
2941 << ")";
2942 return out;
2943}
2944
2945void store_statfs_t::generate_test_instances(list<store_statfs_t*>& o)
2946{
2947 store_statfs_t a;
2948 o.push_back(new store_statfs_t(a));
2949 a.total = 234;
2950 a.available = 123;
2951 a.internally_reserved = 33;
2952 a.allocated = 32;
2953 a.data_stored = 44;
2954 a.data_compressed = 21;
2955 a.data_compressed_allocated = 12;
2956 a.data_compressed_original = 13;
2957 a.omap_allocated = 14;
2958 a.internal_metadata = 15;
2959 o.push_back(new store_statfs_t(a));
2960}
2961
7c673cae
FG
2962// -- pool_stat_t --
2963
2964void pool_stat_t::dump(Formatter *f) const
2965{
2966 stats.dump(f);
11fdf7f2
TL
2967 f->open_object_section("store_stats");
2968 store_stats.dump(f);
2969 f->close_section();
7c673cae
FG
2970 f->dump_int("log_size", log_size);
2971 f->dump_int("ondisk_log_size", ondisk_log_size);
2972 f->dump_int("up", up);
2973 f->dump_int("acting", acting);
11fdf7f2 2974 f->dump_int("num_store_stats", acting);
7c673cae
FG
2975}
2976
2977void pool_stat_t::encode(bufferlist &bl, uint64_t features) const
2978{
11fdf7f2 2979 using ceph::encode;
7c673cae
FG
2980 if ((features & CEPH_FEATURE_OSDENC) == 0) {
2981 __u8 v = 4;
11fdf7f2
TL
2982 encode(v, bl);
2983 encode(stats, bl);
2984 encode(log_size, bl);
2985 encode(ondisk_log_size, bl);
7c673cae
FG
2986 return;
2987 }
2988
11fdf7f2
TL
2989 ENCODE_START(7, 5, bl);
2990 encode(stats, bl);
2991 encode(log_size, bl);
2992 encode(ondisk_log_size, bl);
2993 encode(up, bl);
2994 encode(acting, bl);
2995 encode(store_stats, bl);
2996 encode(num_store_stats, bl);
7c673cae
FG
2997 ENCODE_FINISH(bl);
2998}
2999
11fdf7f2 3000void pool_stat_t::decode(bufferlist::const_iterator &bl)
7c673cae 3001{
11fdf7f2 3002 DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
7c673cae 3003 if (struct_v >= 4) {
11fdf7f2
TL
3004 decode(stats, bl);
3005 decode(log_size, bl);
3006 decode(ondisk_log_size, bl);
7c673cae 3007 if (struct_v >= 6) {
11fdf7f2
TL
3008 decode(up, bl);
3009 decode(acting, bl);
7c673cae
FG
3010 } else {
3011 up = 0;
3012 acting = 0;
3013 }
11fdf7f2
TL
3014 if (struct_v >= 7) {
3015 decode(store_stats, bl);
3016 decode(num_store_stats, bl);
3017 } else {
3018 store_stats.reset();
3019 num_store_stats = 0;
3020 }
3021
7c673cae 3022 } else {
11fdf7f2 3023 decode(stats.sum.num_bytes, bl);
7c673cae 3024 uint64_t num_kb;
11fdf7f2
TL
3025 decode(num_kb, bl);
3026 decode(stats.sum.num_objects, bl);
3027 decode(stats.sum.num_object_clones, bl);
3028 decode(stats.sum.num_object_copies, bl);
3029 decode(stats.sum.num_objects_missing_on_primary, bl);
3030 decode(stats.sum.num_objects_degraded, bl);
3031 decode(log_size, bl);
3032 decode(ondisk_log_size, bl);
7c673cae 3033 if (struct_v >= 2) {
11fdf7f2
TL
3034 decode(stats.sum.num_rd, bl);
3035 decode(stats.sum.num_rd_kb, bl);
3036 decode(stats.sum.num_wr, bl);
3037 decode(stats.sum.num_wr_kb, bl);
7c673cae
FG
3038 }
3039 if (struct_v >= 3) {
11fdf7f2 3040 decode(stats.sum.num_objects_unfound, bl);
7c673cae
FG
3041 }
3042 }
3043 DECODE_FINISH(bl);
3044}
3045
3046void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
3047{
3048 pool_stat_t a;
3049 o.push_back(new pool_stat_t(a));
3050
3051 list<object_stat_collection_t*> l;
3052 object_stat_collection_t::generate_test_instances(l);
11fdf7f2
TL
3053 list<store_statfs_t*> ll;
3054 store_statfs_t::generate_test_instances(ll);
7c673cae 3055 a.stats = *l.back();
11fdf7f2 3056 a.store_stats = *ll.back();
7c673cae
FG
3057 a.log_size = 123;
3058 a.ondisk_log_size = 456;
3059 a.acting = 3;
3060 a.up = 4;
11fdf7f2 3061 a.num_store_stats = 1;
7c673cae
FG
3062 o.push_back(new pool_stat_t(a));
3063}
3064
3065
3066// -- pg_history_t --
3067
3068void pg_history_t::encode(bufferlist &bl) const
3069{
31f18b77 3070 ENCODE_START(9, 4, bl);
11fdf7f2
TL
3071 encode(epoch_created, bl);
3072 encode(last_epoch_started, bl);
3073 encode(last_epoch_clean, bl);
3074 encode(last_epoch_split, bl);
3075 encode(same_interval_since, bl);
3076 encode(same_up_since, bl);
3077 encode(same_primary_since, bl);
3078 encode(last_scrub, bl);
3079 encode(last_scrub_stamp, bl);
3080 encode(last_deep_scrub, bl);
3081 encode(last_deep_scrub_stamp, bl);
3082 encode(last_clean_scrub_stamp, bl);
3083 encode(last_epoch_marked_full, bl);
3084 encode(last_interval_started, bl);
3085 encode(last_interval_clean, bl);
3086 encode(epoch_pool_created, bl);
7c673cae
FG
3087 ENCODE_FINISH(bl);
3088}
3089
11fdf7f2 3090void pg_history_t::decode(bufferlist::const_iterator &bl)
7c673cae 3091{
31f18b77 3092 DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl);
11fdf7f2
TL
3093 decode(epoch_created, bl);
3094 decode(last_epoch_started, bl);
7c673cae 3095 if (struct_v >= 3)
11fdf7f2 3096 decode(last_epoch_clean, bl);
7c673cae
FG
3097 else
3098 last_epoch_clean = last_epoch_started; // careful, it's a lie!
11fdf7f2
TL
3099 decode(last_epoch_split, bl);
3100 decode(same_interval_since, bl);
3101 decode(same_up_since, bl);
3102 decode(same_primary_since, bl);
7c673cae 3103 if (struct_v >= 2) {
11fdf7f2
TL
3104 decode(last_scrub, bl);
3105 decode(last_scrub_stamp, bl);
7c673cae
FG
3106 }
3107 if (struct_v >= 5) {
11fdf7f2
TL
3108 decode(last_deep_scrub, bl);
3109 decode(last_deep_scrub_stamp, bl);
7c673cae
FG
3110 }
3111 if (struct_v >= 6) {
11fdf7f2 3112 decode(last_clean_scrub_stamp, bl);
7c673cae
FG
3113 }
3114 if (struct_v >= 7) {
11fdf7f2 3115 decode(last_epoch_marked_full, bl);
7c673cae
FG
3116 }
3117 if (struct_v >= 8) {
11fdf7f2
TL
3118 decode(last_interval_started, bl);
3119 decode(last_interval_clean, bl);
7c673cae
FG
3120 } else {
3121 if (last_epoch_started >= same_interval_since) {
3122 last_interval_started = same_interval_since;
3123 } else {
3124 last_interval_started = last_epoch_started; // best guess
3125 }
3126 if (last_epoch_clean >= same_interval_since) {
3127 last_interval_clean = same_interval_since;
3128 } else {
3129 last_interval_clean = last_epoch_clean; // best guess
3130 }
3131 }
31f18b77 3132 if (struct_v >= 9) {
11fdf7f2 3133 decode(epoch_pool_created, bl);
31f18b77
FG
3134 } else {
3135 epoch_pool_created = epoch_created;
3136 }
7c673cae
FG
3137 DECODE_FINISH(bl);
3138}
3139
3140void pg_history_t::dump(Formatter *f) const
3141{
3142 f->dump_int("epoch_created", epoch_created);
31f18b77 3143 f->dump_int("epoch_pool_created", epoch_pool_created);
7c673cae
FG
3144 f->dump_int("last_epoch_started", last_epoch_started);
3145 f->dump_int("last_interval_started", last_interval_started);
3146 f->dump_int("last_epoch_clean", last_epoch_clean);
3147 f->dump_int("last_interval_clean", last_interval_clean);
3148 f->dump_int("last_epoch_split", last_epoch_split);
3149 f->dump_int("last_epoch_marked_full", last_epoch_marked_full);
3150 f->dump_int("same_up_since", same_up_since);
3151 f->dump_int("same_interval_since", same_interval_since);
3152 f->dump_int("same_primary_since", same_primary_since);
3153 f->dump_stream("last_scrub") << last_scrub;
3154 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
3155 f->dump_stream("last_deep_scrub") << last_deep_scrub;
3156 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
3157 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
3158}
3159
3160void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
3161{
3162 o.push_back(new pg_history_t);
3163 o.push_back(new pg_history_t);
3164 o.back()->epoch_created = 1;
31f18b77 3165 o.back()->epoch_pool_created = 1;
7c673cae
FG
3166 o.back()->last_epoch_started = 2;
3167 o.back()->last_interval_started = 2;
3168 o.back()->last_epoch_clean = 3;
3169 o.back()->last_interval_clean = 2;
3170 o.back()->last_epoch_split = 4;
3171 o.back()->same_up_since = 5;
3172 o.back()->same_interval_since = 6;
3173 o.back()->same_primary_since = 7;
3174 o.back()->last_scrub = eversion_t(8, 9);
3175 o.back()->last_scrub_stamp = utime_t(10, 11);
3176 o.back()->last_deep_scrub = eversion_t(12, 13);
3177 o.back()->last_deep_scrub_stamp = utime_t(14, 15);
3178 o.back()->last_clean_scrub_stamp = utime_t(16, 17);
3179 o.back()->last_epoch_marked_full = 18;
3180}
3181
3182
3183// -- pg_info_t --
3184
3185void pg_info_t::encode(bufferlist &bl) const
3186{
3187 ENCODE_START(32, 26, bl);
11fdf7f2
TL
3188 encode(pgid.pgid, bl);
3189 encode(last_update, bl);
3190 encode(last_complete, bl);
3191 encode(log_tail, bl);
7c673cae 3192 if (last_backfill_bitwise && !last_backfill.is_max()) {
11fdf7f2 3193 encode(hobject_t(), bl);
7c673cae 3194 } else {
11fdf7f2 3195 encode(last_backfill, bl);
7c673cae 3196 }
11fdf7f2 3197 encode(stats, bl);
7c673cae 3198 history.encode(bl);
11fdf7f2
TL
3199 encode(purged_snaps, bl);
3200 encode(last_epoch_started, bl);
3201 encode(last_user_version, bl);
3202 encode(hit_set, bl);
3203 encode(pgid.shard, bl);
3204 encode(last_backfill, bl);
3205 encode(last_backfill_bitwise, bl);
3206 encode(last_interval_started, bl);
7c673cae
FG
3207 ENCODE_FINISH(bl);
3208}
3209
11fdf7f2 3210void pg_info_t::decode(bufferlist::const_iterator &bl)
7c673cae
FG
3211{
3212 DECODE_START(32, bl);
11fdf7f2
TL
3213 decode(pgid.pgid, bl);
3214 decode(last_update, bl);
3215 decode(last_complete, bl);
3216 decode(log_tail, bl);
7c673cae
FG
3217 {
3218 hobject_t old_last_backfill;
11fdf7f2 3219 decode(old_last_backfill, bl);
7c673cae 3220 }
11fdf7f2 3221 decode(stats, bl);
7c673cae 3222 history.decode(bl);
11fdf7f2
TL
3223 decode(purged_snaps, bl);
3224 decode(last_epoch_started, bl);
3225 decode(last_user_version, bl);
3226 decode(hit_set, bl);
3227 decode(pgid.shard, bl);
3228 decode(last_backfill, bl);
3229 decode(last_backfill_bitwise, bl);
7c673cae 3230 if (struct_v >= 32) {
11fdf7f2 3231 decode(last_interval_started, bl);
7c673cae
FG
3232 } else {
3233 last_interval_started = last_epoch_started;
3234 }
3235 DECODE_FINISH(bl);
3236}
3237
3238// -- pg_info_t --
3239
3240void pg_info_t::dump(Formatter *f) const
3241{
3242 f->dump_stream("pgid") << pgid;
3243 f->dump_stream("last_update") << last_update;
3244 f->dump_stream("last_complete") << last_complete;
3245 f->dump_stream("log_tail") << log_tail;
3246 f->dump_int("last_user_version", last_user_version);
3247 f->dump_stream("last_backfill") << last_backfill;
3248 f->dump_int("last_backfill_bitwise", (int)last_backfill_bitwise);
3249 f->open_array_section("purged_snaps");
3250 for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
3251 i != purged_snaps.end();
3252 ++i) {
3253 f->open_object_section("purged_snap_interval");
3254 f->dump_stream("start") << i.get_start();
3255 f->dump_stream("length") << i.get_len();
3256 f->close_section();
3257 }
3258 f->close_section();
3259 f->open_object_section("history");
3260 history.dump(f);
3261 f->close_section();
3262 f->open_object_section("stats");
3263 stats.dump(f);
3264 f->close_section();
3265
3266 f->dump_int("empty", is_empty());
3267 f->dump_int("dne", dne());
3268 f->dump_int("incomplete", is_incomplete());
3269 f->dump_int("last_epoch_started", last_epoch_started);
3270
3271 f->open_object_section("hit_set_history");
3272 hit_set.dump(f);
3273 f->close_section();
3274}
3275
3276void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
3277{
3278 o.push_back(new pg_info_t);
3279 o.push_back(new pg_info_t);
3280 list<pg_history_t*> h;
3281 pg_history_t::generate_test_instances(h);
3282 o.back()->history = *h.back();
11fdf7f2 3283 o.back()->pgid = spg_t(pg_t(1, 2), shard_id_t::NO_SHARD);
7c673cae
FG
3284 o.back()->last_update = eversion_t(3, 4);
3285 o.back()->last_complete = eversion_t(5, 6);
3286 o.back()->last_user_version = 2;
3287 o.back()->log_tail = eversion_t(7, 8);
3288 o.back()->last_backfill = hobject_t(object_t("objname"), "key", 123, 456, -1, "");
3289 o.back()->last_backfill_bitwise = true;
3290 {
3291 list<pg_stat_t*> s;
3292 pg_stat_t::generate_test_instances(s);
3293 o.back()->stats = *s.back();
3294 }
3295 {
3296 list<pg_hit_set_history_t*> s;
3297 pg_hit_set_history_t::generate_test_instances(s);
3298 o.back()->hit_set = *s.back();
3299 }
3300}
3301
3302// -- pg_notify_t --
3303void pg_notify_t::encode(bufferlist &bl) const
3304{
3305 ENCODE_START(2, 2, bl);
11fdf7f2
TL
3306 encode(query_epoch, bl);
3307 encode(epoch_sent, bl);
3308 encode(info, bl);
3309 encode(to, bl);
3310 encode(from, bl);
7c673cae
FG
3311 ENCODE_FINISH(bl);
3312}
3313
11fdf7f2 3314void pg_notify_t::decode(bufferlist::const_iterator &bl)
7c673cae
FG
3315{
3316 DECODE_START(2, bl);
11fdf7f2
TL
3317 decode(query_epoch, bl);
3318 decode(epoch_sent, bl);
3319 decode(info, bl);
3320 decode(to, bl);
3321 decode(from, bl);
7c673cae
FG
3322 DECODE_FINISH(bl);
3323}
3324
3325void pg_notify_t::dump(Formatter *f) const
3326{
3327 f->dump_int("from", from);
3328 f->dump_int("to", to);
3329 f->dump_unsigned("query_epoch", query_epoch);
3330 f->dump_unsigned("epoch_sent", epoch_sent);
3331 {
3332 f->open_object_section("info");
3333 info.dump(f);
3334 f->close_section();
3335 }
3336}
3337
3338void pg_notify_t::generate_test_instances(list<pg_notify_t*>& o)
3339{
3340 o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1, pg_info_t()));
3341 o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t()));
3342}
3343
3344ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
3345{
3346 lhs << "(query:" << notify.query_epoch
3347 << " sent:" << notify.epoch_sent
3348 << " " << notify.info;
3349 if (notify.from != shard_id_t::NO_SHARD ||
3350 notify.to != shard_id_t::NO_SHARD)
3351 lhs << " " << (unsigned)notify.from
3352 << "->" << (unsigned)notify.to;
3353 return lhs << ")";
3354}
3355
3356// -- pg_interval_t --
3357
3358void PastIntervals::pg_interval_t::encode(bufferlist& bl) const
3359{
3360 ENCODE_START(4, 2, bl);
11fdf7f2
TL
3361 encode(first, bl);
3362 encode(last, bl);
3363 encode(up, bl);
3364 encode(acting, bl);
3365 encode(maybe_went_rw, bl);
3366 encode(primary, bl);
3367 encode(up_primary, bl);
7c673cae
FG
3368 ENCODE_FINISH(bl);
3369}
3370
11fdf7f2 3371void PastIntervals::pg_interval_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
3372{
3373 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
11fdf7f2
TL
3374 decode(first, bl);
3375 decode(last, bl);
3376 decode(up, bl);
3377 decode(acting, bl);
3378 decode(maybe_went_rw, bl);
7c673cae 3379 if (struct_v >= 3) {
11fdf7f2 3380 decode(primary, bl);
7c673cae
FG
3381 } else {
3382 if (acting.size())
3383 primary = acting[0];
3384 }
3385 if (struct_v >= 4) {
11fdf7f2 3386 decode(up_primary, bl);
7c673cae
FG
3387 } else {
3388 if (up.size())
3389 up_primary = up[0];
3390 }
3391 DECODE_FINISH(bl);
3392}
3393
3394void PastIntervals::pg_interval_t::dump(Formatter *f) const
3395{
3396 f->dump_unsigned("first", first);
3397 f->dump_unsigned("last", last);
3398 f->dump_int("maybe_went_rw", maybe_went_rw ? 1 : 0);
3399 f->open_array_section("up");
3400 for (vector<int>::const_iterator p = up.begin(); p != up.end(); ++p)
3401 f->dump_int("osd", *p);
3402 f->close_section();
3403 f->open_array_section("acting");
3404 for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
3405 f->dump_int("osd", *p);
3406 f->close_section();
3407 f->dump_int("primary", primary);
3408 f->dump_int("up_primary", up_primary);
3409}
3410
3411void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
3412{
3413 o.push_back(new pg_interval_t);
3414 o.push_back(new pg_interval_t);
3415 o.back()->up.push_back(1);
3416 o.back()->acting.push_back(2);
3417 o.back()->acting.push_back(3);
3418 o.back()->first = 4;
3419 o.back()->last = 5;
3420 o.back()->maybe_went_rw = true;
3421}
3422
3423WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
3424
7c673cae
FG
3425
/**
 * pi_compact_rep
 *
 * PastIntervals only needs to be able to answer two questions:
 * 1) Where should the primary look for unfound objects?
 * 2) List a set of subsets of the OSDs such that contacting at least
 *    one from each subset guarantees we speak to at least one witness
 *    of any completed write.
 *
 * Crucially, 2) does not require keeping *all* past intervals.  Certainly,
 * we don't need to keep any where maybe_went_rw would be false.  We also
 * needn't keep two intervals where the actingset in one is a subset
 * of the other (only need to keep the smaller of the two sets).  In order
 * to accurately trim the set of intervals as last_epoch_started changes
 * without rebuilding the set from scratch, we'll retain the larger set
 * if it is in an older interval.
 */
3443struct compact_interval_t {
3444 epoch_t first;
3445 epoch_t last;
3446 set<pg_shard_t> acting;
3447 bool supersedes(const compact_interval_t &other) {
3448 for (auto &&i: acting) {
3449 if (!other.acting.count(i))
3450 return false;
3451 }
3452 return true;
3453 }
3454 void dump(Formatter *f) const {
3455 f->open_object_section("compact_interval_t");
3456 f->dump_stream("first") << first;
3457 f->dump_stream("last") << last;
3458 f->dump_stream("acting") << acting;
3459 f->close_section();
3460 }
3461 void encode(bufferlist &bl) const {
3462 ENCODE_START(1, 1, bl);
11fdf7f2
TL
3463 encode(first, bl);
3464 encode(last, bl);
3465 encode(acting, bl);
7c673cae
FG
3466 ENCODE_FINISH(bl);
3467 }
11fdf7f2 3468 void decode(bufferlist::const_iterator &bl) {
7c673cae 3469 DECODE_START(1, bl);
11fdf7f2
TL
3470 decode(first, bl);
3471 decode(last, bl);
3472 decode(acting, bl);
7c673cae
FG
3473 DECODE_FINISH(bl);
3474 }
3475 static void generate_test_instances(list<compact_interval_t*> & o) {
3476 /* Not going to be used, we'll generate pi_compact_rep directly */
3477 }
3478};
3479ostream &operator<<(ostream &o, const compact_interval_t &rhs)
3480{
3481 return o << "([" << rhs.first << "," << rhs.last
3482 << "] acting " << rhs.acting << ")";
3483}
3484WRITE_CLASS_ENCODER(compact_interval_t)
3485
3486class pi_compact_rep : public PastIntervals::interval_rep {
3487 epoch_t first = 0;
3488 epoch_t last = 0; // inclusive
3489 set<pg_shard_t> all_participants;
3490 list<compact_interval_t> intervals;
3491 pi_compact_rep(
3492 bool ec_pool,
3493 std::list<PastIntervals::pg_interval_t> &&intervals) {
3494 for (auto &&i: intervals)
3495 add_interval(ec_pool, i);
3496 }
3497public:
3498 pi_compact_rep() = default;
3499 pi_compact_rep(const pi_compact_rep &) = default;
3500 pi_compact_rep(pi_compact_rep &&) = default;
3501 pi_compact_rep &operator=(const pi_compact_rep &) = default;
3502 pi_compact_rep &operator=(pi_compact_rep &&) = default;
3503
3504 size_t size() const override { return intervals.size(); }
3505 bool empty() const override {
3506 return first > last || (first == 0 && last == 0);
3507 }
3508 void clear() override {
3509 *this = pi_compact_rep();
3510 }
3511 pair<epoch_t, epoch_t> get_bounds() const override {
3512 return make_pair(first, last + 1);
3513 }
11fdf7f2
TL
3514 void adjust_start_backwards(epoch_t last_epoch_clean) {
3515 first = last_epoch_clean;
3516 }
3517
7c673cae
FG
3518 set<pg_shard_t> get_all_participants(
3519 bool ec_pool) const override {
3520 return all_participants;
3521 }
3522 void add_interval(
3523 bool ec_pool, const PastIntervals::pg_interval_t &interval) override {
3524 if (first == 0)
3525 first = interval.first;
11fdf7f2 3526 ceph_assert(interval.last > last);
7c673cae
FG
3527 last = interval.last;
3528 set<pg_shard_t> acting;
3529 for (unsigned i = 0; i < interval.acting.size(); ++i) {
3530 if (interval.acting[i] == CRUSH_ITEM_NONE)
3531 continue;
3532 acting.insert(
3533 pg_shard_t(
3534 interval.acting[i],
3535 ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
3536 }
3537 all_participants.insert(acting.begin(), acting.end());
3538 if (!interval.maybe_went_rw)
3539 return;
3540 intervals.push_back(
3541 compact_interval_t{interval.first, interval.last, acting});
3542 auto plast = intervals.end();
3543 --plast;
3544 for (auto cur = intervals.begin(); cur != plast; ) {
3545 if (plast->supersedes(*cur)) {
3546 intervals.erase(cur++);
3547 } else {
3548 ++cur;
3549 }
3550 }
3551 }
3552 unique_ptr<PastIntervals::interval_rep> clone() const override {
3553 return unique_ptr<PastIntervals::interval_rep>(new pi_compact_rep(*this));
3554 }
3555 ostream &print(ostream &out) const override {
3556 return out << "([" << first << "," << last
3557 << "] intervals=" << intervals << ")";
3558 }
3559 void encode(bufferlist &bl) const override {
3560 ENCODE_START(1, 1, bl);
11fdf7f2
TL
3561 encode(first, bl);
3562 encode(last, bl);
3563 encode(all_participants, bl);
3564 encode(intervals, bl);
7c673cae
FG
3565 ENCODE_FINISH(bl);
3566 }
11fdf7f2 3567 void decode(bufferlist::const_iterator &bl) override {
7c673cae 3568 DECODE_START(1, bl);
11fdf7f2
TL
3569 decode(first, bl);
3570 decode(last, bl);
3571 decode(all_participants, bl);
3572 decode(intervals, bl);
7c673cae
FG
3573 DECODE_FINISH(bl);
3574 }
3575 void dump(Formatter *f) const override {
3576 f->open_object_section("PastIntervals::compact_rep");
3577 f->dump_stream("first") << first;
3578 f->dump_stream("last") << last;
3579 f->open_array_section("all_participants");
3580 for (auto& i : all_participants) {
3581 f->dump_object("pg_shard", i);
3582 }
3583 f->close_section();
3584 f->open_array_section("intervals");
3585 for (auto &&i: intervals) {
3586 i.dump(f);
3587 }
3588 f->close_section();
3589 f->close_section();
3590 }
7c673cae
FG
3591 static void generate_test_instances(list<pi_compact_rep*> &o) {
3592 using ival = PastIntervals::pg_interval_t;
3593 using ivallst = std::list<ival>;
3594 o.push_back(
3595 new pi_compact_rep(
3596 true, ivallst
3597 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3598 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3599 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3600 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3601 }));
3602 o.push_back(
3603 new pi_compact_rep(
3604 false, ivallst
3605 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3606 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3607 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3608 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3609 }));
3610 o.push_back(
3611 new pi_compact_rep(
3612 true, ivallst
3613 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3614 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3615 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3616 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3617 }));
3618 }
3619 void iterate_mayberw_back_to(
7c673cae
FG
3620 epoch_t les,
3621 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3622 for (auto i = intervals.rbegin(); i != intervals.rend(); ++i) {
3623 if (i->last < les)
3624 break;
3625 f(i->first, i->acting);
3626 }
3627 }
3628 virtual ~pi_compact_rep() override {}
3629};
3630WRITE_CLASS_ENCODER(pi_compact_rep)
3631
11fdf7f2
TL
3632PastIntervals::PastIntervals()
3633{
3634 past_intervals.reset(new pi_compact_rep);
3635}
3636
7c673cae
FG
3637PastIntervals::PastIntervals(const PastIntervals &rhs)
3638 : past_intervals(rhs.past_intervals ?
3639 rhs.past_intervals->clone() :
3640 nullptr) {}
3641
3642PastIntervals &PastIntervals::operator=(const PastIntervals &rhs)
3643{
3644 PastIntervals other(rhs);
31f18b77 3645 swap(other);
7c673cae
FG
3646 return *this;
3647}
3648
3649ostream& operator<<(ostream& out, const PastIntervals &i)
3650{
3651 if (i.past_intervals) {
3652 return i.past_intervals->print(out);
3653 } else {
3654 return out << "(empty)";
3655 }
3656}
3657
3658ostream& operator<<(ostream& out, const PastIntervals::PriorSet &i)
3659{
3660 return out << "PriorSet("
3661 << "ec_pool: " << i.ec_pool
3662 << ", probe: " << i.probe
3663 << ", down: " << i.down
3664 << ", blocked_by: " << i.blocked_by
3665 << ", pg_down: " << i.pg_down
3666 << ")";
3667}
3668
11fdf7f2 3669void PastIntervals::decode(bufferlist::const_iterator &bl)
7c673cae
FG
3670{
3671 DECODE_START(1, bl);
3672 __u8 type = 0;
11fdf7f2 3673 decode(type, bl);
7c673cae
FG
3674 switch (type) {
3675 case 0:
3676 break;
3677 case 1:
11fdf7f2 3678 ceph_abort_msg("pi_simple_rep support removed post-luminous");
7c673cae
FG
3679 break;
3680 case 2:
3681 past_intervals.reset(new pi_compact_rep);
3682 past_intervals->decode(bl);
3683 break;
3684 }
3685 DECODE_FINISH(bl);
3686}
3687
7c673cae
FG
3688void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
3689{
7c673cae
FG
3690 {
3691 list<pi_compact_rep *> compact;
3692 pi_compact_rep::generate_test_instances(compact);
3693 for (auto &&i: compact) {
3694 // takes ownership of contents
3695 o.push_back(new PastIntervals(i));
3696 }
3697 }
3698 return;
3699}
3700
7c673cae
FG
3701bool PastIntervals::is_new_interval(
3702 int old_acting_primary,
3703 int new_acting_primary,
3704 const vector<int> &old_acting,
3705 const vector<int> &new_acting,
3706 int old_up_primary,
3707 int new_up_primary,
3708 const vector<int> &old_up,
3709 const vector<int> &new_up,
3710 int old_size,
3711 int new_size,
3712 int old_min_size,
3713 int new_min_size,
3714 unsigned old_pg_num,
3715 unsigned new_pg_num,
11fdf7f2
TL
3716 unsigned old_pg_num_pending,
3717 unsigned new_pg_num_pending,
7c673cae
FG
3718 bool old_sort_bitwise,
3719 bool new_sort_bitwise,
c07f9fc5
FG
3720 bool old_recovery_deletes,
3721 bool new_recovery_deletes,
7c673cae
FG
3722 pg_t pgid) {
3723 return old_acting_primary != new_acting_primary ||
3724 new_acting != old_acting ||
3725 old_up_primary != new_up_primary ||
3726 new_up != old_up ||
3727 old_min_size != new_min_size ||
3728 old_size != new_size ||
3729 pgid.is_split(old_pg_num, new_pg_num, 0) ||
11fdf7f2
TL
3730 // (is or was) pre-merge source
3731 pgid.is_merge_source(old_pg_num_pending, new_pg_num_pending, 0) ||
3732 pgid.is_merge_source(new_pg_num_pending, old_pg_num_pending, 0) ||
3733 // merge source
3734 pgid.is_merge_source(old_pg_num, new_pg_num, 0) ||
3735 // (is or was) pre-merge target
3736 pgid.is_merge_target(old_pg_num_pending, new_pg_num_pending) ||
3737 pgid.is_merge_target(new_pg_num_pending, old_pg_num_pending) ||
3738 // merge target
3739 pgid.is_merge_target(old_pg_num, new_pg_num) ||
c07f9fc5
FG
3740 old_sort_bitwise != new_sort_bitwise ||
3741 old_recovery_deletes != new_recovery_deletes;
7c673cae
FG
3742}
3743
3744bool PastIntervals::is_new_interval(
3745 int old_acting_primary,
3746 int new_acting_primary,
3747 const vector<int> &old_acting,
3748 const vector<int> &new_acting,
3749 int old_up_primary,
3750 int new_up_primary,
3751 const vector<int> &old_up,
3752 const vector<int> &new_up,
3753 OSDMapRef osdmap,
3754 OSDMapRef lastmap,
11fdf7f2
TL
3755 pg_t pgid)
3756{
3757 const pg_pool_t *plast = lastmap->get_pg_pool(pgid.pool());
3758 if (!plast) {
3759 return false; // after pool is deleted there are no more interval changes
3760 }
3761 const pg_pool_t *pi = osdmap->get_pg_pool(pgid.pool());
3762 if (!pi) {
3763 return true; // pool was deleted this epoch -> (final!) interval change
3764 }
3765 return
7c673cae
FG
3766 is_new_interval(old_acting_primary,
3767 new_acting_primary,
3768 old_acting,
3769 new_acting,
3770 old_up_primary,
3771 new_up_primary,
3772 old_up,
3773 new_up,
11fdf7f2
TL
3774 plast->size,
3775 pi->size,
3776 plast->min_size,
3777 pi->min_size,
3778 plast->get_pg_num(),
3779 pi->get_pg_num(),
3780 plast->get_pg_num_pending(),
3781 pi->get_pg_num_pending(),
7c673cae
FG
3782 lastmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3783 osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
c07f9fc5
FG
3784 lastmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
3785 osdmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
7c673cae
FG
3786 pgid);
3787}
3788
3789bool PastIntervals::check_new_interval(
3790 int old_acting_primary,
3791 int new_acting_primary,
3792 const vector<int> &old_acting,
3793 const vector<int> &new_acting,
3794 int old_up_primary,
3795 int new_up_primary,
3796 const vector<int> &old_up,
3797 const vector<int> &new_up,
3798 epoch_t same_interval_since,
3799 epoch_t last_epoch_clean,
3800 OSDMapRef osdmap,
3801 OSDMapRef lastmap,
3802 pg_t pgid,
3803 IsPGRecoverablePredicate *could_have_gone_active,
3804 PastIntervals *past_intervals,
3805 std::ostream *out)
3806{
3807 /*
3808 * We have to be careful to gracefully deal with situations like
3809 * so. Say we have a power outage or something that takes out both
3810 * OSDs, but the monitor doesn't mark them down in the same epoch.
3811 * The history may look like
3812 *
3813 * 1: A B
3814 * 2: B
3815 * 3: let's say B dies for good, too (say, from the power spike)
3816 * 4: A
3817 *
3818 * which makes it look like B may have applied updates to the PG
3819 * that we need in order to proceed. This sucks...
3820 *
3821 * To minimize the risk of this happening, we CANNOT go active if
3822 * _any_ OSDs in the prior set are down until we send an MOSDAlive
3823 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
3824 * Then, we have something like
3825 *
3826 * 1: A B
3827 * 2: B up_thru[B]=0
3828 * 3:
3829 * 4: A
3830 *
3831 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
3832 *
3833 * or,
3834 *
3835 * 1: A B
3836 * 2: B up_thru[B]=0
3837 * 3: B up_thru[B]=2
3838 * 4:
3839 * 5: A
3840 *
3841 * -> we must wait for B, bc it was alive through 2, and could have
3842 * written to the pg.
3843 *
3844 * If B is really dead, then an administrator will need to manually
3845 * intervene by marking the OSD as "lost."
3846 */
3847
3848 // remember past interval
3849 // NOTE: a change in the up set primary triggers an interval
3850 // change, even though the interval members in the pg_interval_t
3851 // do not change.
11fdf7f2
TL
3852 ceph_assert(past_intervals);
3853 ceph_assert(past_intervals->past_intervals);
7c673cae
FG
3854 if (is_new_interval(
3855 old_acting_primary,
3856 new_acting_primary,
3857 old_acting,
3858 new_acting,
3859 old_up_primary,
3860 new_up_primary,
3861 old_up,
3862 new_up,
3863 osdmap,
3864 lastmap,
3865 pgid)) {
3866 pg_interval_t i;
3867 i.first = same_interval_since;
3868 i.last = osdmap->get_epoch() - 1;
11fdf7f2 3869 ceph_assert(i.first <= i.last);
7c673cae
FG
3870 i.acting = old_acting;
3871 i.up = old_up;
3872 i.primary = old_acting_primary;
3873 i.up_primary = old_up_primary;
3874
3875 unsigned num_acting = 0;
3876 for (vector<int>::const_iterator p = i.acting.begin(); p != i.acting.end();
3877 ++p)
3878 if (*p != CRUSH_ITEM_NONE)
3879 ++num_acting;
3880
11fdf7f2 3881 ceph_assert(lastmap->get_pools().count(pgid.pool()));
7c673cae
FG
3882 const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
3883 set<pg_shard_t> old_acting_shards;
3884 old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
3885
3886 if (num_acting &&
3887 i.primary != -1 &&
3888 num_acting >= old_pg_pool.min_size &&
3889 (*could_have_gone_active)(old_acting_shards)) {
3890 if (out)
3891 *out << __func__ << " " << i
7c673cae
FG
3892 << " up_thru " << lastmap->get_up_thru(i.primary)
3893 << " up_from " << lastmap->get_up_from(i.primary)
11fdf7f2 3894 << " last_epoch_clean " << last_epoch_clean;
7c673cae
FG
3895 if (lastmap->get_up_thru(i.primary) >= i.first &&
3896 lastmap->get_up_from(i.primary) <= i.first) {
3897 i.maybe_went_rw = true;
3898 if (out)
11fdf7f2 3899 *out << " " << i
7c673cae
FG
3900 << " : primary up " << lastmap->get_up_from(i.primary)
3901 << "-" << lastmap->get_up_thru(i.primary)
3902 << " includes interval"
11fdf7f2 3903 << std::endl;
7c673cae
FG
3904 } else if (last_epoch_clean >= i.first &&
3905 last_epoch_clean <= i.last) {
3906 // If the last_epoch_clean is included in this interval, then
3907 // the pg must have been rw (for recovery to have completed).
3908 // This is important because we won't know the _real_
3909 // first_epoch because we stop at last_epoch_clean, and we
3910 // don't want the oldest interval to randomly have
3911 // maybe_went_rw false depending on the relative up_thru vs
3912 // last_epoch_clean timing.
3913 i.maybe_went_rw = true;
3914 if (out)
11fdf7f2 3915 *out << " " << i
7c673cae
FG
3916 << " : includes last_epoch_clean " << last_epoch_clean
3917 << " and presumed to have been rw"
3918 << std::endl;
3919 } else {
3920 i.maybe_went_rw = false;
3921 if (out)
11fdf7f2 3922 *out << " " << i
7c673cae
FG
3923 << " : primary up " << lastmap->get_up_from(i.primary)
3924 << "-" << lastmap->get_up_thru(i.primary)
3925 << " does not include interval"
11fdf7f2 3926 << std::endl;
7c673cae
FG
3927 }
3928 } else {
3929 i.maybe_went_rw = false;
3930 if (out)
3931 *out << __func__ << " " << i << " : acting set is too small" << std::endl;
3932 }
11fdf7f2 3933 past_intervals->past_intervals->add_interval(old_pg_pool.is_erasure(), i);
7c673cae
FG
3934 return true;
3935 } else {
3936 return false;
3937 }
3938}
3939
3940
3941// true if the given map affects the prior set
3942bool PastIntervals::PriorSet::affected_by_map(
3943 const OSDMap &osdmap,
3944 const DoutPrefixProvider *dpp) const
3945{
3946 for (set<pg_shard_t>::iterator p = probe.begin();
3947 p != probe.end();
3948 ++p) {
3949 int o = p->osd;
3950
3951 // did someone in the prior set go down?
3952 if (osdmap.is_down(o) && down.count(o) == 0) {
3953 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
3954 return true;
3955 }
3956
3957 // did a down osd in cur get (re)marked as lost?
3958 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3959 if (r != blocked_by.end()) {
3960 if (!osdmap.exists(o)) {
3961 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3962 return true;
3963 }
3964 if (osdmap.get_info(o).lost_at != r->second) {
3965 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
3966 return true;
3967 }
3968 }
3969 }
3970
3971 // did someone in the prior down set go up?
3972 for (set<int>::const_iterator p = down.begin();
3973 p != down.end();
3974 ++p) {
3975 int o = *p;
3976
3977 if (osdmap.is_up(o)) {
3978 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
3979 return true;
3980 }
3981
3982 // did someone in the prior set get lost or destroyed?
3983 if (!osdmap.exists(o)) {
3984 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3985 return true;
3986 }
3987 // did a down osd in down get (re)marked as lost?
3988 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3989 if (r != blocked_by.end()) {
3990 if (osdmap.get_info(o).lost_at != r->second) {
3991 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
3992 return true;
3993 }
3994 }
3995 }
3996
3997 return false;
3998}
3999
4000ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
4001{
4002 out << "interval(" << i.first << "-" << i.last
4003 << " up " << i.up << "(" << i.up_primary << ")"
4004 << " acting " << i.acting << "(" << i.primary << ")";
4005 if (i.maybe_went_rw)
4006 out << " maybe_went_rw";
4007 out << ")";
4008 return out;
4009}
4010
4011
4012
4013// -- pg_query_t --
4014
4015void pg_query_t::encode(bufferlist &bl, uint64_t features) const {
4016 ENCODE_START(3, 3, bl);
11fdf7f2
TL
4017 encode(type, bl);
4018 encode(since, bl);
7c673cae 4019 history.encode(bl);
11fdf7f2
TL
4020 encode(epoch_sent, bl);
4021 encode(to, bl);
4022 encode(from, bl);
7c673cae
FG
4023 ENCODE_FINISH(bl);
4024}
4025
11fdf7f2 4026void pg_query_t::decode(bufferlist::const_iterator &bl) {
7c673cae 4027 DECODE_START(3, bl);
11fdf7f2
TL
4028 decode(type, bl);
4029 decode(since, bl);
7c673cae 4030 history.decode(bl);
11fdf7f2
TL
4031 decode(epoch_sent, bl);
4032 decode(to, bl);
4033 decode(from, bl);
7c673cae
FG
4034 DECODE_FINISH(bl);
4035}
4036
4037void pg_query_t::dump(Formatter *f) const
4038{
4039 f->dump_int("from", from);
4040 f->dump_int("to", to);
4041 f->dump_string("type", get_type_name());
4042 f->dump_stream("since") << since;
4043 f->dump_stream("epoch_sent") << epoch_sent;
4044 f->open_object_section("history");
4045 history.dump(f);
4046 f->close_section();
4047}
4048void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
4049{
4050 o.push_back(new pg_query_t());
4051 list<pg_history_t*> h;
4052 pg_history_t::generate_test_instances(h);
4053 o.push_back(new pg_query_t(pg_query_t::INFO, shard_id_t(1), shard_id_t(2), *h.back(), 4));
4054 o.push_back(new pg_query_t(pg_query_t::MISSING, shard_id_t(2), shard_id_t(3), *h.back(), 4));
4055 o.push_back(new pg_query_t(pg_query_t::LOG, shard_id_t(0), shard_id_t(0),
4056 eversion_t(4, 5), *h.back(), 4));
4057 o.push_back(new pg_query_t(pg_query_t::FULLLOG,
4058 shard_id_t::NO_SHARD, shard_id_t::NO_SHARD,
4059 *h.back(), 5));
4060}
4061
4062// -- ObjectModDesc --
4063void ObjectModDesc::visit(Visitor *visitor) const
4064{
11fdf7f2 4065 auto bp = bl.cbegin();
7c673cae
FG
4066 try {
4067 while (!bp.end()) {
4068 DECODE_START(max_required_version, bp);
4069 uint8_t code;
11fdf7f2 4070 decode(code, bp);
7c673cae
FG
4071 switch (code) {
4072 case APPEND: {
4073 uint64_t size;
11fdf7f2 4074 decode(size, bp);
7c673cae
FG
4075 visitor->append(size);
4076 break;
4077 }
4078 case SETATTRS: {
4079 map<string, boost::optional<bufferlist> > attrs;
11fdf7f2 4080 decode(attrs, bp);
7c673cae
FG
4081 visitor->setattrs(attrs);
4082 break;
4083 }
4084 case DELETE: {
4085 version_t old_version;
11fdf7f2 4086 decode(old_version, bp);
7c673cae
FG
4087 visitor->rmobject(old_version);
4088 break;
4089 }
4090 case CREATE: {
4091 visitor->create();
4092 break;
4093 }
4094 case UPDATE_SNAPS: {
4095 set<snapid_t> snaps;
11fdf7f2 4096 decode(snaps, bp);
7c673cae
FG
4097 visitor->update_snaps(snaps);
4098 break;
4099 }
4100 case TRY_DELETE: {
4101 version_t old_version;
11fdf7f2 4102 decode(old_version, bp);
7c673cae
FG
4103 visitor->try_rmobject(old_version);
4104 break;
4105 }
4106 case ROLLBACK_EXTENTS: {
4107 vector<pair<uint64_t, uint64_t> > extents;
4108 version_t gen;
11fdf7f2
TL
4109 decode(gen, bp);
4110 decode(extents, bp);
7c673cae
FG
4111 visitor->rollback_extents(gen,extents);
4112 break;
4113 }
4114 default:
11fdf7f2 4115 ceph_abort_msg("Invalid rollback code");
7c673cae
FG
4116 }
4117 DECODE_FINISH(bp);
4118 }
4119 } catch (...) {
11fdf7f2 4120 ceph_abort_msg("Invalid encoding");
7c673cae
FG
4121 }
4122}
4123
4124struct DumpVisitor : public ObjectModDesc::Visitor {
4125 Formatter *f;
4126 explicit DumpVisitor(Formatter *f) : f(f) {}
4127 void append(uint64_t old_size) override {
4128 f->open_object_section("op");
4129 f->dump_string("code", "APPEND");
4130 f->dump_unsigned("old_size", old_size);
4131 f->close_section();
4132 }
4133 void setattrs(map<string, boost::optional<bufferlist> > &attrs) override {
4134 f->open_object_section("op");
4135 f->dump_string("code", "SETATTRS");
4136 f->open_array_section("attrs");
4137 for (map<string, boost::optional<bufferlist> >::iterator i = attrs.begin();
4138 i != attrs.end();
4139 ++i) {
4140 f->dump_string("attr_name", i->first);
4141 }
4142 f->close_section();
4143 f->close_section();
4144 }
4145 void rmobject(version_t old_version) override {
4146 f->open_object_section("op");
4147 f->dump_string("code", "RMOBJECT");
4148 f->dump_unsigned("old_version", old_version);
4149 f->close_section();
4150 }
4151 void try_rmobject(version_t old_version) override {
4152 f->open_object_section("op");
4153 f->dump_string("code", "TRY_RMOBJECT");
4154 f->dump_unsigned("old_version", old_version);
4155 f->close_section();
4156 }
4157 void create() override {
4158 f->open_object_section("op");
4159 f->dump_string("code", "CREATE");
4160 f->close_section();
4161 }
4162 void update_snaps(const set<snapid_t> &snaps) override {
4163 f->open_object_section("op");
4164 f->dump_string("code", "UPDATE_SNAPS");
4165 f->dump_stream("snaps") << snaps;
4166 f->close_section();
4167 }
4168 void rollback_extents(
4169 version_t gen,
4170 const vector<pair<uint64_t, uint64_t> > &extents) override {
4171 f->open_object_section("op");
4172 f->dump_string("code", "ROLLBACK_EXTENTS");
4173 f->dump_unsigned("gen", gen);
4174 f->dump_stream("snaps") << extents;
4175 f->close_section();
4176 }
4177};
4178
4179void ObjectModDesc::dump(Formatter *f) const
4180{
4181 f->open_object_section("object_mod_desc");
4182 f->dump_bool("can_local_rollback", can_local_rollback);
4183 f->dump_bool("rollback_info_completed", rollback_info_completed);
4184 {
4185 f->open_array_section("ops");
4186 DumpVisitor vis(f);
4187 visit(&vis);
4188 f->close_section();
4189 }
4190 f->close_section();
4191}
4192
4193void ObjectModDesc::generate_test_instances(list<ObjectModDesc*>& o)
4194{
4195 map<string, boost::optional<bufferlist> > attrs;
4196 attrs[OI_ATTR];
4197 attrs[SS_ATTR];
4198 attrs["asdf"];
4199 o.push_back(new ObjectModDesc());
4200 o.back()->append(100);
4201 o.back()->setattrs(attrs);
4202 o.push_back(new ObjectModDesc());
4203 o.back()->rmobject(1001);
4204 o.push_back(new ObjectModDesc());
4205 o.back()->create();
4206 o.back()->setattrs(attrs);
4207 o.push_back(new ObjectModDesc());
4208 o.back()->create();
4209 o.back()->setattrs(attrs);
4210 o.back()->mark_unrollbackable();
4211 o.back()->append(1000);
4212}
4213
4214void ObjectModDesc::encode(bufferlist &_bl) const
4215{
4216 ENCODE_START(max_required_version, max_required_version, _bl);
11fdf7f2
TL
4217 encode(can_local_rollback, _bl);
4218 encode(rollback_info_completed, _bl);
4219 encode(bl, _bl);
7c673cae
FG
4220 ENCODE_FINISH(_bl);
4221}
11fdf7f2 4222void ObjectModDesc::decode(bufferlist::const_iterator &_bl)
7c673cae
FG
4223{
4224 DECODE_START(2, _bl);
4225 max_required_version = struct_v;
11fdf7f2
TL
4226 decode(can_local_rollback, _bl);
4227 decode(rollback_info_completed, _bl);
4228 decode(bl, _bl);
7c673cae
FG
4229 // ensure bl does not pin a larger buffer in memory
4230 bl.rebuild();
31f18b77 4231 bl.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4232 DECODE_FINISH(_bl);
4233}
4234
4235// -- pg_log_entry_t --
4236
4237string pg_log_entry_t::get_key_name() const
4238{
4239 return version.get_key_name();
4240}
4241
4242void pg_log_entry_t::encode_with_checksum(bufferlist& bl) const
4243{
11fdf7f2 4244 using ceph::encode;
7c673cae 4245 bufferlist ebl(sizeof(*this)*2);
11fdf7f2 4246 this->encode(ebl);
7c673cae 4247 __u32 crc = ebl.crc32c(0);
11fdf7f2
TL
4248 encode(ebl, bl);
4249 encode(crc, bl);
7c673cae
FG
4250}
4251
11fdf7f2 4252void pg_log_entry_t::decode_with_checksum(bufferlist::const_iterator& p)
7c673cae 4253{
11fdf7f2 4254 using ceph::decode;
7c673cae 4255 bufferlist bl;
11fdf7f2 4256 decode(bl, p);
7c673cae 4257 __u32 crc;
11fdf7f2 4258 decode(crc, p);
7c673cae
FG
4259 if (crc != bl.crc32c(0))
4260 throw buffer::malformed_input("bad checksum on pg_log_entry_t");
11fdf7f2
TL
4261 auto q = bl.cbegin();
4262 this->decode(q);
7c673cae
FG
4263}
4264
4265void pg_log_entry_t::encode(bufferlist &bl) const
4266{
11fdf7f2
TL
4267 ENCODE_START(12, 4, bl);
4268 encode(op, bl);
4269 encode(soid, bl);
4270 encode(version, bl);
7c673cae
FG
4271
4272 /**
4273 * Added with reverting_to:
4274 * Previous code used prior_version to encode
4275 * what we now call reverting_to. This will
4276 * allow older code to decode reverting_to
4277 * into prior_version as expected.
4278 */
4279 if (op == LOST_REVERT)
11fdf7f2 4280 encode(reverting_to, bl);
7c673cae 4281 else
11fdf7f2 4282 encode(prior_version, bl);
7c673cae 4283
11fdf7f2
TL
4284 encode(reqid, bl);
4285 encode(mtime, bl);
7c673cae 4286 if (op == LOST_REVERT)
11fdf7f2
TL
4287 encode(prior_version, bl);
4288 encode(snaps, bl);
4289 encode(user_version, bl);
4290 encode(mod_desc, bl);
4291 encode(extra_reqids, bl);
7c673cae 4292 if (op == ERROR)
11fdf7f2
TL
4293 encode(return_code, bl);
4294 if (!extra_reqids.empty())
4295 encode(extra_reqid_return_codes, bl);
7c673cae
FG
4296 ENCODE_FINISH(bl);
4297}
4298
11fdf7f2 4299void pg_log_entry_t::decode(bufferlist::const_iterator &bl)
7c673cae 4300{
11fdf7f2
TL
4301 DECODE_START_LEGACY_COMPAT_LEN(12, 4, 4, bl);
4302 decode(op, bl);
7c673cae
FG
4303 if (struct_v < 2) {
4304 sobject_t old_soid;
11fdf7f2 4305 decode(old_soid, bl);
7c673cae
FG
4306 soid.oid = old_soid.oid;
4307 soid.snap = old_soid.snap;
4308 invalid_hash = true;
4309 } else {
11fdf7f2 4310 decode(soid, bl);
7c673cae
FG
4311 }
4312 if (struct_v < 3)
4313 invalid_hash = true;
11fdf7f2 4314 decode(version, bl);
7c673cae
FG
4315
4316 if (struct_v >= 6 && op == LOST_REVERT)
11fdf7f2 4317 decode(reverting_to, bl);
7c673cae 4318 else
11fdf7f2 4319 decode(prior_version, bl);
7c673cae 4320
11fdf7f2 4321 decode(reqid, bl);
7c673cae 4322
11fdf7f2 4323 decode(mtime, bl);
7c673cae
FG
4324 if (struct_v < 5)
4325 invalid_pool = true;
4326
4327 if (op == LOST_REVERT) {
4328 if (struct_v >= 6) {
11fdf7f2 4329 decode(prior_version, bl);
7c673cae
FG
4330 } else {
4331 reverting_to = prior_version;
4332 }
4333 }
4334 if (struct_v >= 7 || // for v >= 7, this is for all ops.
4335 op == CLONE) { // for v < 7, it's only present for CLONE.
11fdf7f2 4336 decode(snaps, bl);
7c673cae
FG
4337 // ensure snaps does not pin a larger buffer in memory
4338 snaps.rebuild();
31f18b77 4339 snaps.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
4340 }
4341
4342 if (struct_v >= 8)
11fdf7f2 4343 decode(user_version, bl);
7c673cae
FG
4344 else
4345 user_version = version.version;
4346
4347 if (struct_v >= 9)
11fdf7f2 4348 decode(mod_desc, bl);
7c673cae
FG
4349 else
4350 mod_desc.mark_unrollbackable();
4351 if (struct_v >= 10)
11fdf7f2 4352 decode(extra_reqids, bl);
7c673cae 4353 if (struct_v >= 11 && op == ERROR)
11fdf7f2
TL
4354 decode(return_code, bl);
4355 if (struct_v >= 12 && !extra_reqids.empty())
4356 decode(extra_reqid_return_codes, bl);
7c673cae
FG
4357 DECODE_FINISH(bl);
4358}
4359
4360void pg_log_entry_t::dump(Formatter *f) const
4361{
4362 f->dump_string("op", get_op_name());
4363 f->dump_stream("object") << soid;
4364 f->dump_stream("version") << version;
4365 f->dump_stream("prior_version") << prior_version;
4366 f->dump_stream("reqid") << reqid;
4367 f->open_array_section("extra_reqids");
11fdf7f2 4368 uint32_t idx = 0;
31f18b77 4369 for (auto p = extra_reqids.begin();
7c673cae 4370 p != extra_reqids.end();
11fdf7f2 4371 ++idx, ++p) {
7c673cae
FG
4372 f->open_object_section("extra_reqid");
4373 f->dump_stream("reqid") << p->first;
4374 f->dump_stream("user_version") << p->second;
11fdf7f2
TL
4375 auto it = extra_reqid_return_codes.find(idx);
4376 if (it != extra_reqid_return_codes.end()) {
4377 f->dump_int("return_code", it->second);
4378 }
7c673cae
FG
4379 f->close_section();
4380 }
4381 f->close_section();
4382 f->dump_stream("mtime") << mtime;
4383 f->dump_int("return_code", return_code);
4384 if (snaps.length() > 0) {
4385 vector<snapid_t> v;
4386 bufferlist c = snaps;
11fdf7f2 4387 auto p = c.cbegin();
7c673cae 4388 try {
11fdf7f2
TL
4389 using ceph::decode;
4390 decode(v, p);
7c673cae
FG
4391 } catch (...) {
4392 v.clear();
4393 }
4394 f->open_object_section("snaps");
4395 for (vector<snapid_t>::iterator p = v.begin(); p != v.end(); ++p)
4396 f->dump_unsigned("snap", *p);
4397 f->close_section();
4398 }
4399 {
4400 f->open_object_section("mod_desc");
4401 mod_desc.dump(f);
4402 f->close_section();
4403 }
4404}
4405
4406void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
4407{
4408 o.push_back(new pg_log_entry_t());
4409 hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
4410 o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
4411 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4412 utime_t(8,9), 0));
4413 o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
4414 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4415 utime_t(8,9), -ENOENT));
4416}
4417
4418ostream& operator<<(ostream& out, const pg_log_entry_t& e)
4419{
4420 out << e.version << " (" << e.prior_version << ") "
4421 << std::left << std::setw(8) << e.get_op_name() << ' '
4422 << e.soid << " by " << e.reqid << " " << e.mtime
4423 << " " << e.return_code;
4424 if (e.snaps.length()) {
4425 vector<snapid_t> snaps;
4426 bufferlist c = e.snaps;
11fdf7f2 4427 auto p = c.cbegin();
7c673cae 4428 try {
11fdf7f2 4429 decode(snaps, p);
7c673cae
FG
4430 } catch (...) {
4431 snaps.clear();
4432 }
4433 out << " snaps " << snaps;
4434 }
4435 return out;
4436}
4437
c07f9fc5
FG
4438// -- pg_log_dup_t --
4439
11fdf7f2 4440std::string pg_log_dup_t::get_key_name() const
c07f9fc5 4441{
11fdf7f2
TL
4442 static const char prefix[] = "dup_";
4443 std::string key(36, ' ');
4444 memcpy(&key[0], prefix, 4);
4445 version.get_key_name(&key[4]);
4446 key.resize(35); // remove the null terminator
4447 return key;
c07f9fc5
FG
4448}
4449
4450void pg_log_dup_t::encode(bufferlist &bl) const
4451{
4452 ENCODE_START(1, 1, bl);
11fdf7f2
TL
4453 encode(reqid, bl);
4454 encode(version, bl);
4455 encode(user_version, bl);
4456 encode(return_code, bl);
c07f9fc5
FG
4457 ENCODE_FINISH(bl);
4458}
4459
11fdf7f2 4460void pg_log_dup_t::decode(bufferlist::const_iterator &bl)
c07f9fc5
FG
4461{
4462 DECODE_START(1, bl);
11fdf7f2
TL
4463 decode(reqid, bl);
4464 decode(version, bl);
4465 decode(user_version, bl);
4466 decode(return_code, bl);
c07f9fc5
FG
4467 DECODE_FINISH(bl);
4468}
4469
4470void pg_log_dup_t::dump(Formatter *f) const
4471{
4472 f->dump_stream("reqid") << reqid;
4473 f->dump_stream("version") << version;
4474 f->dump_stream("user_version") << user_version;
4475 f->dump_stream("return_code") << return_code;
4476}
4477
4478void pg_log_dup_t::generate_test_instances(list<pg_log_dup_t*>& o)
4479{
4480 o.push_back(new pg_log_dup_t());
4481 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4482 1,
4483 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4484 0));
4485 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4486 2,
4487 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4488 -ENOENT));
4489}
4490
4491
4492std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e) {
4493 return out << "log_dup(reqid=" << e.reqid <<
4494 " v=" << e.version << " uv=" << e.user_version <<
4495 " rc=" << e.return_code << ")";
4496}
4497
7c673cae
FG
4498
4499// -- pg_log_t --
4500
4501// out: pg_log_t that only has entries that apply to import_pgid using curmap
4502// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4503void pg_log_t::filter_log(spg_t import_pgid, const OSDMap &curmap,
4504 const string &hit_set_namespace, const pg_log_t &in,
4505 pg_log_t &out, pg_log_t &reject)
4506{
4507 out = in;
4508 out.log.clear();
4509 reject.log.clear();
4510
4511 for (list<pg_log_entry_t>::const_iterator i = in.log.begin();
4512 i != in.log.end(); ++i) {
4513
4514 // Reject pg log entries for temporary objects
4515 if (i->soid.is_temp()) {
4516 reject.log.push_back(*i);
4517 continue;
4518 }
4519
4520 if (i->soid.nspace != hit_set_namespace) {
4521 object_t oid = i->soid.oid;
4522 object_locator_t loc(i->soid);
4523 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
4524 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
4525
4526 if (import_pgid.pgid == pgid) {
4527 out.log.push_back(*i);
4528 } else {
4529 reject.log.push_back(*i);
4530 }
4531 } else {
4532 out.log.push_back(*i);
4533 }
4534 }
4535}
4536
4537void pg_log_t::encode(bufferlist& bl) const
4538{
c07f9fc5 4539 ENCODE_START(7, 3, bl);
11fdf7f2
TL
4540 encode(head, bl);
4541 encode(tail, bl);
4542 encode(log, bl);
4543 encode(can_rollback_to, bl);
4544 encode(rollback_info_trimmed_to, bl);
4545 encode(dups, bl);
7c673cae
FG
4546 ENCODE_FINISH(bl);
4547}
4548
11fdf7f2 4549void pg_log_t::decode(bufferlist::const_iterator &bl, int64_t pool)
7c673cae 4550{
c07f9fc5 4551 DECODE_START_LEGACY_COMPAT_LEN(7, 3, 3, bl);
11fdf7f2
TL
4552 decode(head, bl);
4553 decode(tail, bl);
7c673cae
FG
4554 if (struct_v < 2) {
4555 bool backlog;
11fdf7f2 4556 decode(backlog, bl);
7c673cae 4557 }
11fdf7f2 4558 decode(log, bl);
7c673cae 4559 if (struct_v >= 5)
11fdf7f2 4560 decode(can_rollback_to, bl);
7c673cae
FG
4561
4562 if (struct_v >= 6)
11fdf7f2 4563 decode(rollback_info_trimmed_to, bl);
7c673cae
FG
4564 else
4565 rollback_info_trimmed_to = tail;
c07f9fc5
FG
4566
4567 if (struct_v >= 7)
11fdf7f2 4568 decode(dups, bl);
c07f9fc5 4569
7c673cae
FG
4570 DECODE_FINISH(bl);
4571
4572 // handle hobject_t format change
4573 if (struct_v < 4) {
4574 for (list<pg_log_entry_t>::iterator i = log.begin();
4575 i != log.end();
4576 ++i) {
4577 if (!i->soid.is_max() && i->soid.pool == -1)
4578 i->soid.pool = pool;
4579 }
4580 }
4581}
4582
4583void pg_log_t::dump(Formatter *f) const
4584{
4585 f->dump_stream("head") << head;
4586 f->dump_stream("tail") << tail;
4587 f->open_array_section("log");
4588 for (list<pg_log_entry_t>::const_iterator p = log.begin(); p != log.end(); ++p) {
4589 f->open_object_section("entry");
4590 p->dump(f);
4591 f->close_section();
4592 }
4593 f->close_section();
c07f9fc5
FG
4594 f->open_array_section("dups");
4595 for (const auto& entry : dups) {
4596 f->open_object_section("entry");
4597 entry.dump(f);
4598 f->close_section();
4599 }
4600 f->close_section();
7c673cae
FG
4601}
4602
4603void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
4604{
4605 o.push_back(new pg_log_t);
4606
4607 // this is nonsensical:
4608 o.push_back(new pg_log_t);
4609 o.back()->head = eversion_t(1,2);
4610 o.back()->tail = eversion_t(3,4);
4611 list<pg_log_entry_t*> e;
4612 pg_log_entry_t::generate_test_instances(e);
4613 for (list<pg_log_entry_t*>::iterator p = e.begin(); p != e.end(); ++p)
4614 o.back()->log.push_back(**p);
4615}
4616
4617void pg_log_t::copy_after(const pg_log_t &other, eversion_t v)
4618{
4619 can_rollback_to = other.can_rollback_to;
4620 head = other.head;
4621 tail = other.tail;
4622 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4623 i != other.log.rend();
4624 ++i) {
11fdf7f2 4625 ceph_assert(i->version > other.tail);
7c673cae
FG
4626 if (i->version <= v) {
4627 // make tail accurate.
4628 tail = i->version;
4629 break;
4630 }
4631 log.push_front(*i);
4632 }
4633}
4634
4635void pg_log_t::copy_range(const pg_log_t &other, eversion_t from, eversion_t to)
4636{
4637 can_rollback_to = other.can_rollback_to;
4638 list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
11fdf7f2 4639 ceph_assert(i != other.log.rend());
7c673cae
FG
4640 while (i->version > to) {
4641 ++i;
11fdf7f2 4642 ceph_assert(i != other.log.rend());
7c673cae 4643 }
11fdf7f2 4644 ceph_assert(i->version == to);
7c673cae
FG
4645 head = to;
4646 for ( ; i != other.log.rend(); ++i) {
4647 if (i->version <= from) {
4648 tail = i->version;
4649 break;
4650 }
4651 log.push_front(*i);
4652 }
4653}
4654
4655void pg_log_t::copy_up_to(const pg_log_t &other, int max)
4656{
4657 can_rollback_to = other.can_rollback_to;
4658 int n = 0;
4659 head = other.head;
4660 tail = other.tail;
4661 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4662 i != other.log.rend();
4663 ++i) {
4664 if (n++ >= max) {
4665 tail = i->version;
4666 break;
4667 }
4668 log.push_front(*i);
4669 }
4670}
4671
c07f9fc5 4672ostream& pg_log_t::print(ostream& out) const
7c673cae
FG
4673{
4674 out << *this << std::endl;
4675 for (list<pg_log_entry_t>::const_iterator p = log.begin();
4676 p != log.end();
c07f9fc5 4677 ++p)
7c673cae 4678 out << *p << std::endl;
c07f9fc5
FG
4679 for (const auto& entry : dups) {
4680 out << " dup entry: " << entry << std::endl;
4681 }
7c673cae
FG
4682 return out;
4683}
4684
4685// -- pg_missing_t --
4686
4687ostream& operator<<(ostream& out, const pg_missing_item& i)
4688{
4689 out << i.need;
4690 if (i.have != eversion_t())
4691 out << "(" << i.have << ")";
c07f9fc5 4692 out << " flags = " << i.flag_str();
7c673cae
FG
4693 return out;
4694}
4695
4696// -- object_copy_cursor_t --
4697
4698void object_copy_cursor_t::encode(bufferlist& bl) const
4699{
4700 ENCODE_START(1, 1, bl);
11fdf7f2
TL
4701 encode(attr_complete, bl);
4702 encode(data_offset, bl);
4703 encode(data_complete, bl);
4704 encode(omap_offset, bl);
4705 encode(omap_complete, bl);
7c673cae
FG
4706 ENCODE_FINISH(bl);
4707}
4708
11fdf7f2 4709void object_copy_cursor_t::decode(bufferlist::const_iterator &bl)
7c673cae
FG
4710{
4711 DECODE_START(1, bl);
11fdf7f2
TL
4712 decode(attr_complete, bl);
4713 decode(data_offset, bl);
4714 decode(data_complete, bl);
4715 decode(omap_offset, bl);
4716 decode(omap_complete, bl);
7c673cae
FG
4717 DECODE_FINISH(bl);
4718}
4719
4720void object_copy_cursor_t::dump(Formatter *f) const
4721{
4722 f->dump_unsigned("attr_complete", (int)attr_complete);
4723 f->dump_unsigned("data_offset", data_offset);
4724 f->dump_unsigned("data_complete", (int)data_complete);
4725 f->dump_string("omap_offset", omap_offset);
4726 f->dump_unsigned("omap_complete", (int)omap_complete);
4727}
4728
4729void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
4730{
4731 o.push_back(new object_copy_cursor_t);
4732 o.push_back(new object_copy_cursor_t);
4733 o.back()->attr_complete = true;
4734 o.back()->data_offset = 123;
4735 o.push_back(new object_copy_cursor_t);
4736 o.back()->attr_complete = true;
4737 o.back()->data_complete = true;
4738 o.back()->omap_offset = "foo";
4739 o.push_back(new object_copy_cursor_t);
4740 o.back()->attr_complete = true;
4741 o.back()->data_complete = true;
4742 o.back()->omap_complete = true;
4743}
4744
4745// -- object_copy_data_t --
4746
4747void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const
4748{
11fdf7f2
TL
4749 ENCODE_START(8, 5, bl);
4750 encode(size, bl);
4751 encode(mtime, bl);
4752 encode(attrs, bl);
4753 encode(data, bl);
4754 encode(omap_data, bl);
4755 encode(cursor, bl);
4756 encode(omap_header, bl);
4757 encode(snaps, bl);
4758 encode(snap_seq, bl);
4759 encode(flags, bl);
4760 encode(data_digest, bl);
4761 encode(omap_digest, bl);
4762 encode(reqids, bl);
4763 encode(truncate_seq, bl);
4764 encode(truncate_size, bl);
4765 encode(reqid_return_codes, bl);
7c673cae
FG
4766 ENCODE_FINISH(bl);
4767}
4768
11fdf7f2 4769void object_copy_data_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
4770{
4771 DECODE_START(7, bl);
4772 if (struct_v < 5) {
4773 // old
11fdf7f2
TL
4774 decode(size, bl);
4775 decode(mtime, bl);
7c673cae
FG
4776 {
4777 string category;
11fdf7f2 4778 decode(category, bl); // no longer used
7c673cae 4779 }
11fdf7f2
TL
4780 decode(attrs, bl);
4781 decode(data, bl);
7c673cae
FG
4782 {
4783 map<string,bufferlist> omap;
11fdf7f2 4784 decode(omap, bl);
7c673cae 4785 omap_data.clear();
11fdf7f2
TL
4786 if (!omap.empty()) {
4787 using ceph::encode;
4788 encode(omap, omap_data);
4789 }
7c673cae 4790 }
11fdf7f2 4791 decode(cursor, bl);
7c673cae 4792 if (struct_v >= 2)
11fdf7f2 4793 decode(omap_header, bl);
7c673cae 4794 if (struct_v >= 3) {
11fdf7f2
TL
4795 decode(snaps, bl);
4796 decode(snap_seq, bl);
7c673cae
FG
4797 } else {
4798 snaps.clear();
4799 snap_seq = 0;
4800 }
4801 if (struct_v >= 4) {
11fdf7f2
TL
4802 decode(flags, bl);
4803 decode(data_digest, bl);
4804 decode(omap_digest, bl);
7c673cae
FG
4805 }
4806 } else {
4807 // current
11fdf7f2
TL
4808 decode(size, bl);
4809 decode(mtime, bl);
4810 decode(attrs, bl);
4811 decode(data, bl);
4812 decode(omap_data, bl);
4813 decode(cursor, bl);
4814 decode(omap_header, bl);
4815 decode(snaps, bl);
4816 decode(snap_seq, bl);
7c673cae 4817 if (struct_v >= 4) {
11fdf7f2
TL
4818 decode(flags, bl);
4819 decode(data_digest, bl);
4820 decode(omap_digest, bl);
7c673cae
FG
4821 }
4822 if (struct_v >= 6) {
11fdf7f2 4823 decode(reqids, bl);
7c673cae
FG
4824 }
4825 if (struct_v >= 7) {
11fdf7f2
TL
4826 decode(truncate_seq, bl);
4827 decode(truncate_size, bl);
4828 }
4829 if (struct_v >= 8) {
4830 decode(reqid_return_codes, bl);
7c673cae
FG
4831 }
4832 }
4833 DECODE_FINISH(bl);
4834}
4835
4836void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o)
4837{
4838 o.push_back(new object_copy_data_t());
4839
4840 list<object_copy_cursor_t*> cursors;
4841 object_copy_cursor_t::generate_test_instances(cursors);
4842 list<object_copy_cursor_t*>::iterator ci = cursors.begin();
4843 o.back()->cursor = **(ci++);
4844
4845 o.push_back(new object_copy_data_t());
4846 o.back()->cursor = **(ci++);
4847
4848 o.push_back(new object_copy_data_t());
4849 o.back()->size = 1234;
4850 o.back()->mtime.set_from_double(1234);
4851 bufferptr bp("there", 5);
4852 bufferlist bl;
4853 bl.push_back(bp);
4854 o.back()->attrs["hello"] = bl;
4855 bufferptr bp2("not", 3);
4856 bufferlist bl2;
4857 bl2.push_back(bp2);
4858 map<string,bufferlist> omap;
4859 omap["why"] = bl2;
11fdf7f2
TL
4860 using ceph::encode;
4861 encode(omap, o.back()->omap_data);
7c673cae
FG
4862 bufferptr databp("iamsomedatatocontain", 20);
4863 o.back()->data.push_back(databp);
4864 o.back()->omap_header.append("this is an omap header");
4865 o.back()->snaps.push_back(123);
4866 o.back()->reqids.push_back(make_pair(osd_reqid_t(), version_t()));
4867}
4868
4869void object_copy_data_t::dump(Formatter *f) const
4870{
4871 f->open_object_section("cursor");
4872 cursor.dump(f);
4873 f->close_section(); // cursor
4874 f->dump_int("size", size);
4875 f->dump_stream("mtime") << mtime;
4876 /* we should really print out the attrs here, but bufferlist
4877 const-correctness prevents that */
4878 f->dump_int("attrs_size", attrs.size());
4879 f->dump_int("flags", flags);
4880 f->dump_unsigned("data_digest", data_digest);
4881 f->dump_unsigned("omap_digest", omap_digest);
4882 f->dump_int("omap_data_length", omap_data.length());
4883 f->dump_int("omap_header_length", omap_header.length());
4884 f->dump_int("data_length", data.length());
4885 f->open_array_section("snaps");
4886 for (vector<snapid_t>::const_iterator p = snaps.begin();
4887 p != snaps.end(); ++p)
4888 f->dump_unsigned("snap", *p);
4889 f->close_section();
4890 f->open_array_section("reqids");
11fdf7f2 4891 uint32_t idx = 0;
31f18b77 4892 for (auto p = reqids.begin();
7c673cae 4893 p != reqids.end();
11fdf7f2 4894 ++idx, ++p) {
7c673cae
FG
4895 f->open_object_section("extra_reqid");
4896 f->dump_stream("reqid") << p->first;
4897 f->dump_stream("user_version") << p->second;
11fdf7f2
TL
4898 auto it = reqid_return_codes.find(idx);
4899 if (it != reqid_return_codes.end()) {
4900 f->dump_int("return_code", it->second);
4901 }
7c673cae
FG
4902 f->close_section();
4903 }
4904 f->close_section();
4905}
4906
4907// -- pg_create_t --
4908
4909void pg_create_t::encode(bufferlist &bl) const
4910{
4911 ENCODE_START(1, 1, bl);
11fdf7f2
TL
4912 encode(created, bl);
4913 encode(parent, bl);
4914 encode(split_bits, bl);
7c673cae
FG
4915 ENCODE_FINISH(bl);
4916}
4917
11fdf7f2 4918void pg_create_t::decode(bufferlist::const_iterator &bl)
7c673cae
FG
4919{
4920 DECODE_START(1, bl);
11fdf7f2
TL
4921 decode(created, bl);
4922 decode(parent, bl);
4923 decode(split_bits, bl);
7c673cae
FG
4924 DECODE_FINISH(bl);
4925}
4926
4927void pg_create_t::dump(Formatter *f) const
4928{
4929 f->dump_unsigned("created", created);
4930 f->dump_stream("parent") << parent;
4931 f->dump_int("split_bits", split_bits);
4932}
4933
4934void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
4935{
4936 o.push_back(new pg_create_t);
11fdf7f2 4937 o.push_back(new pg_create_t(1, pg_t(3, 4), 2));
7c673cae
FG
4938}
4939
4940
4941// -- pg_hit_set_info_t --
4942
4943void pg_hit_set_info_t::encode(bufferlist& bl) const
4944{
4945 ENCODE_START(2, 1, bl);
11fdf7f2
TL
4946 encode(begin, bl);
4947 encode(end, bl);
4948 encode(version, bl);
4949 encode(using_gmt, bl);
7c673cae
FG
4950 ENCODE_FINISH(bl);
4951}
4952
11fdf7f2 4953void pg_hit_set_info_t::decode(bufferlist::const_iterator& p)
7c673cae
FG
4954{
4955 DECODE_START(2, p);
11fdf7f2
TL
4956 decode(begin, p);
4957 decode(end, p);
4958 decode(version, p);
7c673cae 4959 if (struct_v >= 2) {
11fdf7f2 4960 decode(using_gmt, p);
7c673cae
FG
4961 } else {
4962 using_gmt = false;
4963 }
4964 DECODE_FINISH(p);
4965}
4966
4967void pg_hit_set_info_t::dump(Formatter *f) const
4968{
4969 f->dump_stream("begin") << begin;
4970 f->dump_stream("end") << end;
4971 f->dump_stream("version") << version;
4972 f->dump_stream("using_gmt") << using_gmt;
4973}
4974
4975void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
4976{
4977 ls.push_back(new pg_hit_set_info_t);
4978 ls.push_back(new pg_hit_set_info_t);
4979 ls.back()->begin = utime_t(1, 2);
4980 ls.back()->end = utime_t(3, 4);
4981}
4982
4983
4984// -- pg_hit_set_history_t --
4985
4986void pg_hit_set_history_t::encode(bufferlist& bl) const
4987{
4988 ENCODE_START(1, 1, bl);
11fdf7f2 4989 encode(current_last_update, bl);
7c673cae
FG
4990 {
4991 utime_t dummy_stamp;
11fdf7f2 4992 encode(dummy_stamp, bl);
7c673cae
FG
4993 }
4994 {
4995 pg_hit_set_info_t dummy_info;
11fdf7f2 4996 encode(dummy_info, bl);
7c673cae 4997 }
11fdf7f2 4998 encode(history, bl);
7c673cae
FG
4999 ENCODE_FINISH(bl);
5000}
5001
11fdf7f2 5002void pg_hit_set_history_t::decode(bufferlist::const_iterator& p)
7c673cae
FG
5003{
5004 DECODE_START(1, p);
11fdf7f2 5005 decode(current_last_update, p);
7c673cae
FG
5006 {
5007 utime_t dummy_stamp;
11fdf7f2 5008 decode(dummy_stamp, p);
7c673cae
FG
5009 }
5010 {
5011 pg_hit_set_info_t dummy_info;
11fdf7f2 5012 decode(dummy_info, p);
7c673cae 5013 }
11fdf7f2 5014 decode(history, p);
7c673cae
FG
5015 DECODE_FINISH(p);
5016}
5017
5018void pg_hit_set_history_t::dump(Formatter *f) const
5019{
5020 f->dump_stream("current_last_update") << current_last_update;
5021 f->open_array_section("history");
5022 for (list<pg_hit_set_info_t>::const_iterator p = history.begin();
5023 p != history.end(); ++p) {
5024 f->open_object_section("info");
5025 p->dump(f);
5026 f->close_section();
5027 }
5028 f->close_section();
5029}
5030
5031void pg_hit_set_history_t::generate_test_instances(list<pg_hit_set_history_t*>& ls)
5032{
5033 ls.push_back(new pg_hit_set_history_t);
5034 ls.push_back(new pg_hit_set_history_t);
5035 ls.back()->current_last_update = eversion_t(1, 2);
5036 ls.back()->history.push_back(pg_hit_set_info_t());
5037}
5038
7c673cae
FG
5039// -- OSDSuperblock --
5040
5041void OSDSuperblock::encode(bufferlist &bl) const
5042{
5043 ENCODE_START(8, 5, bl);
11fdf7f2
TL
5044 encode(cluster_fsid, bl);
5045 encode(whoami, bl);
5046 encode(current_epoch, bl);
5047 encode(oldest_map, bl);
5048 encode(newest_map, bl);
5049 encode(weight, bl);
7c673cae 5050 compat_features.encode(bl);
11fdf7f2
TL
5051 encode(clean_thru, bl);
5052 encode(mounted, bl);
5053 encode(osd_fsid, bl);
5054 encode((epoch_t)0, bl); // epoch_t last_epoch_marked_full
5055 encode((uint32_t)0, bl); // map<int64_t,epoch_t> pool_last_epoch_marked_full
7c673cae
FG
5056 ENCODE_FINISH(bl);
5057}
5058
11fdf7f2 5059void OSDSuperblock::decode(bufferlist::const_iterator &bl)
7c673cae
FG
5060{
5061 DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl);
5062 if (struct_v < 3) {
5063 string magic;
11fdf7f2
TL
5064 decode(magic, bl);
5065 }
5066 decode(cluster_fsid, bl);
5067 decode(whoami, bl);
5068 decode(current_epoch, bl);
5069 decode(oldest_map, bl);
5070 decode(newest_map, bl);
5071 decode(weight, bl);
7c673cae
FG
5072 if (struct_v >= 2) {
5073 compat_features.decode(bl);
5074 } else { //upgrade it!
5075 compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
5076 }
11fdf7f2
TL
5077 decode(clean_thru, bl);
5078 decode(mounted, bl);
7c673cae 5079 if (struct_v >= 4)
11fdf7f2 5080 decode(osd_fsid, bl);
7c673cae
FG
5081 if (struct_v >= 6) {
5082 epoch_t last_map_marked_full;
11fdf7f2 5083 decode(last_map_marked_full, bl);
7c673cae
FG
5084 }
5085 if (struct_v >= 7) {
5086 map<int64_t,epoch_t> pool_last_map_marked_full;
11fdf7f2 5087 decode(pool_last_map_marked_full, bl);
7c673cae
FG
5088 }
5089 DECODE_FINISH(bl);
5090}
5091
5092void OSDSuperblock::dump(Formatter *f) const
5093{
5094 f->dump_stream("cluster_fsid") << cluster_fsid;
5095 f->dump_stream("osd_fsid") << osd_fsid;
5096 f->dump_int("whoami", whoami);
5097 f->dump_int("current_epoch", current_epoch);
5098 f->dump_int("oldest_map", oldest_map);
5099 f->dump_int("newest_map", newest_map);
5100 f->dump_float("weight", weight);
5101 f->open_object_section("compat");
5102 compat_features.dump(f);
5103 f->close_section();
5104 f->dump_int("clean_thru", clean_thru);
5105 f->dump_int("last_epoch_mounted", mounted);
5106}
5107
5108void OSDSuperblock::generate_test_instances(list<OSDSuperblock*>& o)
5109{
5110 OSDSuperblock z;
5111 o.push_back(new OSDSuperblock(z));
11fdf7f2
TL
5112 z.cluster_fsid.parse("01010101-0101-0101-0101-010101010101");
5113 z.osd_fsid.parse("02020202-0202-0202-0202-020202020202");
7c673cae
FG
5114 z.whoami = 3;
5115 z.current_epoch = 4;
5116 z.oldest_map = 5;
5117 z.newest_map = 9;
5118 z.mounted = 8;
5119 z.clean_thru = 7;
5120 o.push_back(new OSDSuperblock(z));
5121 o.push_back(new OSDSuperblock(z));
5122}
5123
5124// -- SnapSet --
5125
5126void SnapSet::encode(bufferlist& bl) const
5127{
5128 ENCODE_START(3, 2, bl);
11fdf7f2
TL
5129 encode(seq, bl);
5130 encode(true, bl); // head_exists
5131 encode(snaps, bl);
5132 encode(clones, bl);
5133 encode(clone_overlap, bl);
5134 encode(clone_size, bl);
5135 encode(clone_snaps, bl);
7c673cae
FG
5136 ENCODE_FINISH(bl);
5137}
5138
11fdf7f2 5139void SnapSet::decode(bufferlist::const_iterator& bl)
7c673cae
FG
5140{
5141 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
11fdf7f2
TL
5142 decode(seq, bl);
5143 bl.advance(1u); // skip legacy head_exists (always true)
5144 decode(snaps, bl);
5145 decode(clones, bl);
5146 decode(clone_overlap, bl);
5147 decode(clone_size, bl);
7c673cae 5148 if (struct_v >= 3) {
11fdf7f2 5149 decode(clone_snaps, bl);
7c673cae
FG
5150 } else {
5151 clone_snaps.clear();
5152 }
5153 DECODE_FINISH(bl);
5154}
5155
5156void SnapSet::dump(Formatter *f) const
5157{
5158 SnapContext sc(seq, snaps);
5159 f->open_object_section("snap_context");
5160 sc.dump(f);
5161 f->close_section();
7c673cae
FG
5162 f->open_array_section("clones");
5163 for (vector<snapid_t>::const_iterator p = clones.begin(); p != clones.end(); ++p) {
5164 f->open_object_section("clone");
5165 f->dump_unsigned("snap", *p);
94b18763
FG
5166 auto cs = clone_size.find(*p);
5167 if (cs != clone_size.end())
5168 f->dump_unsigned("size", cs->second);
5169 else
5170 f->dump_string("size", "????");
5171 auto co = clone_overlap.find(*p);
5172 if (co != clone_overlap.end())
5173 f->dump_stream("overlap") << co->second;
5174 else
5175 f->dump_stream("overlap") << "????";
7c673cae
FG
5176 auto q = clone_snaps.find(*p);
5177 if (q != clone_snaps.end()) {
5178 f->open_array_section("snaps");
5179 for (auto s : q->second) {
5180 f->dump_unsigned("snap", s);
5181 }
5182 f->close_section();
5183 }
5184 f->close_section();
5185 }
5186 f->close_section();
5187}
5188
5189void SnapSet::generate_test_instances(list<SnapSet*>& o)
5190{
5191 o.push_back(new SnapSet);
5192 o.push_back(new SnapSet);
7c673cae
FG
5193 o.back()->seq = 123;
5194 o.back()->snaps.push_back(123);
5195 o.back()->snaps.push_back(12);
5196 o.push_back(new SnapSet);
7c673cae
FG
5197 o.back()->seq = 123;
5198 o.back()->snaps.push_back(123);
5199 o.back()->snaps.push_back(12);
5200 o.back()->clones.push_back(12);
5201 o.back()->clone_size[12] = 12345;
5202 o.back()->clone_overlap[12];
5203 o.back()->clone_snaps[12] = {12, 10, 8};
5204}
5205
5206ostream& operator<<(ostream& out, const SnapSet& cs)
5207{
11fdf7f2
TL
5208 return out << cs.seq << "=" << cs.snaps << ":"
5209 << cs.clone_snaps;
7c673cae
FG
5210}
5211
5212void SnapSet::from_snap_set(const librados::snap_set_t& ss, bool legacy)
5213{
5214 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
5215 // correct: it will not include snaps that still logically exist
5216 // but for which there was no clone that is defined. For all
5217 // practical purposes this doesn't matter, since we only use that
5218 // information to clone on the OSD, and we have already moved
5219 // forward past that part of the object history.
5220
5221 seq = ss.seq;
5222 set<snapid_t> _snaps;
5223 set<snapid_t> _clones;
7c673cae
FG
5224 for (vector<librados::clone_info_t>::const_iterator p = ss.clones.begin();
5225 p != ss.clones.end();
5226 ++p) {
11fdf7f2 5227 if (p->cloneid != librados::SNAP_HEAD) {
7c673cae
FG
5228 _clones.insert(p->cloneid);
5229 _snaps.insert(p->snaps.begin(), p->snaps.end());
5230 clone_size[p->cloneid] = p->size;
5231 clone_overlap[p->cloneid]; // the entry must exist, even if it's empty.
5232 for (vector<pair<uint64_t, uint64_t> >::const_iterator q =
5233 p->overlap.begin(); q != p->overlap.end(); ++q)
5234 clone_overlap[p->cloneid].insert(q->first, q->second);
5235 if (!legacy) {
5236 // p->snaps is ascending; clone_snaps is descending
5237 vector<snapid_t>& v = clone_snaps[p->cloneid];
5238 for (auto q = p->snaps.rbegin(); q != p->snaps.rend(); ++q) {
5239 v.push_back(*q);
5240 }
5241 }
5242 }
5243 }
5244
5245 // ascending
5246 clones.clear();
5247 clones.reserve(_clones.size());
5248 for (set<snapid_t>::iterator p = _clones.begin(); p != _clones.end(); ++p)
5249 clones.push_back(*p);
5250
5251 // descending
5252 snaps.clear();
5253 snaps.reserve(_snaps.size());
5254 for (set<snapid_t>::reverse_iterator p = _snaps.rbegin();
5255 p != _snaps.rend(); ++p)
5256 snaps.push_back(*p);
5257}
5258
5259uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
5260{
11fdf7f2 5261 ceph_assert(clone_size.count(clone));
7c673cae 5262 uint64_t size = clone_size.find(clone)->second;
11fdf7f2 5263 ceph_assert(clone_overlap.count(clone));
7c673cae 5264 const interval_set<uint64_t> &overlap = clone_overlap.find(clone)->second;
11fdf7f2
TL
5265 ceph_assert(size >= (uint64_t)overlap.size());
5266 return size - overlap.size();
7c673cae
FG
5267}
5268
5269void SnapSet::filter(const pg_pool_t &pinfo)
5270{
5271 vector<snapid_t> oldsnaps;
5272 oldsnaps.swap(snaps);
5273 for (vector<snapid_t>::const_iterator i = oldsnaps.begin();
5274 i != oldsnaps.end();
5275 ++i) {
5276 if (!pinfo.is_removed_snap(*i))
5277 snaps.push_back(*i);
5278 }
5279}
5280
5281SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
5282{
5283 SnapSet ss = *this;
5284 ss.filter(pinfo);
5285 return ss;
5286}
5287
5288// -- watch_info_t --
5289
5290void watch_info_t::encode(bufferlist& bl, uint64_t features) const
5291{
5292 ENCODE_START(4, 3, bl);
11fdf7f2
TL
5293 encode(cookie, bl);
5294 encode(timeout_seconds, bl);
5295 encode(addr, bl, features);
7c673cae
FG
5296 ENCODE_FINISH(bl);
5297}
5298
11fdf7f2 5299void watch_info_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
5300{
5301 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
11fdf7f2 5302 decode(cookie, bl);
7c673cae
FG
5303 if (struct_v < 2) {
5304 uint64_t ver;
11fdf7f2 5305 decode(ver, bl);
7c673cae 5306 }
11fdf7f2 5307 decode(timeout_seconds, bl);
7c673cae 5308 if (struct_v >= 4) {
11fdf7f2 5309 decode(addr, bl);
7c673cae
FG
5310 }
5311 DECODE_FINISH(bl);
5312}
5313
5314void watch_info_t::dump(Formatter *f) const
5315{
5316 f->dump_unsigned("cookie", cookie);
5317 f->dump_unsigned("timeout_seconds", timeout_seconds);
5318 f->open_object_section("addr");
5319 addr.dump(f);
5320 f->close_section();
5321}
5322
5323void watch_info_t::generate_test_instances(list<watch_info_t*>& o)
5324{
5325 o.push_back(new watch_info_t);
5326 o.push_back(new watch_info_t);
5327 o.back()->cookie = 123;
5328 o.back()->timeout_seconds = 99;
5329 entity_addr_t ea;
5330 ea.set_type(entity_addr_t::TYPE_LEGACY);
5331 ea.set_nonce(1);
5332 ea.set_family(AF_INET);
5333 ea.set_in4_quad(0, 127);
5334 ea.set_in4_quad(1, 0);
5335 ea.set_in4_quad(2, 1);
5336 ea.set_in4_quad(3, 2);
5337 ea.set_port(2);
5338 o.back()->addr = ea;
5339}
5340
11fdf7f2
TL
5341// -- chunk_info_t --
5342
5343void chunk_info_t::encode(bufferlist& bl) const
5344{
5345 ENCODE_START(1, 1, bl);
5346 encode(offset, bl);
5347 encode(length, bl);
5348 encode(oid, bl);
5349 __u32 _flags = flags;
5350 encode(_flags, bl);
5351 ENCODE_FINISH(bl);
5352}
5353
5354void chunk_info_t::decode(bufferlist::const_iterator& bl)
5355{
5356 DECODE_START(1, bl);
5357 decode(offset, bl);
5358 decode(length, bl);
5359 decode(oid, bl);
5360 __u32 _flags;
5361 decode(_flags, bl);
5362 flags = (cflag_t)_flags;
5363 DECODE_FINISH(bl);
5364}
5365
5366void chunk_info_t::dump(Formatter *f) const
5367{
5368 f->dump_unsigned("length", length);
5369 f->open_object_section("oid");
5370 oid.dump(f);
5371 f->close_section();
5372 f->dump_unsigned("flags", flags);
5373}
5374
5375ostream& operator<<(ostream& out, const chunk_info_t& ci)
5376{
5377 return out << "(len: " << ci.length << " oid: " << ci.oid
5378 << " offset: " << ci.offset
5379 << " flags: " << ci.get_flag_string(ci.flags) << ")";
5380}
5381
31f18b77
FG
5382// -- object_manifest_t --
5383
5384void object_manifest_t::encode(bufferlist& bl) const
5385{
5386 ENCODE_START(1, 1, bl);
11fdf7f2 5387 encode(type, bl);
31f18b77
FG
5388 switch (type) {
5389 case TYPE_NONE: break;
5390 case TYPE_REDIRECT:
11fdf7f2
TL
5391 encode(redirect_target, bl);
5392 break;
5393 case TYPE_CHUNKED:
5394 encode(chunk_map, bl);
31f18b77
FG
5395 break;
5396 default:
5397 ceph_abort();
5398 }
5399 ENCODE_FINISH(bl);
5400}
5401
11fdf7f2 5402void object_manifest_t::decode(bufferlist::const_iterator& bl)
31f18b77
FG
5403{
5404 DECODE_START(1, bl);
11fdf7f2 5405 decode(type, bl);
31f18b77
FG
5406 switch (type) {
5407 case TYPE_NONE: break;
5408 case TYPE_REDIRECT:
11fdf7f2
TL
5409 decode(redirect_target, bl);
5410 break;
5411 case TYPE_CHUNKED:
5412 decode(chunk_map, bl);
31f18b77
FG
5413 break;
5414 default:
5415 ceph_abort();
5416 }
5417 DECODE_FINISH(bl);
5418}
5419
5420void object_manifest_t::dump(Formatter *f) const
5421{
5422 f->dump_unsigned("type", type);
11fdf7f2
TL
5423 if (type == TYPE_REDIRECT) {
5424 f->open_object_section("redirect_target");
5425 redirect_target.dump(f);
5426 f->close_section();
5427 } else if (type == TYPE_CHUNKED) {
5428 f->open_array_section("chunk_map");
5429 for (auto& p : chunk_map) {
5430 f->open_object_section("chunk");
5431 f->dump_unsigned("offset", p.first);
5432 p.second.dump(f);
5433 f->close_section();
5434 }
5435 f->close_section();
5436 }
31f18b77
FG
5437}
5438
5439void object_manifest_t::generate_test_instances(list<object_manifest_t*>& o)
5440{
5441 o.push_back(new object_manifest_t());
5442 o.back()->type = TYPE_REDIRECT;
5443}
5444
5445ostream& operator<<(ostream& out, const object_manifest_t& om)
5446{
11fdf7f2
TL
5447 out << "manifest(" << om.get_type_name();
5448 if (om.is_redirect()) {
5449 out << " " << om.redirect_target;
5450 } else if (om.is_chunked()) {
5451 out << " " << om.chunk_map;
5452 }
5453 out << ")";
5454 return out;
31f18b77 5455}
7c673cae
FG
5456
5457// -- object_info_t --
5458
5459void object_info_t::copy_user_bits(const object_info_t& other)
5460{
5461 // these bits are copied from head->clone.
5462 size = other.size;
5463 mtime = other.mtime;
5464 local_mtime = other.local_mtime;
5465 last_reqid = other.last_reqid;
5466 truncate_seq = other.truncate_seq;
5467 truncate_size = other.truncate_size;
5468 flags = other.flags;
5469 user_version = other.user_version;
5470 data_digest = other.data_digest;
5471 omap_digest = other.omap_digest;
5472}
5473
7c673cae
FG
5474void object_info_t::encode(bufferlist& bl, uint64_t features) const
5475{
5476 object_locator_t myoloc(soid);
5477 map<entity_name_t, watch_info_t> old_watchers;
5478 for (map<pair<uint64_t, entity_name_t>, watch_info_t>::const_iterator i =
5479 watchers.begin();
5480 i != watchers.end();
5481 ++i) {
5482 old_watchers.insert(make_pair(i->first.second, i->second));
5483 }
31f18b77 5484 ENCODE_START(17, 8, bl);
11fdf7f2
TL
5485 encode(soid, bl);
5486 encode(myoloc, bl); //Retained for compatibility
5487 encode((__u32)0, bl); // was category, no longer used
5488 encode(version, bl);
5489 encode(prior_version, bl);
5490 encode(last_reqid, bl);
5491 encode(size, bl);
5492 encode(mtime, bl);
7c673cae 5493 if (soid.snap == CEPH_NOSNAP)
11fdf7f2 5494 encode(osd_reqid_t(), bl); // used to be wrlock_by
7c673cae 5495 else
11fdf7f2
TL
5496 encode((uint32_t)0, bl); // was legacy_snaps
5497 encode(truncate_seq, bl);
5498 encode(truncate_size, bl);
5499 encode(is_lost(), bl);
5500 encode(old_watchers, bl, features);
7c673cae
FG
5501 /* shenanigans to avoid breaking backwards compatibility in the disk format.
5502 * When we can, switch this out for simply putting the version_t on disk. */
5503 eversion_t user_eversion(0, user_version);
11fdf7f2
TL
5504 encode(user_eversion, bl);
5505 encode(test_flag(FLAG_USES_TMAP), bl);
5506 encode(watchers, bl, features);
7c673cae 5507 __u32 _flags = flags;
11fdf7f2
TL
5508 encode(_flags, bl);
5509 encode(local_mtime, bl);
5510 encode(data_digest, bl);
5511 encode(omap_digest, bl);
5512 encode(expected_object_size, bl);
5513 encode(expected_write_size, bl);
5514 encode(alloc_hint_flags, bl);
31f18b77 5515 if (has_manifest()) {
11fdf7f2 5516 encode(manifest, bl);
31f18b77 5517 }
7c673cae
FG
5518 ENCODE_FINISH(bl);
5519}
5520
11fdf7f2 5521void object_info_t::decode(bufferlist::const_iterator& bl)
7c673cae
FG
5522{
5523 object_locator_t myoloc;
31f18b77 5524 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl);
7c673cae 5525 map<entity_name_t, watch_info_t> old_watchers;
11fdf7f2
TL
5526 decode(soid, bl);
5527 decode(myoloc, bl);
7c673cae
FG
5528 {
5529 string category;
11fdf7f2 5530 decode(category, bl); // no longer used
7c673cae 5531 }
11fdf7f2
TL
5532 decode(version, bl);
5533 decode(prior_version, bl);
5534 decode(last_reqid, bl);
5535 decode(size, bl);
5536 decode(mtime, bl);
7c673cae
FG
5537 if (soid.snap == CEPH_NOSNAP) {
5538 osd_reqid_t wrlock_by;
11fdf7f2 5539 decode(wrlock_by, bl);
7c673cae 5540 } else {
11fdf7f2
TL
5541 vector<snapid_t> legacy_snaps;
5542 decode(legacy_snaps, bl);
7c673cae 5543 }
11fdf7f2
TL
5544 decode(truncate_seq, bl);
5545 decode(truncate_size, bl);
7c673cae
FG
5546
5547 // if this is struct_v >= 13, we will overwrite this
5548 // below since this field is just here for backwards
5549 // compatibility
5550 __u8 lo;
11fdf7f2 5551 decode(lo, bl);
7c673cae
FG
5552 flags = (flag_t)lo;
5553
11fdf7f2 5554 decode(old_watchers, bl);
7c673cae 5555 eversion_t user_eversion;
11fdf7f2 5556 decode(user_eversion, bl);
7c673cae
FG
5557 user_version = user_eversion.version;
5558
5559 if (struct_v >= 9) {
5560 bool uses_tmap = false;
11fdf7f2 5561 decode(uses_tmap, bl);
7c673cae
FG
5562 if (uses_tmap)
5563 set_flag(FLAG_USES_TMAP);
5564 } else {
5565 set_flag(FLAG_USES_TMAP);
5566 }
5567 if (struct_v < 10)
5568 soid.pool = myoloc.pool;
5569 if (struct_v >= 11) {
11fdf7f2 5570 decode(watchers, bl);
7c673cae
FG
5571 } else {
5572 for (map<entity_name_t, watch_info_t>::iterator i = old_watchers.begin();
5573 i != old_watchers.end();
5574 ++i) {
5575 watchers.insert(
5576 make_pair(
5577 make_pair(i->second.cookie, i->first), i->second));
5578 }
5579 }
5580 if (struct_v >= 13) {
5581 __u32 _flags;
11fdf7f2 5582 decode(_flags, bl);
7c673cae
FG
5583 flags = (flag_t)_flags;
5584 }
5585 if (struct_v >= 14) {
11fdf7f2 5586 decode(local_mtime, bl);
7c673cae
FG
5587 } else {
5588 local_mtime = utime_t();
5589 }
5590 if (struct_v >= 15) {
11fdf7f2
TL
5591 decode(data_digest, bl);
5592 decode(omap_digest, bl);
7c673cae
FG
5593 } else {
5594 data_digest = omap_digest = -1;
5595 clear_flag(FLAG_DATA_DIGEST);
5596 clear_flag(FLAG_OMAP_DIGEST);
5597 }
5598 if (struct_v >= 16) {
11fdf7f2
TL
5599 decode(expected_object_size, bl);
5600 decode(expected_write_size, bl);
5601 decode(alloc_hint_flags, bl);
7c673cae
FG
5602 } else {
5603 expected_object_size = 0;
5604 expected_write_size = 0;
5605 alloc_hint_flags = 0;
5606 }
31f18b77
FG
5607 if (struct_v >= 17) {
5608 if (has_manifest()) {
11fdf7f2 5609 decode(manifest, bl);
31f18b77
FG
5610 }
5611 }
7c673cae
FG
5612 DECODE_FINISH(bl);
5613}
5614
5615void object_info_t::dump(Formatter *f) const
5616{
5617 f->open_object_section("oid");
5618 soid.dump(f);
5619 f->close_section();
5620 f->dump_stream("version") << version;
5621 f->dump_stream("prior_version") << prior_version;
5622 f->dump_stream("last_reqid") << last_reqid;
5623 f->dump_unsigned("user_version", user_version);
5624 f->dump_unsigned("size", size);
5625 f->dump_stream("mtime") << mtime;
5626 f->dump_stream("local_mtime") << local_mtime;
5627 f->dump_unsigned("lost", (int)is_lost());
94b18763
FG
5628 vector<string> sv = get_flag_vector(flags);
5629 f->open_array_section("flags");
5630 for (auto str: sv)
5631 f->dump_string("flags", str);
5632 f->close_section();
7c673cae
FG
5633 f->dump_unsigned("truncate_seq", truncate_seq);
5634 f->dump_unsigned("truncate_size", truncate_size);
94b18763
FG
5635 f->dump_format("data_digest", "0x%08x", data_digest);
5636 f->dump_format("omap_digest", "0x%08x", omap_digest);
7c673cae
FG
5637 f->dump_unsigned("expected_object_size", expected_object_size);
5638 f->dump_unsigned("expected_write_size", expected_write_size);
5639 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
31f18b77 5640 f->dump_object("manifest", manifest);
7c673cae
FG
5641 f->open_object_section("watchers");
5642 for (map<pair<uint64_t, entity_name_t>,watch_info_t>::const_iterator p =
5643 watchers.begin(); p != watchers.end(); ++p) {
5644 stringstream ss;
5645 ss << p->first.second;
5646 f->open_object_section(ss.str().c_str());
5647 p->second.dump(f);
5648 f->close_section();
5649 }
5650 f->close_section();
5651}
5652
5653void object_info_t::generate_test_instances(list<object_info_t*>& o)
5654{
5655 o.push_back(new object_info_t());
5656
5657 // fixme
5658}
5659
5660
5661ostream& operator<<(ostream& out, const object_info_t& oi)
5662{
5663 out << oi.soid << "(" << oi.version
5664 << " " << oi.last_reqid;
7c673cae
FG
5665 if (oi.flags)
5666 out << " " << oi.get_flag_string();
5667 out << " s " << oi.size;
5668 out << " uv " << oi.user_version;
5669 if (oi.is_data_digest())
5670 out << " dd " << std::hex << oi.data_digest << std::dec;
5671 if (oi.is_omap_digest())
5672 out << " od " << std::hex << oi.omap_digest << std::dec;
5673 out << " alloc_hint [" << oi.expected_object_size
5674 << " " << oi.expected_write_size
5675 << " " << oi.alloc_hint_flags << "]";
31f18b77
FG
5676 if (oi.has_manifest())
5677 out << " " << oi.manifest;
7c673cae
FG
5678 out << ")";
5679 return out;
5680}
5681
5682// -- ObjectRecovery --
5683void ObjectRecoveryProgress::encode(bufferlist &bl) const
5684{
5685 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5686 encode(first, bl);
5687 encode(data_complete, bl);
5688 encode(data_recovered_to, bl);
5689 encode(omap_recovered_to, bl);
5690 encode(omap_complete, bl);
7c673cae
FG
5691 ENCODE_FINISH(bl);
5692}
5693
11fdf7f2 5694void ObjectRecoveryProgress::decode(bufferlist::const_iterator &bl)
7c673cae
FG
5695{
5696 DECODE_START(1, bl);
11fdf7f2
TL
5697 decode(first, bl);
5698 decode(data_complete, bl);
5699 decode(data_recovered_to, bl);
5700 decode(omap_recovered_to, bl);
5701 decode(omap_complete, bl);
7c673cae
FG
5702 DECODE_FINISH(bl);
5703}
5704
5705ostream &operator<<(ostream &out, const ObjectRecoveryProgress &prog)
5706{
5707 return prog.print(out);
5708}
5709
5710void ObjectRecoveryProgress::generate_test_instances(
5711 list<ObjectRecoveryProgress*>& o)
5712{
5713 o.push_back(new ObjectRecoveryProgress);
5714 o.back()->first = false;
5715 o.back()->data_complete = true;
5716 o.back()->omap_complete = true;
5717 o.back()->data_recovered_to = 100;
5718
5719 o.push_back(new ObjectRecoveryProgress);
5720 o.back()->first = true;
5721 o.back()->data_complete = false;
5722 o.back()->omap_complete = false;
5723 o.back()->data_recovered_to = 0;
5724}
5725
5726ostream &ObjectRecoveryProgress::print(ostream &out) const
5727{
5728 return out << "ObjectRecoveryProgress("
5729 << ( first ? "" : "!" ) << "first, "
5730 << "data_recovered_to:" << data_recovered_to
5731 << ", data_complete:" << ( data_complete ? "true" : "false" )
5732 << ", omap_recovered_to:" << omap_recovered_to
5733 << ", omap_complete:" << ( omap_complete ? "true" : "false" )
224ce89b 5734 << ", error:" << ( error ? "true" : "false" )
7c673cae
FG
5735 << ")";
5736}
5737
5738void ObjectRecoveryProgress::dump(Formatter *f) const
5739{
5740 f->dump_int("first?", first);
5741 f->dump_int("data_complete?", data_complete);
5742 f->dump_unsigned("data_recovered_to", data_recovered_to);
5743 f->dump_int("omap_complete?", omap_complete);
5744 f->dump_string("omap_recovered_to", omap_recovered_to);
5745}
5746
5747void ObjectRecoveryInfo::encode(bufferlist &bl, uint64_t features) const
5748{
5749 ENCODE_START(2, 1, bl);
11fdf7f2
TL
5750 encode(soid, bl);
5751 encode(version, bl);
5752 encode(size, bl);
5753 encode(oi, bl, features);
5754 encode(ss, bl);
5755 encode(copy_subset, bl);
5756 encode(clone_subset, bl);
7c673cae
FG
5757 ENCODE_FINISH(bl);
5758}
5759
11fdf7f2 5760void ObjectRecoveryInfo::decode(bufferlist::const_iterator &bl,
7c673cae
FG
5761 int64_t pool)
5762{
5763 DECODE_START(2, bl);
11fdf7f2
TL
5764 decode(soid, bl);
5765 decode(version, bl);
5766 decode(size, bl);
5767 decode(oi, bl);
5768 decode(ss, bl);
5769 decode(copy_subset, bl);
5770 decode(clone_subset, bl);
7c673cae
FG
5771 DECODE_FINISH(bl);
5772
5773 if (struct_v < 2) {
5774 if (!soid.is_max() && soid.pool == -1)
5775 soid.pool = pool;
5776 map<hobject_t, interval_set<uint64_t>> tmp;
5777 tmp.swap(clone_subset);
5778 for (map<hobject_t, interval_set<uint64_t>>::iterator i = tmp.begin();
5779 i != tmp.end();
5780 ++i) {
5781 hobject_t first(i->first);
5782 if (!first.is_max() && first.pool == -1)
5783 first.pool = pool;
5784 clone_subset[first].swap(i->second);
5785 }
5786 }
5787}
5788
5789void ObjectRecoveryInfo::generate_test_instances(
5790 list<ObjectRecoveryInfo*>& o)
5791{
5792 o.push_back(new ObjectRecoveryInfo);
5793 o.back()->soid = hobject_t(sobject_t("key", CEPH_NOSNAP));
5794 o.back()->version = eversion_t(0,0);
5795 o.back()->size = 100;
5796}
5797
5798
5799void ObjectRecoveryInfo::dump(Formatter *f) const
5800{
5801 f->dump_stream("object") << soid;
5802 f->dump_stream("at_version") << version;
5803 f->dump_stream("size") << size;
5804 {
5805 f->open_object_section("object_info");
5806 oi.dump(f);
5807 f->close_section();
5808 }
5809 {
5810 f->open_object_section("snapset");
5811 ss.dump(f);
5812 f->close_section();
5813 }
5814 f->dump_stream("copy_subset") << copy_subset;
5815 f->dump_stream("clone_subset") << clone_subset;
5816}
5817
5818ostream& operator<<(ostream& out, const ObjectRecoveryInfo &inf)
5819{
5820 return inf.print(out);
5821}
5822
5823ostream &ObjectRecoveryInfo::print(ostream &out) const
5824{
5825 return out << "ObjectRecoveryInfo("
5826 << soid << "@" << version
5827 << ", size: " << size
5828 << ", copy_subset: " << copy_subset
5829 << ", clone_subset: " << clone_subset
5830 << ", snapset: " << ss
5831 << ")";
5832}
5833
5834// -- PushReplyOp --
5835void PushReplyOp::generate_test_instances(list<PushReplyOp*> &o)
5836{
5837 o.push_back(new PushReplyOp);
5838 o.push_back(new PushReplyOp);
5839 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5840 o.push_back(new PushReplyOp);
5841 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5842}
5843
5844void PushReplyOp::encode(bufferlist &bl) const
5845{
5846 ENCODE_START(1, 1, bl);
11fdf7f2 5847 encode(soid, bl);
7c673cae
FG
5848 ENCODE_FINISH(bl);
5849}
5850
11fdf7f2 5851void PushReplyOp::decode(bufferlist::const_iterator &bl)
7c673cae
FG
5852{
5853 DECODE_START(1, bl);
11fdf7f2 5854 decode(soid, bl);
7c673cae
FG
5855 DECODE_FINISH(bl);
5856}
5857
5858void PushReplyOp::dump(Formatter *f) const
5859{
5860 f->dump_stream("soid") << soid;
5861}
5862
5863ostream &PushReplyOp::print(ostream &out) const
5864{
5865 return out
5866 << "PushReplyOp(" << soid
5867 << ")";
5868}
5869
5870ostream& operator<<(ostream& out, const PushReplyOp &op)
5871{
5872 return op.print(out);
5873}
5874
5875uint64_t PushReplyOp::cost(CephContext *cct) const
5876{
5877
5878 return cct->_conf->osd_push_per_object_cost +
5879 cct->_conf->osd_recovery_max_chunk;
5880}
5881
5882// -- PullOp --
5883void PullOp::generate_test_instances(list<PullOp*> &o)
5884{
5885 o.push_back(new PullOp);
5886 o.push_back(new PullOp);
5887 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5888 o.back()->recovery_info.version = eversion_t(3, 10);
5889 o.push_back(new PullOp);
5890 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5891 o.back()->recovery_info.version = eversion_t(0, 0);
5892}
5893
5894void PullOp::encode(bufferlist &bl, uint64_t features) const
5895{
5896 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5897 encode(soid, bl);
5898 encode(recovery_info, bl, features);
5899 encode(recovery_progress, bl);
7c673cae
FG
5900 ENCODE_FINISH(bl);
5901}
5902
11fdf7f2 5903void PullOp::decode(bufferlist::const_iterator &bl)
7c673cae
FG
5904{
5905 DECODE_START(1, bl);
11fdf7f2
TL
5906 decode(soid, bl);
5907 decode(recovery_info, bl);
5908 decode(recovery_progress, bl);
7c673cae
FG
5909 DECODE_FINISH(bl);
5910}
5911
5912void PullOp::dump(Formatter *f) const
5913{
5914 f->dump_stream("soid") << soid;
5915 {
5916 f->open_object_section("recovery_info");
5917 recovery_info.dump(f);
5918 f->close_section();
5919 }
5920 {
5921 f->open_object_section("recovery_progress");
5922 recovery_progress.dump(f);
5923 f->close_section();
5924 }
5925}
5926
5927ostream &PullOp::print(ostream &out) const
5928{
5929 return out
5930 << "PullOp(" << soid
5931 << ", recovery_info: " << recovery_info
5932 << ", recovery_progress: " << recovery_progress
5933 << ")";
5934}
5935
5936ostream& operator<<(ostream& out, const PullOp &op)
5937{
5938 return op.print(out);
5939}
5940
5941uint64_t PullOp::cost(CephContext *cct) const
5942{
5943 return cct->_conf->osd_push_per_object_cost +
5944 cct->_conf->osd_recovery_max_chunk;
5945}
5946
5947// -- PushOp --
5948void PushOp::generate_test_instances(list<PushOp*> &o)
5949{
5950 o.push_back(new PushOp);
5951 o.push_back(new PushOp);
5952 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5953 o.back()->version = eversion_t(3, 10);
5954 o.push_back(new PushOp);
5955 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5956 o.back()->version = eversion_t(0, 0);
5957}
5958
5959void PushOp::encode(bufferlist &bl, uint64_t features) const
5960{
5961 ENCODE_START(1, 1, bl);
11fdf7f2
TL
5962 encode(soid, bl);
5963 encode(version, bl);
5964 encode(data, bl);
5965 encode(data_included, bl);
5966 encode(omap_header, bl);
5967 encode(omap_entries, bl);
5968 encode(attrset, bl);
5969 encode(recovery_info, bl, features);
5970 encode(after_progress, bl);
5971 encode(before_progress, bl);
7c673cae
FG
5972 ENCODE_FINISH(bl);
5973}
5974
11fdf7f2 5975void PushOp::decode(bufferlist::const_iterator &bl)
7c673cae
FG
5976{
5977 DECODE_START(1, bl);
11fdf7f2
TL
5978 decode(soid, bl);
5979 decode(version, bl);
5980 decode(data, bl);
5981 decode(data_included, bl);
5982 decode(omap_header, bl);
5983 decode(omap_entries, bl);
5984 decode(attrset, bl);
5985 decode(recovery_info, bl);
5986 decode(after_progress, bl);
5987 decode(before_progress, bl);
7c673cae
FG
5988 DECODE_FINISH(bl);
5989}
5990
5991void PushOp::dump(Formatter *f) const
5992{
5993 f->dump_stream("soid") << soid;
5994 f->dump_stream("version") << version;
5995 f->dump_int("data_len", data.length());
5996 f->dump_stream("data_included") << data_included;
5997 f->dump_int("omap_header_len", omap_header.length());
5998 f->dump_int("omap_entries_len", omap_entries.size());
5999 f->dump_int("attrset_len", attrset.size());
6000 {
6001 f->open_object_section("recovery_info");
6002 recovery_info.dump(f);
6003 f->close_section();
6004 }
6005 {
6006 f->open_object_section("after_progress");
6007 after_progress.dump(f);
6008 f->close_section();
6009 }
6010 {
6011 f->open_object_section("before_progress");
6012 before_progress.dump(f);
6013 f->close_section();
6014 }
6015}
6016
6017ostream &PushOp::print(ostream &out) const
6018{
6019 return out
6020 << "PushOp(" << soid
6021 << ", version: " << version
6022 << ", data_included: " << data_included
6023 << ", data_size: " << data.length()
6024 << ", omap_header_size: " << omap_header.length()
6025 << ", omap_entries_size: " << omap_entries.size()
6026 << ", attrset_size: " << attrset.size()
6027 << ", recovery_info: " << recovery_info
6028 << ", after_progress: " << after_progress
6029 << ", before_progress: " << before_progress
6030 << ")";
6031}
6032
6033ostream& operator<<(ostream& out, const PushOp &op)
6034{
6035 return op.print(out);
6036}
6037
6038uint64_t PushOp::cost(CephContext *cct) const
6039{
6040 uint64_t cost = data_included.size();
6041 for (map<string, bufferlist>::const_iterator i =
6042 omap_entries.begin();
6043 i != omap_entries.end();
6044 ++i) {
6045 cost += i->second.length();
6046 }
6047 cost += cct->_conf->osd_push_per_object_cost;
6048 return cost;
6049}
6050
6051// -- ScrubMap --
6052
6053void ScrubMap::merge_incr(const ScrubMap &l)
6054{
11fdf7f2 6055 ceph_assert(valid_through == l.incr_since);
7c673cae
FG
6056 valid_through = l.valid_through;
6057
6058 for (map<hobject_t,object>::const_iterator p = l.objects.begin();
6059 p != l.objects.end();
6060 ++p){
6061 if (p->second.negative) {
6062 map<hobject_t,object>::iterator q = objects.find(p->first);
6063 if (q != objects.end()) {
6064 objects.erase(q);
6065 }
6066 } else {
6067 objects[p->first] = p->second;
6068 }
6069 }
6070}
6071
6072void ScrubMap::encode(bufferlist& bl) const
6073{
6074 ENCODE_START(3, 2, bl);
11fdf7f2
TL
6075 encode(objects, bl);
6076 encode((__u32)0, bl); // used to be attrs; now deprecated
7c673cae 6077 bufferlist old_logbl; // not used
11fdf7f2
TL
6078 encode(old_logbl, bl);
6079 encode(valid_through, bl);
6080 encode(incr_since, bl);
7c673cae
FG
6081 ENCODE_FINISH(bl);
6082}
6083
11fdf7f2 6084void ScrubMap::decode(bufferlist::const_iterator& bl, int64_t pool)
7c673cae
FG
6085{
6086 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
11fdf7f2 6087 decode(objects, bl);
7c673cae
FG
6088 {
6089 map<string,string> attrs; // deprecated
11fdf7f2 6090 decode(attrs, bl);
7c673cae
FG
6091 }
6092 bufferlist old_logbl; // not used
11fdf7f2
TL
6093 decode(old_logbl, bl);
6094 decode(valid_through, bl);
6095 decode(incr_since, bl);
7c673cae
FG
6096 DECODE_FINISH(bl);
6097
6098 // handle hobject_t upgrade
6099 if (struct_v < 3) {
6100 map<hobject_t, object> tmp;
6101 tmp.swap(objects);
6102 for (map<hobject_t, object>::iterator i = tmp.begin();
6103 i != tmp.end();
6104 ++i) {
6105 hobject_t first(i->first);
6106 if (!first.is_max() && first.pool == -1)
6107 first.pool = pool;
6108 objects[first] = i->second;
6109 }
6110 }
6111}
6112
6113void ScrubMap::dump(Formatter *f) const
6114{
6115 f->dump_stream("valid_through") << valid_through;
6116 f->dump_stream("incremental_since") << incr_since;
6117 f->open_array_section("objects");
6118 for (map<hobject_t,object>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
6119 f->open_object_section("object");
6120 f->dump_string("name", p->first.oid.name);
6121 f->dump_unsigned("hash", p->first.get_hash());
6122 f->dump_string("key", p->first.get_key());
6123 f->dump_int("snapid", p->first.snap);
6124 p->second.dump(f);
6125 f->close_section();
6126 }
6127 f->close_section();
6128}
6129
6130void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
6131{
6132 o.push_back(new ScrubMap);
6133 o.push_back(new ScrubMap);
6134 o.back()->valid_through = eversion_t(1, 2);
6135 o.back()->incr_since = eversion_t(3, 4);
6136 list<object*> obj;
6137 object::generate_test_instances(obj);
6138 o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
6139 obj.pop_back();
6140 o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
6141}
6142
6143// -- ScrubMap::object --
6144
6145void ScrubMap::object::encode(bufferlist& bl) const
6146{
6147 bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
11fdf7f2
TL
6148 ENCODE_START(10, 7, bl);
6149 encode(size, bl);
6150 encode(negative, bl);
6151 encode(attrs, bl);
6152 encode(digest, bl);
6153 encode(digest_present, bl);
6154 encode((uint32_t)0, bl); // obsolete nlinks
6155 encode((uint32_t)0, bl); // snapcolls
6156 encode(omap_digest, bl);
6157 encode(omap_digest_present, bl);
6158 encode(compat_read_error, bl);
6159 encode(stat_error, bl);
6160 encode(read_error, bl);
6161 encode(ec_hash_mismatch, bl);
6162 encode(ec_size_mismatch, bl);
6163 encode(large_omap_object_found, bl);
6164 encode(large_omap_object_key_count, bl);
6165 encode(large_omap_object_value_size, bl);
6166 encode(object_omap_bytes, bl);
6167 encode(object_omap_keys, bl);
7c673cae
FG
6168 ENCODE_FINISH(bl);
6169}
6170
11fdf7f2 6171void ScrubMap::object::decode(bufferlist::const_iterator& bl)
7c673cae 6172{
11fdf7f2
TL
6173 DECODE_START(10, bl);
6174 decode(size, bl);
7c673cae 6175 bool tmp, compat_read_error = false;
11fdf7f2 6176 decode(tmp, bl);
7c673cae 6177 negative = tmp;
11fdf7f2
TL
6178 decode(attrs, bl);
6179 decode(digest, bl);
6180 decode(tmp, bl);
7c673cae
FG
6181 digest_present = tmp;
6182 {
6183 uint32_t nlinks;
11fdf7f2 6184 decode(nlinks, bl);
7c673cae 6185 set<snapid_t> snapcolls;
11fdf7f2 6186 decode(snapcolls, bl);
7c673cae 6187 }
11fdf7f2
TL
6188 decode(omap_digest, bl);
6189 decode(tmp, bl);
7c673cae 6190 omap_digest_present = tmp;
11fdf7f2
TL
6191 decode(compat_read_error, bl);
6192 decode(tmp, bl);
7c673cae
FG
6193 stat_error = tmp;
6194 if (struct_v >= 8) {
11fdf7f2 6195 decode(tmp, bl);
7c673cae 6196 read_error = tmp;
11fdf7f2 6197 decode(tmp, bl);
7c673cae 6198 ec_hash_mismatch = tmp;
11fdf7f2 6199 decode(tmp, bl);
7c673cae
FG
6200 ec_size_mismatch = tmp;
6201 }
6202 // If older encoder found a read_error, set read_error
6203 if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
6204 read_error = true;
28e407b8 6205 if (struct_v >= 9) {
11fdf7f2 6206 decode(tmp, bl);
28e407b8 6207 large_omap_object_found = tmp;
11fdf7f2
TL
6208 decode(large_omap_object_key_count, bl);
6209 decode(large_omap_object_value_size, bl);
6210 }
6211 if (struct_v >= 10) {
6212 decode(object_omap_bytes, bl);
6213 decode(object_omap_keys, bl);
28e407b8 6214 }
7c673cae
FG
6215 DECODE_FINISH(bl);
6216}
6217
6218void ScrubMap::object::dump(Formatter *f) const
6219{
6220 f->dump_int("size", size);
6221 f->dump_int("negative", negative);
6222 f->open_array_section("attrs");
6223 for (map<string,bufferptr>::const_iterator p = attrs.begin(); p != attrs.end(); ++p) {
6224 f->open_object_section("attr");
6225 f->dump_string("name", p->first);
6226 f->dump_int("length", p->second.length());
6227 f->close_section();
6228 }
6229 f->close_section();
6230}
6231
6232void ScrubMap::object::generate_test_instances(list<object*>& o)
6233{
6234 o.push_back(new object);
6235 o.push_back(new object);
6236 o.back()->negative = true;
6237 o.push_back(new object);
6238 o.back()->size = 123;
6239 o.back()->attrs["foo"] = buffer::copy("foo", 3);
6240 o.back()->attrs["bar"] = buffer::copy("barval", 6);
6241}
6242
6243// -- OSDOp --
6244
6245ostream& operator<<(ostream& out, const OSDOp& op)
6246{
6247 out << ceph_osd_op_name(op.op.op);
6248 if (ceph_osd_op_type_data(op.op.op)) {
6249 // data extent
6250 switch (op.op.op) {
6251 case CEPH_OSD_OP_ASSERT_VER:
6252 out << " v" << op.op.assert_ver.ver;
6253 break;
6254 case CEPH_OSD_OP_TRUNCATE:
6255 out << " " << op.op.extent.offset;
6256 break;
6257 case CEPH_OSD_OP_MASKTRUNC:
6258 case CEPH_OSD_OP_TRIMTRUNC:
6259 out << " " << op.op.extent.truncate_seq << "@"
6260 << (int64_t)op.op.extent.truncate_size;
6261 break;
6262 case CEPH_OSD_OP_ROLLBACK:
6263 out << " " << snapid_t(op.op.snap.snapid);
6264 break;
6265 case CEPH_OSD_OP_WATCH:
6266 out << " " << ceph_osd_watch_op_name(op.op.watch.op)
6267 << " cookie " << op.op.watch.cookie;
6268 if (op.op.watch.gen)
6269 out << " gen " << op.op.watch.gen;
6270 break;
6271 case CEPH_OSD_OP_NOTIFY:
7c673cae
FG
6272 out << " cookie " << op.op.notify.cookie;
6273 break;
6274 case CEPH_OSD_OP_COPY_GET:
6275 out << " max " << op.op.copy_get.max;
6276 break;
6277 case CEPH_OSD_OP_COPY_FROM:
6278 out << " ver " << op.op.copy_from.src_version;
6279 break;
6280 case CEPH_OSD_OP_SETALLOCHINT:
6281 out << " object_size " << op.op.alloc_hint.expected_object_size
6282 << " write_size " << op.op.alloc_hint.expected_write_size;
6283 break;
6284 case CEPH_OSD_OP_READ:
6285 case CEPH_OSD_OP_SPARSE_READ:
6286 case CEPH_OSD_OP_SYNC_READ:
6287 case CEPH_OSD_OP_WRITE:
6288 case CEPH_OSD_OP_WRITEFULL:
6289 case CEPH_OSD_OP_ZERO:
6290 case CEPH_OSD_OP_APPEND:
6291 case CEPH_OSD_OP_MAPEXT:
11fdf7f2 6292 case CEPH_OSD_OP_CMPEXT:
7c673cae
FG
6293 out << " " << op.op.extent.offset << "~" << op.op.extent.length;
6294 if (op.op.extent.truncate_seq)
6295 out << " [" << op.op.extent.truncate_seq << "@"
6296 << (int64_t)op.op.extent.truncate_size << "]";
6297 if (op.op.flags)
6298 out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
6299 default:
6300 // don't show any arg info
6301 break;
6302 }
6303 } else if (ceph_osd_op_type_attr(op.op.op)) {
6304 // xattr name
6305 if (op.op.xattr.name_len && op.indata.length()) {
6306 out << " ";
6307 op.indata.write(0, op.op.xattr.name_len, out);
6308 }
6309 if (op.op.xattr.value_len)
6310 out << " (" << op.op.xattr.value_len << ")";
6311 if (op.op.op == CEPH_OSD_OP_CMPXATTR)
6312 out << " op " << (int)op.op.xattr.cmp_op
6313 << " mode " << (int)op.op.xattr.cmp_mode;
6314 } else if (ceph_osd_op_type_exec(op.op.op)) {
6315 // class.method
6316 if (op.op.cls.class_len && op.indata.length()) {
6317 out << " ";
6318 op.indata.write(0, op.op.cls.class_len, out);
6319 out << ".";
6320 op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
6321 }
6322 } else if (ceph_osd_op_type_pg(op.op.op)) {
6323 switch (op.op.op) {
6324 case CEPH_OSD_OP_PGLS:
6325 case CEPH_OSD_OP_PGLS_FILTER:
6326 case CEPH_OSD_OP_PGNLS:
6327 case CEPH_OSD_OP_PGNLS_FILTER:
6328 out << " start_epoch " << op.op.pgls.start_epoch;
6329 break;
6330 case CEPH_OSD_OP_PG_HITSET_LS:
6331 break;
6332 case CEPH_OSD_OP_PG_HITSET_GET:
6333 out << " " << utime_t(op.op.hit_set_get.stamp);
6334 break;
6335 case CEPH_OSD_OP_SCRUBLS:
6336 break;
6337 }
6338 }
6339 return out;
6340}
6341
6342
6343void OSDOp::split_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& in)
6344{
6345 bufferlist::iterator datap = in.begin();
6346 for (unsigned i = 0; i < ops.size(); i++) {
6347 if (ops[i].op.payload_len) {
6348 datap.copy(ops[i].op.payload_len, ops[i].indata);
6349 }
6350 }
6351}
6352
6353void OSDOp::merge_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& out)
6354{
6355 for (unsigned i = 0; i < ops.size(); i++) {
6356 if (ops[i].indata.length()) {
6357 ops[i].op.payload_len = ops[i].indata.length();
6358 out.append(ops[i].indata);
6359 }
6360 }
6361}
6362
6363void OSDOp::split_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& in)
6364{
6365 bufferlist::iterator datap = in.begin();
6366 for (unsigned i = 0; i < ops.size(); i++) {
6367 if (ops[i].op.payload_len) {
6368 datap.copy(ops[i].op.payload_len, ops[i].outdata);
6369 }
6370 }
6371}
6372
6373void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& out)
6374{
6375 for (unsigned i = 0; i < ops.size(); i++) {
6376 if (ops[i].outdata.length()) {
6377 ops[i].op.payload_len = ops[i].outdata.length();
6378 out.append(ops[i].outdata);
6379 }
6380 }
6381}
6382
224ce89b
WB
6383void OSDOp::clear_data(vector<OSDOp>& ops)
6384{
6385 for (unsigned i = 0; i < ops.size(); i++) {
6386 OSDOp& op = ops[i];
6387 op.outdata.clear();
6388 if (ceph_osd_op_type_attr(op.op.op) &&
6389 op.op.xattr.name_len &&
6390 op.indata.length() >= op.op.xattr.name_len) {
6391 bufferptr bp(op.op.xattr.name_len);
6392 bufferlist bl;
6393 bl.append(bp);
6394 bl.copy_in(0, op.op.xattr.name_len, op.indata);
6395 op.indata.claim(bl);
6396 } else if (ceph_osd_op_type_exec(op.op.op) &&
6397 op.op.cls.class_len &&
6398 op.indata.length() >
6399 (op.op.cls.class_len + op.op.cls.method_len)) {
6400 __u8 len = op.op.cls.class_len + op.op.cls.method_len;
6401 bufferptr bp(len);
6402 bufferlist bl;
6403 bl.append(bp);
6404 bl.copy_in(0, len, op.indata);
6405 op.indata.claim(bl);
6406 } else {
6407 op.indata.clear();
6408 }
6409 }
6410}