]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/osd_types.cc
update sources to v12.1.1
[ceph.git] / ceph / src / osd / osd_types.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18#include <boost/assign/list_of.hpp>
19
20#include "osd_types.h"
21#include "include/ceph_features.h"
22extern "C" {
23#include "crush/hash.h"
24}
25#include "PG.h"
26#include "OSDMap.h"
27#include "PGBackend.h"
28
29const char *ceph_osd_flag_name(unsigned flag)
30{
31 switch (flag) {
32 case CEPH_OSD_FLAG_ACK: return "ack";
33 case CEPH_OSD_FLAG_ONNVRAM: return "onnvram";
34 case CEPH_OSD_FLAG_ONDISK: return "ondisk";
35 case CEPH_OSD_FLAG_RETRY: return "retry";
36 case CEPH_OSD_FLAG_READ: return "read";
37 case CEPH_OSD_FLAG_WRITE: return "write";
38 case CEPH_OSD_FLAG_ORDERSNAP: return "ordersnap";
39 case CEPH_OSD_FLAG_PEERSTAT_OLD: return "peerstat_old";
40 case CEPH_OSD_FLAG_BALANCE_READS: return "balance_reads";
41 case CEPH_OSD_FLAG_PARALLELEXEC: return "parallelexec";
42 case CEPH_OSD_FLAG_PGOP: return "pgop";
43 case CEPH_OSD_FLAG_EXEC: return "exec";
44 case CEPH_OSD_FLAG_EXEC_PUBLIC: return "exec_public";
45 case CEPH_OSD_FLAG_LOCALIZE_READS: return "localize_reads";
46 case CEPH_OSD_FLAG_RWORDERED: return "rwordered";
47 case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache";
48 case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks";
49 case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay";
50 case CEPH_OSD_FLAG_FLUSH: return "flush";
51 case CEPH_OSD_FLAG_MAP_SNAP_CLONE: return "map_snap_clone";
52 case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
53 case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
54 case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
55 case CEPH_OSD_FLAG_FULL_TRY: return "full_try";
56 case CEPH_OSD_FLAG_FULL_FORCE: return "full_force";
224ce89b 57 case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect";
7c673cae
FG
58 default: return "???";
59 }
60}
61
62string ceph_osd_flag_string(unsigned flags)
63{
64 string s;
65 for (unsigned i=0; i<32; ++i) {
66 if (flags & (1u<<i)) {
67 if (s.length())
68 s += "+";
69 s += ceph_osd_flag_name(1u << i);
70 }
71 }
72 if (s.length())
73 return s;
74 return string("-");
75}
76
77const char * ceph_osd_op_flag_name(unsigned flag)
78{
79 const char *name;
80
81 switch(flag) {
82 case CEPH_OSD_OP_FLAG_EXCL:
83 name = "excl";
84 break;
85 case CEPH_OSD_OP_FLAG_FAILOK:
86 name = "failok";
87 break;
88 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM:
89 name = "fadvise_random";
90 break;
91 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL:
92 name = "fadvise_sequential";
93 break;
94 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED:
95 name = "favise_willneed";
96 break;
97 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED:
98 name = "fadvise_dontneed";
99 break;
100 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
101 name = "fadvise_nocache";
102 break;
103 default:
104 name = "???";
105 };
106
107 return name;
108}
109
110string ceph_osd_op_flag_string(unsigned flags)
111{
112 string s;
113 for (unsigned i=0; i<32; ++i) {
114 if (flags & (1u<<i)) {
115 if (s.length())
116 s += "+";
117 s += ceph_osd_op_flag_name(1u << i);
118 }
119 }
120 if (s.length())
121 return s;
122 return string("-");
123}
124
125string ceph_osd_alloc_hint_flag_string(unsigned flags)
126{
127 string s;
128 for (unsigned i=0; i<32; ++i) {
129 if (flags & (1u<<i)) {
130 if (s.length())
131 s += "+";
132 s += ceph_osd_alloc_hint_flag_name(1u << i);
133 }
134 }
135 if (s.length())
136 return s;
137 return string("-");
138}
139
140void pg_shard_t::encode(bufferlist &bl) const
141{
142 ENCODE_START(1, 1, bl);
143 ::encode(osd, bl);
144 ::encode(shard, bl);
145 ENCODE_FINISH(bl);
146}
147void pg_shard_t::decode(bufferlist::iterator &bl)
148{
149 DECODE_START(1, bl);
150 ::decode(osd, bl);
151 ::decode(shard, bl);
152 DECODE_FINISH(bl);
153}
154
155ostream &operator<<(ostream &lhs, const pg_shard_t &rhs)
156{
157 if (rhs.is_undefined())
158 return lhs << "?";
159 if (rhs.shard == shard_id_t::NO_SHARD)
160 return lhs << rhs.osd;
161 return lhs << rhs.osd << '(' << (unsigned)(rhs.shard) << ')';
162}
163
164// -- osd_reqid_t --
165void osd_reqid_t::dump(Formatter *f) const
166{
167 f->dump_stream("name") << name;
168 f->dump_int("inc", inc);
169 f->dump_unsigned("tid", tid);
170}
171
172void osd_reqid_t::generate_test_instances(list<osd_reqid_t*>& o)
173{
174 o.push_back(new osd_reqid_t);
175 o.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
176}
177
178// -- object_locator_t --
179
180void object_locator_t::encode(bufferlist& bl) const
181{
182 // verify that nobody's corrupted the locator
183 assert(hash == -1 || key.empty());
184 __u8 encode_compat = 3;
185 ENCODE_START(6, encode_compat, bl);
186 ::encode(pool, bl);
187 int32_t preferred = -1; // tell old code there is no preferred osd (-1).
188 ::encode(preferred, bl);
189 ::encode(key, bl);
190 ::encode(nspace, bl);
191 ::encode(hash, bl);
192 if (hash != -1)
193 encode_compat = MAX(encode_compat, 6); // need to interpret the hash
194 ENCODE_FINISH_NEW_COMPAT(bl, encode_compat);
195}
196
197void object_locator_t::decode(bufferlist::iterator& p)
198{
199 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p);
200 if (struct_v < 2) {
201 int32_t op;
202 ::decode(op, p);
203 pool = op;
204 int16_t pref;
205 ::decode(pref, p);
206 } else {
207 ::decode(pool, p);
208 int32_t preferred;
209 ::decode(preferred, p);
210 }
211 ::decode(key, p);
212 if (struct_v >= 5)
213 ::decode(nspace, p);
214 if (struct_v >= 6)
215 ::decode(hash, p);
216 else
217 hash = -1;
218 DECODE_FINISH(p);
219 // verify that nobody's corrupted the locator
220 assert(hash == -1 || key.empty());
221}
222
223void object_locator_t::dump(Formatter *f) const
224{
225 f->dump_int("pool", pool);
226 f->dump_string("key", key);
227 f->dump_string("namespace", nspace);
228 f->dump_int("hash", hash);
229}
230
231void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
232{
233 o.push_back(new object_locator_t);
234 o.push_back(new object_locator_t(123));
235 o.push_back(new object_locator_t(123, 876));
236 o.push_back(new object_locator_t(1, "n2"));
237 o.push_back(new object_locator_t(1234, "", "key"));
238 o.push_back(new object_locator_t(12, "n1", "key2"));
239}
240
241// -- request_redirect_t --
242void request_redirect_t::encode(bufferlist& bl) const
243{
244 ENCODE_START(1, 1, bl);
245 ::encode(redirect_locator, bl);
246 ::encode(redirect_object, bl);
247 ::encode(osd_instructions, bl);
248 ENCODE_FINISH(bl);
249}
250
251void request_redirect_t::decode(bufferlist::iterator& bl)
252{
253 DECODE_START(1, bl);
254 ::decode(redirect_locator, bl);
255 ::decode(redirect_object, bl);
256 ::decode(osd_instructions, bl);
257 DECODE_FINISH(bl);
258}
259
260void request_redirect_t::dump(Formatter *f) const
261{
262 f->dump_string("object", redirect_object);
263 f->open_object_section("locator");
264 redirect_locator.dump(f);
265 f->close_section(); // locator
266}
267
268void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
269{
270 object_locator_t loc(1, "redir_obj");
271 o.push_back(new request_redirect_t());
272 o.push_back(new request_redirect_t(loc, 0));
273 o.push_back(new request_redirect_t(loc, "redir_obj"));
274 o.push_back(new request_redirect_t(loc));
275}
276
277void objectstore_perf_stat_t::dump(Formatter *f) const
278{
279 f->dump_unsigned("commit_latency_ms", os_commit_latency);
280 f->dump_unsigned("apply_latency_ms", os_apply_latency);
281}
282
283void objectstore_perf_stat_t::encode(bufferlist &bl) const
284{
285 ENCODE_START(1, 1, bl);
286 ::encode(os_commit_latency, bl);
287 ::encode(os_apply_latency, bl);
288 ENCODE_FINISH(bl);
289}
290
291void objectstore_perf_stat_t::decode(bufferlist::iterator &bl)
292{
293 DECODE_START(1, bl);
294 ::decode(os_commit_latency, bl);
295 ::decode(os_apply_latency, bl);
296 DECODE_FINISH(bl);
297}
298
299void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf_stat_t*>& o)
300{
301 o.push_back(new objectstore_perf_stat_t());
302 o.push_back(new objectstore_perf_stat_t());
303 o.back()->os_commit_latency = 20;
304 o.back()->os_apply_latency = 30;
305}
306
307// -- osd_stat_t --
308void osd_stat_t::dump(Formatter *f) const
309{
31f18b77
FG
310 f->dump_unsigned("up_from", up_from);
311 f->dump_unsigned("seq", seq);
7c673cae
FG
312 f->dump_unsigned("kb", kb);
313 f->dump_unsigned("kb_used", kb_used);
314 f->dump_unsigned("kb_avail", kb_avail);
315 f->open_array_section("hb_peers");
316 for (auto p : hb_peers)
317 f->dump_int("osd", p);
318 f->close_section();
319 f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
320 f->dump_int("num_snap_trimming", num_snap_trimming);
321 f->open_object_section("op_queue_age_hist");
322 op_queue_age_hist.dump(f);
323 f->close_section();
324 f->open_object_section("perf_stat");
325 os_perf_stat.dump(f);
326 f->close_section();
327}
328
329void osd_stat_t::encode(bufferlist &bl) const
330{
31f18b77 331 ENCODE_START(6, 2, bl);
7c673cae
FG
332 ::encode(kb, bl);
333 ::encode(kb_used, bl);
334 ::encode(kb_avail, bl);
335 ::encode(snap_trim_queue_len, bl);
336 ::encode(num_snap_trimming, bl);
337 ::encode(hb_peers, bl);
338 ::encode((uint32_t)0, bl);
339 ::encode(op_queue_age_hist, bl);
340 ::encode(os_perf_stat, bl);
31f18b77
FG
341 ::encode(up_from, bl);
342 ::encode(seq, bl);
7c673cae
FG
343 ENCODE_FINISH(bl);
344}
345
346void osd_stat_t::decode(bufferlist::iterator &bl)
347{
31f18b77 348 DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
7c673cae
FG
349 ::decode(kb, bl);
350 ::decode(kb_used, bl);
351 ::decode(kb_avail, bl);
352 ::decode(snap_trim_queue_len, bl);
353 ::decode(num_snap_trimming, bl);
354 ::decode(hb_peers, bl);
355 vector<int> num_hb_out;
356 ::decode(num_hb_out, bl);
357 if (struct_v >= 3)
358 ::decode(op_queue_age_hist, bl);
359 if (struct_v >= 4)
360 ::decode(os_perf_stat, bl);
31f18b77
FG
361 if (struct_v >= 6) {
362 ::decode(up_from, bl);
363 ::decode(seq, bl);
364 }
7c673cae
FG
365 DECODE_FINISH(bl);
366}
367
368void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
369{
370 o.push_back(new osd_stat_t);
371
372 o.push_back(new osd_stat_t);
373 o.back()->kb = 1;
374 o.back()->kb_used = 2;
375 o.back()->kb_avail = 3;
376 o.back()->hb_peers.push_back(7);
377 o.back()->snap_trim_queue_len = 8;
378 o.back()->num_snap_trimming = 99;
379}
380
381// -- pg_t --
382
383int pg_t::print(char *o, int maxlen) const
384{
385 if (preferred() >= 0)
386 return snprintf(o, maxlen, "%llu.%xp%d", (unsigned long long)pool(), ps(), preferred());
387 else
388 return snprintf(o, maxlen, "%llu.%x", (unsigned long long)pool(), ps());
389}
390
391bool pg_t::parse(const char *s)
392{
393 uint64_t ppool;
394 uint32_t pseed;
395 int32_t pref;
396 int r = sscanf(s, "%llu.%xp%d", (long long unsigned *)&ppool, &pseed, &pref);
397 if (r < 2)
398 return false;
399 m_pool = ppool;
400 m_seed = pseed;
401 if (r == 3)
402 m_preferred = pref;
403 else
404 m_preferred = -1;
405 return true;
406}
407
408bool spg_t::parse(const char *s)
409{
410 pgid.set_preferred(-1);
411 shard = shard_id_t::NO_SHARD;
412 uint64_t ppool;
413 uint32_t pseed;
414 int32_t pref;
415 uint32_t pshard;
416 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
417 if (r < 2)
418 return false;
419 pgid.set_pool(ppool);
420 pgid.set_ps(pseed);
421
422 const char *p = strchr(s, 'p');
423 if (p) {
424 r = sscanf(p, "p%d", &pref);
425 if (r == 1) {
426 pgid.set_preferred(pref);
427 } else {
428 return false;
429 }
430 }
431
432 p = strchr(s, 's');
433 if (p) {
434 r = sscanf(p, "s%d", &pshard);
435 if (r == 1) {
436 shard = shard_id_t(pshard);
437 } else {
438 return false;
439 }
440 }
441 return true;
442}
443
444char *spg_t::calc_name(char *buf, const char *suffix_backwords) const
445{
446 while (*suffix_backwords)
447 *--buf = *suffix_backwords++;
448
449 if (!is_no_shard()) {
450 buf = ritoa<uint8_t, 10>((uint8_t)shard.id, buf);
451 *--buf = 's';
452 }
453
454 return pgid.calc_name(buf, "");
455}
456
457ostream& operator<<(ostream& out, const spg_t &pg)
458{
459 char buf[spg_t::calc_name_buf_size];
460 buf[spg_t::calc_name_buf_size - 1] = '\0';
461 out << pg.calc_name(buf + spg_t::calc_name_buf_size - 1, "");
462 return out;
463}
464
465pg_t pg_t::get_ancestor(unsigned old_pg_num) const
466{
467 int old_bits = cbits(old_pg_num);
468 int old_mask = (1 << old_bits) - 1;
469 pg_t ret = *this;
470 ret.m_seed = ceph_stable_mod(m_seed, old_pg_num, old_mask);
471 return ret;
472}
473
474bool pg_t::is_split(unsigned old_pg_num, unsigned new_pg_num, set<pg_t> *children) const
475{
476 assert(m_seed < old_pg_num);
477 if (new_pg_num <= old_pg_num)
478 return false;
479
480 bool split = false;
481 if (true) {
482 unsigned old_bits = cbits(old_pg_num);
483 unsigned old_mask = (1 << old_bits) - 1;
484 for (unsigned n = 1; ; n++) {
485 unsigned next_bit = (n << (old_bits-1));
486 unsigned s = next_bit | m_seed;
487
488 if (s < old_pg_num || s == m_seed)
489 continue;
490 if (s >= new_pg_num)
491 break;
492 if ((unsigned)ceph_stable_mod(s, old_pg_num, old_mask) == m_seed) {
493 split = true;
494 if (children)
495 children->insert(pg_t(s, m_pool, m_preferred));
496 }
497 }
498 }
499 if (false) {
500 // brute force
501 int old_bits = cbits(old_pg_num);
502 int old_mask = (1 << old_bits) - 1;
503 for (unsigned x = old_pg_num; x < new_pg_num; ++x) {
504 unsigned o = ceph_stable_mod(x, old_pg_num, old_mask);
505 if (o == m_seed) {
506 split = true;
507 children->insert(pg_t(x, m_pool, m_preferred));
508 }
509 }
510 }
511 return split;
512}
513
514unsigned pg_t::get_split_bits(unsigned pg_num) const {
515 if (pg_num == 1)
516 return 0;
517 assert(pg_num > 1);
518
519 // Find unique p such that pg_num \in [2^(p-1), 2^p)
520 unsigned p = cbits(pg_num);
521 assert(p); // silence coverity #751330
522
523 if ((m_seed % (1<<(p-1))) < (pg_num % (1<<(p-1))))
524 return p;
525 else
526 return p - 1;
527}
528
529pg_t pg_t::get_parent() const
530{
531 unsigned bits = cbits(m_seed);
532 assert(bits);
533 pg_t retval = *this;
534 retval.m_seed &= ~((~0)<<(bits - 1));
535 return retval;
536}
537
538hobject_t pg_t::get_hobj_start() const
539{
540 return hobject_t(object_t(), string(), CEPH_NOSNAP, m_seed, m_pool,
541 string());
542}
543
544hobject_t pg_t::get_hobj_end(unsigned pg_num) const
545{
546 // note: this assumes a bitwise sort; with the legacy nibblewise
547 // sort a PG did not always cover a single contiguous range of the
548 // (bit-reversed) hash range.
549 unsigned bits = get_split_bits(pg_num);
550 uint64_t rev_start = hobject_t::_reverse_bits(m_seed);
551 uint64_t rev_end = (rev_start | (0xffffffff >> bits)) + 1;
552 if (rev_end >= 0x100000000) {
553 assert(rev_end == 0x100000000);
554 return hobject_t::get_max();
555 } else {
556 return hobject_t(object_t(), string(), CEPH_NOSNAP,
557 hobject_t::_reverse_bits(rev_end), m_pool,
558 string());
559 }
560}
561
562void pg_t::dump(Formatter *f) const
563{
564 f->dump_unsigned("pool", m_pool);
565 f->dump_unsigned("seed", m_seed);
566 f->dump_int("preferred_osd", m_preferred);
567}
568
569void pg_t::generate_test_instances(list<pg_t*>& o)
570{
571 o.push_back(new pg_t);
572 o.push_back(new pg_t(1, 2, -1));
573 o.push_back(new pg_t(13123, 3, -1));
574 o.push_back(new pg_t(131223, 4, 23));
575}
576
577char *pg_t::calc_name(char *buf, const char *suffix_backwords) const
578{
579 while (*suffix_backwords)
580 *--buf = *suffix_backwords++;
581
582 if (m_preferred >= 0)
583 *--buf ='p';
584
585 buf = ritoa<uint32_t, 16>(m_seed, buf);
586
587 *--buf = '.';
588
589 return ritoa<uint64_t, 10>(m_pool, buf);
590}
591
592ostream& operator<<(ostream& out, const pg_t &pg)
593{
594 char buf[pg_t::calc_name_buf_size];
595 buf[pg_t::calc_name_buf_size - 1] = '\0';
596 out << pg.calc_name(buf + pg_t::calc_name_buf_size - 1, "");
597 return out;
598}
599
600
601// -- coll_t --
602
603void coll_t::calc_str()
604{
605 switch (type) {
606 case TYPE_META:
607 strcpy(_str_buff, "meta");
608 _str = _str_buff;
609 break;
610 case TYPE_PG:
611 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
612 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "daeh_");
613 break;
614 case TYPE_PG_TEMP:
615 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
616 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "PMET_");
617 break;
618 default:
619 assert(0 == "unknown collection type");
620 }
621}
622
623bool coll_t::parse(const std::string& s)
624{
625 if (s == "meta") {
626 type = TYPE_META;
627 pgid = spg_t();
628 removal_seq = 0;
629 calc_str();
630 assert(s == _str);
631 return true;
632 }
633 if (s.find("_head") == s.length() - 5 &&
634 pgid.parse(s.substr(0, s.length() - 5))) {
635 type = TYPE_PG;
636 removal_seq = 0;
637 calc_str();
638 assert(s == _str);
639 return true;
640 }
641 if (s.find("_TEMP") == s.length() - 5 &&
642 pgid.parse(s.substr(0, s.length() - 5))) {
643 type = TYPE_PG_TEMP;
644 removal_seq = 0;
645 calc_str();
646 assert(s == _str);
647 return true;
648 }
649 return false;
650}
651
652void coll_t::encode(bufferlist& bl) const
653{
654 // when changing this, remember to update encoded_size() too.
655 if (is_temp()) {
656 // can't express this as v2...
657 __u8 struct_v = 3;
658 ::encode(struct_v, bl);
659 ::encode(to_str(), bl);
660 } else {
661 __u8 struct_v = 2;
662 ::encode(struct_v, bl);
663 ::encode((__u8)type, bl);
664 ::encode(pgid, bl);
665 snapid_t snap = CEPH_NOSNAP;
666 ::encode(snap, bl);
667 }
668}
669
670size_t coll_t::encoded_size() const
671{
672 size_t r = sizeof(__u8);
673 if (is_temp()) {
674 // v3
675 r += sizeof(__u32);
676 if (_str) {
677 r += strlen(_str);
678 }
679 } else {
680 // v2
681 // 1. type
682 r += sizeof(__u8);
683 // 2. pgid
684 // - encoding header
685 r += sizeof(ceph_le32) + 2 * sizeof(__u8);
686 // - pg_t
687 r += sizeof(__u8) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
688 // - shard_id_t
689 r += sizeof(int8_t);
690 // 3. snapid_t
691 r += sizeof(uint64_t);
692 }
693
694 return r;
695}
696
697void coll_t::decode(bufferlist::iterator& bl)
698{
699 __u8 struct_v;
700 ::decode(struct_v, bl);
701 switch (struct_v) {
702 case 1:
703 {
704 snapid_t snap;
705 ::decode(pgid, bl);
706 ::decode(snap, bl);
707
708 // infer the type
709 if (pgid == spg_t() && snap == 0) {
710 type = TYPE_META;
711 } else {
712 type = TYPE_PG;
713 }
714 removal_seq = 0;
715 }
716 break;
717
718 case 2:
719 {
720 __u8 _type;
721 snapid_t snap;
722 ::decode(_type, bl);
723 ::decode(pgid, bl);
724 ::decode(snap, bl);
725 type = (type_t)_type;
726 removal_seq = 0;
727 }
728 break;
729
730 case 3:
731 {
732 string str;
733 ::decode(str, bl);
734 bool ok = parse(str);
735 if (!ok)
736 throw std::domain_error(std::string("unable to parse pg ") + str);
737 }
738 break;
739
740 default:
741 {
742 ostringstream oss;
743 oss << "coll_t::decode(): don't know how to decode version "
744 << struct_v;
745 throw std::domain_error(oss.str());
746 }
747 }
748}
749
750void coll_t::dump(Formatter *f) const
751{
752 f->dump_unsigned("type_id", (unsigned)type);
753 if (type != TYPE_META)
754 f->dump_stream("pgid") << pgid;
755 f->dump_string("name", to_str());
756}
757
758void coll_t::generate_test_instances(list<coll_t*>& o)
759{
760 o.push_back(new coll_t());
761 o.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD)));
762 o.push_back(new coll_t(o.back()->get_temp()));
763 o.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
764 o.push_back(new coll_t(o.back()->get_temp()));
765 o.push_back(new coll_t());
766}
767
768// ---
769
770std::string pg_vector_string(const vector<int32_t> &a)
771{
772 ostringstream oss;
773 oss << "[";
774 for (vector<int32_t>::const_iterator i = a.begin(); i != a.end(); ++i) {
775 if (i != a.begin())
776 oss << ",";
777 if (*i != CRUSH_ITEM_NONE)
778 oss << *i;
779 else
780 oss << "NONE";
781 }
782 oss << "]";
783 return oss.str();
784}
785
786std::string pg_state_string(int state)
787{
788 ostringstream oss;
789 if (state & PG_STATE_STALE)
790 oss << "stale+";
791 if (state & PG_STATE_CREATING)
792 oss << "creating+";
793 if (state & PG_STATE_ACTIVE)
794 oss << "active+";
795 if (state & PG_STATE_ACTIVATING)
796 oss << "activating+";
797 if (state & PG_STATE_CLEAN)
798 oss << "clean+";
799 if (state & PG_STATE_RECOVERY_WAIT)
800 oss << "recovery_wait+";
801 if (state & PG_STATE_RECOVERY_TOOFULL)
802 oss << "recovery_toofull+";
803 if (state & PG_STATE_RECOVERING)
804 oss << "recovering+";
805 if (state & PG_STATE_DOWN)
806 oss << "down+";
807 if (state & PG_STATE_UNDERSIZED)
808 oss << "undersized+";
809 if (state & PG_STATE_DEGRADED)
810 oss << "degraded+";
811 if (state & PG_STATE_REMAPPED)
812 oss << "remapped+";
813 if (state & PG_STATE_SCRUBBING)
814 oss << "scrubbing+";
815 if (state & PG_STATE_DEEP_SCRUB)
816 oss << "deep+";
817 if (state & PG_STATE_INCONSISTENT)
818 oss << "inconsistent+";
819 if (state & PG_STATE_PEERING)
820 oss << "peering+";
821 if (state & PG_STATE_REPAIR)
822 oss << "repair+";
823 if ((state & PG_STATE_BACKFILL_WAIT) &&
824 !(state &PG_STATE_BACKFILL))
825 oss << "backfill_wait+";
826 if (state & PG_STATE_BACKFILL)
827 oss << "backfilling+";
828 if (state & PG_STATE_BACKFILL_TOOFULL)
829 oss << "backfill_toofull+";
830 if (state & PG_STATE_INCOMPLETE)
831 oss << "incomplete+";
832 if (state & PG_STATE_PEERED)
833 oss << "peered+";
834 if (state & PG_STATE_SNAPTRIM)
835 oss << "snaptrim+";
836 if (state & PG_STATE_SNAPTRIM_WAIT)
837 oss << "snaptrim_wait+";
224ce89b
WB
838 if (state & PG_STATE_SNAPTRIM_ERROR)
839 oss << "snaptrim_error+";
7c673cae
FG
840 string ret(oss.str());
841 if (ret.length() > 0)
842 ret.resize(ret.length() - 1);
843 else
31f18b77 844 ret = "unknown";
7c673cae
FG
845 return ret;
846}
847
848int pg_string_state(const std::string& state)
849{
850 int type;
851 if (state == "active")
852 type = PG_STATE_ACTIVE;
853 else if (state == "clean")
854 type = PG_STATE_CLEAN;
855 else if (state == "down")
856 type = PG_STATE_DOWN;
857 else if (state == "scrubbing")
858 type = PG_STATE_SCRUBBING;
859 else if (state == "degraded")
860 type = PG_STATE_DEGRADED;
861 else if (state == "inconsistent")
862 type = PG_STATE_INCONSISTENT;
863 else if (state == "peering")
864 type = PG_STATE_PEERING;
865 else if (state == "repair")
866 type = PG_STATE_REPAIR;
867 else if (state == "recovering")
868 type = PG_STATE_RECOVERING;
869 else if (state == "backfill_wait")
870 type = PG_STATE_BACKFILL_WAIT;
871 else if (state == "incomplete")
872 type = PG_STATE_INCOMPLETE;
873 else if (state == "stale")
874 type = PG_STATE_STALE;
875 else if (state == "remapped")
876 type = PG_STATE_REMAPPED;
877 else if (state == "deep_scrub")
878 type = PG_STATE_DEEP_SCRUB;
879 else if (state == "backfill")
880 type = PG_STATE_BACKFILL;
881 else if (state == "backfill_toofull")
882 type = PG_STATE_BACKFILL_TOOFULL;
883 else if (state == "recovery_wait")
884 type = PG_STATE_RECOVERY_WAIT;
885 else if (state == "recovery_toofull")
886 type = PG_STATE_RECOVERY_TOOFULL;
887 else if (state == "undersized")
888 type = PG_STATE_UNDERSIZED;
889 else if (state == "activating")
890 type = PG_STATE_ACTIVATING;
891 else if (state == "peered")
892 type = PG_STATE_PEERED;
893 else if (state == "snaptrim")
894 type = PG_STATE_SNAPTRIM;
895 else if (state == "snaptrim_wait")
896 type = PG_STATE_SNAPTRIM_WAIT;
224ce89b
WB
897 else if (state == "snaptrim_error")
898 type = PG_STATE_SNAPTRIM_ERROR;
7c673cae
FG
899 else
900 type = -1;
901 return type;
902}
903
904// -- eversion_t --
905string eversion_t::get_key_name() const
906{
907 char key[32];
908 // Below is equivalent of sprintf("%010u.%020llu");
909 key[31] = 0;
910 ritoa<uint64_t, 10, 20>(version, key + 31);
911 key[10] = '.';
912 ritoa<uint32_t, 10, 10>(epoch, key + 10);
913 return string(key);
914}
915
916
917// -- pool_snap_info_t --
918void pool_snap_info_t::dump(Formatter *f) const
919{
920 f->dump_unsigned("snapid", snapid);
921 f->dump_stream("stamp") << stamp;
922 f->dump_string("name", name);
923}
924
925void pool_snap_info_t::encode(bufferlist& bl, uint64_t features) const
926{
927 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
928 __u8 struct_v = 1;
929 ::encode(struct_v, bl);
930 ::encode(snapid, bl);
931 ::encode(stamp, bl);
932 ::encode(name, bl);
933 return;
934 }
935 ENCODE_START(2, 2, bl);
936 ::encode(snapid, bl);
937 ::encode(stamp, bl);
938 ::encode(name, bl);
939 ENCODE_FINISH(bl);
940}
941
942void pool_snap_info_t::decode(bufferlist::iterator& bl)
943{
944 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
945 ::decode(snapid, bl);
946 ::decode(stamp, bl);
947 ::decode(name, bl);
948 DECODE_FINISH(bl);
949}
950
951void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
952{
953 o.push_back(new pool_snap_info_t);
954 o.push_back(new pool_snap_info_t);
955 o.back()->snapid = 1;
956 o.back()->stamp = utime_t(1, 2);
957 o.back()->name = "foo";
958}
959
960// -- pool_opts_t --
961
962typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
963static opt_mapping_t opt_mapping = boost::assign::map_list_of
964 ("scrub_min_interval", pool_opts_t::opt_desc_t(
965 pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
966 ("scrub_max_interval", pool_opts_t::opt_desc_t(
967 pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
968 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
969 pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
970 ("recovery_priority", pool_opts_t::opt_desc_t(
971 pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
972 ("recovery_op_priority", pool_opts_t::opt_desc_t(
973 pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
974 ("scrub_priority", pool_opts_t::opt_desc_t(
975 pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
976 ("compression_mode", pool_opts_t::opt_desc_t(
977 pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
978 ("compression_algorithm", pool_opts_t::opt_desc_t(
979 pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
980 ("compression_required_ratio", pool_opts_t::opt_desc_t(
981 pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
982 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
983 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
984 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
985 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
986 ("csum_type", pool_opts_t::opt_desc_t(
987 pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
988 ("csum_max_block", pool_opts_t::opt_desc_t(
989 pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
990 ("csum_min_block", pool_opts_t::opt_desc_t(
991 pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT));
992
993bool pool_opts_t::is_opt_name(const std::string& name) {
224ce89b 994 return opt_mapping.count(name);
7c673cae
FG
995}
996
997pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name) {
998 opt_mapping_t::iterator i = opt_mapping.find(name);
999 assert(i != opt_mapping.end());
1000 return i->second;
1001}
1002
1003bool pool_opts_t::is_set(pool_opts_t::key_t key) const {
224ce89b 1004 return opts.count(key);
7c673cae
FG
1005}
1006
1007const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const {
1008 opts_t::const_iterator i = opts.find(key);
1009 assert(i != opts.end());
1010 return i->second;
1011}
1012
1013bool pool_opts_t::unset(pool_opts_t::key_t key) {
1014 return opts.erase(key) > 0;
1015}
1016
1017class pool_opts_dumper_t : public boost::static_visitor<>
1018{
1019public:
1020 pool_opts_dumper_t(const std::string& name_, Formatter* f_) :
1021 name(name_.c_str()), f(f_) {}
1022
1023 void operator()(std::string s) const {
1024 f->dump_string(name, s);
1025 }
1026 void operator()(int i) const {
1027 f->dump_int(name, i);
1028 }
1029 void operator()(double d) const {
1030 f->dump_float(name, d);
1031 }
1032
1033private:
1034 const char* name;
1035 Formatter* f;
1036};
1037
1038void pool_opts_t::dump(const std::string& name, Formatter* f) const
1039{
1040 const opt_desc_t& desc = get_opt_desc(name);
1041 opts_t::const_iterator i = opts.find(desc.key);
1042 if (i == opts.end()) {
1043 return;
1044 }
1045 boost::apply_visitor(pool_opts_dumper_t(name, f), i->second);
1046}
1047
1048void pool_opts_t::dump(Formatter* f) const
1049{
1050 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1051 ++i) {
1052 const std::string& name = i->first;
1053 const opt_desc_t& desc = i->second;
1054 opts_t::const_iterator j = opts.find(desc.key);
1055 if (j == opts.end()) {
1056 continue;
1057 }
1058 boost::apply_visitor(pool_opts_dumper_t(name, f), j->second);
1059 }
1060}
1061
1062class pool_opts_encoder_t : public boost::static_visitor<>
1063{
1064public:
1065 explicit pool_opts_encoder_t(bufferlist& bl_) : bl(bl_) {}
1066
1067 void operator()(std::string s) const {
1068 ::encode(static_cast<int32_t>(pool_opts_t::STR), bl);
1069 ::encode(s, bl);
1070 }
1071 void operator()(int i) const {
1072 ::encode(static_cast<int32_t>(pool_opts_t::INT), bl);
1073 ::encode(i, bl);
1074 }
1075 void operator()(double d) const {
1076 ::encode(static_cast<int32_t>(pool_opts_t::DOUBLE), bl);
1077 ::encode(d, bl);
1078 }
1079
1080private:
1081 bufferlist& bl;
1082};
1083
1084void pool_opts_t::encode(bufferlist& bl) const {
1085 ENCODE_START(1, 1, bl);
1086 uint32_t n = static_cast<uint32_t>(opts.size());
1087 ::encode(n, bl);
1088 for (opts_t::const_iterator i = opts.begin(); i != opts.end(); ++i) {
1089 ::encode(static_cast<int32_t>(i->first), bl);
1090 boost::apply_visitor(pool_opts_encoder_t(bl), i->second);
1091 }
1092 ENCODE_FINISH(bl);
1093}
1094
1095void pool_opts_t::decode(bufferlist::iterator& bl) {
1096 DECODE_START(1, bl);
1097 __u32 n;
1098 ::decode(n, bl);
1099 opts.clear();
1100 while (n--) {
1101 int32_t k, t;
1102 ::decode(k, bl);
1103 ::decode(t, bl);
1104 if (t == STR) {
1105 std::string s;
1106 ::decode(s, bl);
1107 opts[static_cast<key_t>(k)] = s;
1108 } else if (t == INT) {
1109 int i;
1110 ::decode(i, bl);
1111 opts[static_cast<key_t>(k)] = i;
1112 } else if (t == DOUBLE) {
1113 double d;
1114 ::decode(d, bl);
1115 opts[static_cast<key_t>(k)] = d;
1116 } else {
1117 assert(!"invalid type");
1118 }
1119 }
1120 DECODE_FINISH(bl);
1121}
1122
1123ostream& operator<<(ostream& out, const pool_opts_t& opts)
1124{
1125 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1126 ++i) {
1127 const std::string& name = i->first;
1128 const pool_opts_t::opt_desc_t& desc = i->second;
1129 pool_opts_t::opts_t::const_iterator j = opts.opts.find(desc.key);
1130 if (j == opts.opts.end()) {
1131 continue;
1132 }
1133 out << " " << name << " " << j->second;
1134 }
1135 return out;
1136}
1137
1138// -- pg_pool_t --
1139
1140void pg_pool_t::dump(Formatter *f) const
1141{
1142 f->dump_unsigned("flags", get_flags());
1143 f->dump_string("flags_names", get_flags_string());
1144 f->dump_int("type", get_type());
1145 f->dump_int("size", get_size());
1146 f->dump_int("min_size", get_min_size());
31f18b77 1147 f->dump_int("crush_rule", get_crush_rule());
7c673cae
FG
1148 f->dump_int("object_hash", get_object_hash());
1149 f->dump_unsigned("pg_num", get_pg_num());
1150 f->dump_unsigned("pg_placement_num", get_pgp_num());
1151 f->dump_unsigned("crash_replay_interval", get_crash_replay_interval());
1152 f->dump_stream("last_change") << get_last_change();
1153 f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
1154 f->dump_stream("last_force_op_resend_preluminous")
1155 << get_last_force_op_resend_preluminous();
1156 f->dump_unsigned("auid", get_auid());
1157 f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1158 f->dump_unsigned("snap_seq", get_snap_seq());
1159 f->dump_unsigned("snap_epoch", get_snap_epoch());
1160 f->open_array_section("pool_snaps");
1161 for (map<snapid_t, pool_snap_info_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
1162 f->open_object_section("pool_snap_info");
1163 p->second.dump(f);
1164 f->close_section();
1165 }
1166 f->close_section();
1167 f->dump_stream("removed_snaps") << removed_snaps;
1168 f->dump_unsigned("quota_max_bytes", quota_max_bytes);
1169 f->dump_unsigned("quota_max_objects", quota_max_objects);
1170 f->open_array_section("tiers");
1171 for (set<uint64_t>::const_iterator p = tiers.begin(); p != tiers.end(); ++p)
1172 f->dump_unsigned("pool_id", *p);
1173 f->close_section();
1174 f->dump_int("tier_of", tier_of);
1175 f->dump_int("read_tier", read_tier);
1176 f->dump_int("write_tier", write_tier);
1177 f->dump_string("cache_mode", get_cache_mode_name());
1178 f->dump_unsigned("target_max_bytes", target_max_bytes);
1179 f->dump_unsigned("target_max_objects", target_max_objects);
1180 f->dump_unsigned("cache_target_dirty_ratio_micro",
1181 cache_target_dirty_ratio_micro);
1182 f->dump_unsigned("cache_target_dirty_high_ratio_micro",
1183 cache_target_dirty_high_ratio_micro);
1184 f->dump_unsigned("cache_target_full_ratio_micro",
1185 cache_target_full_ratio_micro);
1186 f->dump_unsigned("cache_min_flush_age", cache_min_flush_age);
1187 f->dump_unsigned("cache_min_evict_age", cache_min_evict_age);
1188 f->dump_string("erasure_code_profile", erasure_code_profile);
1189 f->open_object_section("hit_set_params");
1190 hit_set_params.dump(f);
1191 f->close_section(); // hit_set_params
1192 f->dump_unsigned("hit_set_period", hit_set_period);
1193 f->dump_unsigned("hit_set_count", hit_set_count);
1194 f->dump_bool("use_gmt_hitset", use_gmt_hitset);
1195 f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
1196 f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote);
1197 f->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate);
1198 f->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n);
1199 f->open_array_section("grade_table");
1200 for (unsigned i = 0; i < hit_set_count; ++i)
1201 f->dump_unsigned("value", get_grade(i));
1202 f->close_section();
1203 f->dump_unsigned("stripe_width", get_stripe_width());
1204 f->dump_unsigned("expected_num_objects", expected_num_objects);
1205 f->dump_bool("fast_read", fast_read);
1206 f->open_object_section("options");
1207 opts.dump(f);
1208 f->close_section(); // options
1209}
1210
1211void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
1212 for (size_t i = 0; i < from.size(); ++i) {
1213 if (from[i] != CRUSH_ITEM_NONE) {
1214 to->insert(
1215 pg_shard_t(
1216 from[i],
1217 ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
1218 }
1219 }
1220}
1221
1222void pg_pool_t::calc_pg_masks()
1223{
1224 pg_num_mask = (1 << cbits(pg_num-1)) - 1;
1225 pgp_num_mask = (1 << cbits(pgp_num-1)) - 1;
1226}
1227
1228unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid) const
1229{
1230 if (pg_num == pg_num_mask + 1)
1231 return pg_num; // power-of-2 split
1232 unsigned mask = pg_num_mask >> 1;
1233 if ((pgid.ps() & mask) < (pg_num & mask))
1234 return pg_num_mask + 1; // smaller bin size (already split)
1235 else
1236 return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
1237}
1238
1239/*
1240 * we have two snap modes:
1241 * - pool global snaps
1242 * - snap existence/non-existence defined by snaps[] and snap_seq
1243 * - user managed snaps
1244 * - removal governed by removed_snaps
1245 *
1246 * we know which mode we're using based on whether removed_snaps is empty.
1247 */
1248bool pg_pool_t::is_pool_snaps_mode() const
1249{
1250 return removed_snaps.empty() && get_snap_seq() > 0;
1251}
1252
1253bool pg_pool_t::is_unmanaged_snaps_mode() const
1254{
1255 return removed_snaps.size() && get_snap_seq() > 0;
1256}
1257
1258bool pg_pool_t::is_removed_snap(snapid_t s) const
1259{
1260 if (is_pool_snaps_mode())
1261 return s <= get_snap_seq() && snaps.count(s) == 0;
1262 else
1263 return removed_snaps.contains(s);
1264}
1265
1266/*
1267 * build set of known-removed sets from either pool snaps or
1268 * explicit removed_snaps set.
1269 */
1270void pg_pool_t::build_removed_snaps(interval_set<snapid_t>& rs) const
1271{
1272 if (is_pool_snaps_mode()) {
1273 rs.clear();
1274 for (snapid_t s = 1; s <= get_snap_seq(); s = s + 1)
1275 if (snaps.count(s) == 0)
1276 rs.insert(s);
1277 } else {
1278 rs = removed_snaps;
1279 }
1280}
1281
1282snapid_t pg_pool_t::snap_exists(const char *s) const
1283{
1284 for (map<snapid_t,pool_snap_info_t>::const_iterator p = snaps.begin();
1285 p != snaps.end();
1286 ++p)
1287 if (p->second.name == s)
1288 return p->second.snapid;
1289 return 0;
1290}
1291
1292void pg_pool_t::add_snap(const char *n, utime_t stamp)
1293{
1294 assert(!is_unmanaged_snaps_mode());
1295 snapid_t s = get_snap_seq() + 1;
1296 snap_seq = s;
1297 snaps[s].snapid = s;
1298 snaps[s].name = n;
1299 snaps[s].stamp = stamp;
1300}
1301
1302void pg_pool_t::add_unmanaged_snap(uint64_t& snapid)
1303{
1304 if (removed_snaps.empty()) {
1305 assert(!is_pool_snaps_mode());
1306 removed_snaps.insert(snapid_t(1));
1307 snap_seq = 1;
1308 }
1309 snapid = snap_seq = snap_seq + 1;
1310}
1311
1312void pg_pool_t::remove_snap(snapid_t s)
1313{
1314 assert(snaps.count(s));
1315 snaps.erase(s);
1316 snap_seq = snap_seq + 1;
1317}
1318
1319void pg_pool_t::remove_unmanaged_snap(snapid_t s)
1320{
1321 assert(is_unmanaged_snaps_mode());
1322 removed_snaps.insert(s);
1323 snap_seq = snap_seq + 1;
1324 removed_snaps.insert(get_snap_seq());
1325}
1326
1327SnapContext pg_pool_t::get_snap_context() const
1328{
1329 vector<snapid_t> s(snaps.size());
1330 unsigned i = 0;
1331 for (map<snapid_t, pool_snap_info_t>::const_reverse_iterator p = snaps.rbegin();
1332 p != snaps.rend();
1333 ++p)
1334 s[i++] = p->first;
1335 return SnapContext(get_snap_seq(), s);
1336}
1337
1338uint32_t pg_pool_t::hash_key(const string& key, const string& ns) const
1339{
1340 if (ns.empty())
1341 return ceph_str_hash(object_hash, key.data(), key.length());
1342 int nsl = ns.length();
1343 int len = key.length() + nsl + 1;
1344 char buf[len];
1345 memcpy(&buf[0], ns.data(), nsl);
1346 buf[nsl] = '\037';
1347 memcpy(&buf[nsl+1], key.data(), key.length());
1348 return ceph_str_hash(object_hash, &buf[0], len);
1349}
1350
1351uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v) const
1352{
1353 return ceph_stable_mod(v, pg_num, pg_num_mask);
1354}
1355
1356/*
1357 * map a raw pg (with full precision ps) into an actual pg, for storage
1358 */
1359pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
1360{
1361 pg.set_ps(ceph_stable_mod(pg.ps(), pg_num, pg_num_mask));
1362 return pg;
1363}
1364
1365/*
1366 * map raw pg (full precision ps) into a placement seed. include
1367 * pool id in that value so that different pools don't use the same
1368 * seeds.
1369 */
1370ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
1371{
1372 if (flags & FLAG_HASHPSPOOL) {
1373 // Hash the pool id so that pool PGs do not overlap.
1374 return
1375 crush_hash32_2(CRUSH_HASH_RJENKINS1,
1376 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
1377 pg.pool());
1378 } else {
1379 // Legacy behavior; add ps and pool together. This is not a great
1380 // idea because the PGs from each pool will essentially overlap on
1381 // top of each other: 0.5 == 1.4 == 2.3 == ...
1382 return
1383 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
1384 pg.pool();
1385 }
1386}
1387
1388uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
1389{
1390 uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
1391 if (pg_num == pg_num_mask + 1) {
1392 r &= ~pg_num_mask;
1393 } else {
1394 unsigned smaller_mask = pg_num_mask >> 1;
1395 if ((pg.ps() & smaller_mask) < (pg_num & smaller_mask)) {
1396 r &= ~pg_num_mask;
1397 } else {
1398 r &= ~smaller_mask;
1399 }
1400 }
1401 r |= pg.ps();
1402 return r;
1403}
1404
1405void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
1406{
1407 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1408 // this encoding matches the old struct ceph_pg_pool
1409 __u8 struct_v = 2;
1410 ::encode(struct_v, bl);
1411 ::encode(type, bl);
1412 ::encode(size, bl);
31f18b77 1413 ::encode(crush_rule, bl);
7c673cae
FG
1414 ::encode(object_hash, bl);
1415 ::encode(pg_num, bl);
1416 ::encode(pgp_num, bl);
1417 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1418 ::encode(lpg_num, bl);
1419 ::encode(lpgp_num, bl);
1420 ::encode(last_change, bl);
1421 ::encode(snap_seq, bl);
1422 ::encode(snap_epoch, bl);
1423
1424 __u32 n = snaps.size();
1425 ::encode(n, bl);
1426 n = removed_snaps.num_intervals();
1427 ::encode(n, bl);
1428
1429 ::encode(auid, bl);
1430
1431 ::encode_nohead(snaps, bl, features);
1432 ::encode_nohead(removed_snaps, bl);
1433 return;
1434 }
1435
1436 if ((features & CEPH_FEATURE_OSDENC) == 0) {
1437 __u8 struct_v = 4;
1438 ::encode(struct_v, bl);
1439 ::encode(type, bl);
1440 ::encode(size, bl);
31f18b77 1441 ::encode(crush_rule, bl);
7c673cae
FG
1442 ::encode(object_hash, bl);
1443 ::encode(pg_num, bl);
1444 ::encode(pgp_num, bl);
1445 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1446 ::encode(lpg_num, bl);
1447 ::encode(lpgp_num, bl);
1448 ::encode(last_change, bl);
1449 ::encode(snap_seq, bl);
1450 ::encode(snap_epoch, bl);
1451 ::encode(snaps, bl, features);
1452 ::encode(removed_snaps, bl);
1453 ::encode(auid, bl);
1454 ::encode(flags, bl);
1455 ::encode(crash_replay_interval, bl);
1456 return;
1457 }
1458
1459 if ((features & CEPH_FEATURE_OSD_POOLRESEND) == 0) {
1460 // we simply added last_force_op_resend here, which is a fully
1461 // backward compatible change. however, encoding the same map
1462 // differently between monitors triggers scrub noise (even though
1463 // they are decodable without the feature), so let's be pendantic
1464 // about it.
1465 ENCODE_START(14, 5, bl);
1466 ::encode(type, bl);
1467 ::encode(size, bl);
31f18b77 1468 ::encode(crush_rule, bl);
7c673cae
FG
1469 ::encode(object_hash, bl);
1470 ::encode(pg_num, bl);
1471 ::encode(pgp_num, bl);
1472 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1473 ::encode(lpg_num, bl);
1474 ::encode(lpgp_num, bl);
1475 ::encode(last_change, bl);
1476 ::encode(snap_seq, bl);
1477 ::encode(snap_epoch, bl);
1478 ::encode(snaps, bl, features);
1479 ::encode(removed_snaps, bl);
1480 ::encode(auid, bl);
1481 ::encode(flags, bl);
1482 ::encode(crash_replay_interval, bl);
1483 ::encode(min_size, bl);
1484 ::encode(quota_max_bytes, bl);
1485 ::encode(quota_max_objects, bl);
1486 ::encode(tiers, bl);
1487 ::encode(tier_of, bl);
1488 __u8 c = cache_mode;
1489 ::encode(c, bl);
1490 ::encode(read_tier, bl);
1491 ::encode(write_tier, bl);
1492 ::encode(properties, bl);
1493 ::encode(hit_set_params, bl);
1494 ::encode(hit_set_period, bl);
1495 ::encode(hit_set_count, bl);
1496 ::encode(stripe_width, bl);
1497 ::encode(target_max_bytes, bl);
1498 ::encode(target_max_objects, bl);
1499 ::encode(cache_target_dirty_ratio_micro, bl);
1500 ::encode(cache_target_full_ratio_micro, bl);
1501 ::encode(cache_min_flush_age, bl);
1502 ::encode(cache_min_evict_age, bl);
1503 ::encode(erasure_code_profile, bl);
1504 ENCODE_FINISH(bl);
1505 return;
1506 }
1507
1508 uint8_t v = 25;
1509 if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
1510 // this was the first post-hammer thing we added; if it's missing, encode
1511 // like hammer.
1512 v = 21;
1513 }
1514 if ((features &
1515 (CEPH_FEATURE_RESEND_ON_SPLIT|CEPH_FEATURE_SERVER_JEWEL)) !=
1516 (CEPH_FEATURE_RESEND_ON_SPLIT|CEPH_FEATURE_SERVER_JEWEL)) {
1517 v = 24;
1518 }
1519
1520 ENCODE_START(v, 5, bl);
1521 ::encode(type, bl);
1522 ::encode(size, bl);
31f18b77 1523 ::encode(crush_rule, bl);
7c673cae
FG
1524 ::encode(object_hash, bl);
1525 ::encode(pg_num, bl);
1526 ::encode(pgp_num, bl);
1527 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1528 ::encode(lpg_num, bl);
1529 ::encode(lpgp_num, bl);
1530 ::encode(last_change, bl);
1531 ::encode(snap_seq, bl);
1532 ::encode(snap_epoch, bl);
1533 ::encode(snaps, bl, features);
1534 ::encode(removed_snaps, bl);
1535 ::encode(auid, bl);
1536 ::encode(flags, bl);
1537 ::encode(crash_replay_interval, bl);
1538 ::encode(min_size, bl);
1539 ::encode(quota_max_bytes, bl);
1540 ::encode(quota_max_objects, bl);
1541 ::encode(tiers, bl);
1542 ::encode(tier_of, bl);
1543 __u8 c = cache_mode;
1544 ::encode(c, bl);
1545 ::encode(read_tier, bl);
1546 ::encode(write_tier, bl);
1547 ::encode(properties, bl);
1548 ::encode(hit_set_params, bl);
1549 ::encode(hit_set_period, bl);
1550 ::encode(hit_set_count, bl);
1551 ::encode(stripe_width, bl);
1552 ::encode(target_max_bytes, bl);
1553 ::encode(target_max_objects, bl);
1554 ::encode(cache_target_dirty_ratio_micro, bl);
1555 ::encode(cache_target_full_ratio_micro, bl);
1556 ::encode(cache_min_flush_age, bl);
1557 ::encode(cache_min_evict_age, bl);
1558 ::encode(erasure_code_profile, bl);
1559 ::encode(last_force_op_resend_preluminous, bl);
1560 ::encode(min_read_recency_for_promote, bl);
1561 ::encode(expected_num_objects, bl);
1562 if (v >= 19) {
1563 ::encode(cache_target_dirty_high_ratio_micro, bl);
1564 }
1565 if (v >= 20) {
1566 ::encode(min_write_recency_for_promote, bl);
1567 }
1568 if (v >= 21) {
1569 ::encode(use_gmt_hitset, bl);
1570 }
1571 if (v >= 22) {
1572 ::encode(fast_read, bl);
1573 }
1574 if (v >= 23) {
1575 ::encode(hit_set_grade_decay_rate, bl);
1576 ::encode(hit_set_search_last_n, bl);
1577 }
1578 if (v >= 24) {
1579 ::encode(opts, bl);
1580 }
1581 if (v >= 25) {
1582 ::encode(last_force_op_resend, bl);
1583 }
1584 ENCODE_FINISH(bl);
1585}
1586
1587void pg_pool_t::decode(bufferlist::iterator& bl)
1588{
31f18b77 1589 DECODE_START_LEGACY_COMPAT_LEN(25, 5, 5, bl);
7c673cae
FG
1590 ::decode(type, bl);
1591 ::decode(size, bl);
31f18b77 1592 ::decode(crush_rule, bl);
7c673cae
FG
1593 ::decode(object_hash, bl);
1594 ::decode(pg_num, bl);
1595 ::decode(pgp_num, bl);
1596 {
1597 __u32 lpg_num, lpgp_num;
1598 ::decode(lpg_num, bl);
1599 ::decode(lpgp_num, bl);
1600 }
1601 ::decode(last_change, bl);
1602 ::decode(snap_seq, bl);
1603 ::decode(snap_epoch, bl);
1604
1605 if (struct_v >= 3) {
1606 ::decode(snaps, bl);
1607 ::decode(removed_snaps, bl);
1608 ::decode(auid, bl);
1609 } else {
1610 __u32 n, m;
1611 ::decode(n, bl);
1612 ::decode(m, bl);
1613 ::decode(auid, bl);
1614 ::decode_nohead(n, snaps, bl);
1615 ::decode_nohead(m, removed_snaps, bl);
1616 }
1617
1618 if (struct_v >= 4) {
1619 ::decode(flags, bl);
1620 ::decode(crash_replay_interval, bl);
1621 } else {
1622 flags = 0;
1623
1624 // if this looks like the 'data' pool, set the
1625 // crash_replay_interval appropriately. unfortunately, we can't
1626 // be precise here. this should be good enough to preserve replay
1627 // on the data pool for the majority of cluster upgrades, though.
31f18b77 1628 if (crush_rule == 0 && auid == 0)
7c673cae
FG
1629 crash_replay_interval = 60;
1630 else
1631 crash_replay_interval = 0;
1632 }
1633 if (struct_v >= 7) {
1634 ::decode(min_size, bl);
1635 } else {
1636 min_size = size - size/2;
1637 }
1638 if (struct_v >= 8) {
1639 ::decode(quota_max_bytes, bl);
1640 ::decode(quota_max_objects, bl);
1641 }
1642 if (struct_v >= 9) {
1643 ::decode(tiers, bl);
1644 ::decode(tier_of, bl);
1645 __u8 v;
1646 ::decode(v, bl);
1647 cache_mode = (cache_mode_t)v;
1648 ::decode(read_tier, bl);
1649 ::decode(write_tier, bl);
1650 }
1651 if (struct_v >= 10) {
1652 ::decode(properties, bl);
1653 }
1654 if (struct_v >= 11) {
1655 ::decode(hit_set_params, bl);
1656 ::decode(hit_set_period, bl);
1657 ::decode(hit_set_count, bl);
1658 } else {
1659 pg_pool_t def;
1660 hit_set_period = def.hit_set_period;
1661 hit_set_count = def.hit_set_count;
1662 }
1663 if (struct_v >= 12) {
1664 ::decode(stripe_width, bl);
1665 } else {
1666 set_stripe_width(0);
1667 }
1668 if (struct_v >= 13) {
1669 ::decode(target_max_bytes, bl);
1670 ::decode(target_max_objects, bl);
1671 ::decode(cache_target_dirty_ratio_micro, bl);
1672 ::decode(cache_target_full_ratio_micro, bl);
1673 ::decode(cache_min_flush_age, bl);
1674 ::decode(cache_min_evict_age, bl);
1675 } else {
1676 target_max_bytes = 0;
1677 target_max_objects = 0;
1678 cache_target_dirty_ratio_micro = 0;
1679 cache_target_full_ratio_micro = 0;
1680 cache_min_flush_age = 0;
1681 cache_min_evict_age = 0;
1682 }
1683 if (struct_v >= 14) {
1684 ::decode(erasure_code_profile, bl);
1685 }
1686 if (struct_v >= 15) {
1687 ::decode(last_force_op_resend_preluminous, bl);
1688 } else {
1689 last_force_op_resend_preluminous = 0;
1690 }
1691 if (struct_v >= 16) {
1692 ::decode(min_read_recency_for_promote, bl);
1693 } else {
1694 min_read_recency_for_promote = 1;
1695 }
1696 if (struct_v >= 17) {
1697 ::decode(expected_num_objects, bl);
1698 } else {
1699 expected_num_objects = 0;
1700 }
1701 if (struct_v >= 19) {
1702 ::decode(cache_target_dirty_high_ratio_micro, bl);
1703 } else {
1704 cache_target_dirty_high_ratio_micro = cache_target_dirty_ratio_micro;
1705 }
1706 if (struct_v >= 20) {
1707 ::decode(min_write_recency_for_promote, bl);
1708 } else {
1709 min_write_recency_for_promote = 1;
1710 }
1711 if (struct_v >= 21) {
1712 ::decode(use_gmt_hitset, bl);
1713 } else {
1714 use_gmt_hitset = false;
1715 }
1716 if (struct_v >= 22) {
1717 ::decode(fast_read, bl);
1718 } else {
1719 fast_read = false;
1720 }
1721 if (struct_v >= 23) {
1722 ::decode(hit_set_grade_decay_rate, bl);
1723 ::decode(hit_set_search_last_n, bl);
1724 } else {
1725 hit_set_grade_decay_rate = 0;
1726 hit_set_search_last_n = 1;
1727 }
1728 if (struct_v >= 24) {
1729 ::decode(opts, bl);
1730 }
1731 if (struct_v >= 25) {
1732 ::decode(last_force_op_resend, bl);
1733 } else {
1734 last_force_op_resend = last_force_op_resend_preluminous;
1735 }
1736 DECODE_FINISH(bl);
1737 calc_pg_masks();
1738 calc_grade_table();
1739}
1740
1741void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
1742{
1743 pg_pool_t a;
1744 o.push_back(new pg_pool_t(a));
1745
1746 a.type = TYPE_REPLICATED;
1747 a.size = 2;
31f18b77 1748 a.crush_rule = 3;
7c673cae
FG
1749 a.object_hash = 4;
1750 a.pg_num = 6;
1751 a.pgp_num = 5;
1752 a.last_change = 9;
1753 a.last_force_op_resend = 123823;
1754 a.last_force_op_resend_preluminous = 123824;
1755 a.snap_seq = 10;
1756 a.snap_epoch = 11;
1757 a.auid = 12;
1758 a.crash_replay_interval = 13;
1759 a.quota_max_bytes = 473;
1760 a.quota_max_objects = 474;
1761 o.push_back(new pg_pool_t(a));
1762
1763 a.snaps[3].name = "asdf";
1764 a.snaps[3].snapid = 3;
1765 a.snaps[3].stamp = utime_t(123, 4);
1766 a.snaps[6].name = "qwer";
1767 a.snaps[6].snapid = 6;
1768 a.snaps[6].stamp = utime_t(23423, 4);
1769 o.push_back(new pg_pool_t(a));
1770
1771 a.removed_snaps.insert(2); // not quite valid to combine with snaps!
1772 a.quota_max_bytes = 2473;
1773 a.quota_max_objects = 4374;
1774 a.tiers.insert(0);
1775 a.tiers.insert(1);
1776 a.tier_of = 2;
1777 a.cache_mode = CACHEMODE_WRITEBACK;
1778 a.read_tier = 1;
1779 a.write_tier = 1;
1780 a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
1781 a.hit_set_period = 3600;
1782 a.hit_set_count = 8;
1783 a.min_read_recency_for_promote = 1;
1784 a.min_write_recency_for_promote = 1;
1785 a.hit_set_grade_decay_rate = 50;
1786 a.hit_set_search_last_n = 1;
1787 a.calc_grade_table();
1788 a.set_stripe_width(12345);
1789 a.target_max_bytes = 1238132132;
1790 a.target_max_objects = 1232132;
1791 a.cache_target_dirty_ratio_micro = 187232;
1792 a.cache_target_dirty_high_ratio_micro = 309856;
1793 a.cache_target_full_ratio_micro = 987222;
1794 a.cache_min_flush_age = 231;
1795 a.cache_min_evict_age = 2321;
1796 a.erasure_code_profile = "profile in osdmap";
1797 a.expected_num_objects = 123456;
1798 a.fast_read = false;
1799 o.push_back(new pg_pool_t(a));
1800}
1801
1802ostream& operator<<(ostream& out, const pg_pool_t& p)
1803{
1804 out << p.get_type_name()
1805 << " size " << p.get_size()
1806 << " min_size " << p.get_min_size()
31f18b77 1807 << " crush_rule " << p.get_crush_rule()
7c673cae
FG
1808 << " object_hash " << p.get_object_hash_name()
1809 << " pg_num " << p.get_pg_num()
1810 << " pgp_num " << p.get_pgp_num()
1811 << " last_change " << p.get_last_change();
1812 if (p.get_last_force_op_resend() ||
1813 p.get_last_force_op_resend_preluminous())
1814 out << " lfor " << p.get_last_force_op_resend() << "/"
1815 << p.get_last_force_op_resend_preluminous();
1816 if (p.get_auid())
1817 out << " owner " << p.get_auid();
1818 if (p.flags)
1819 out << " flags " << p.get_flags_string();
1820 if (p.crash_replay_interval)
1821 out << " crash_replay_interval " << p.crash_replay_interval;
1822 if (p.quota_max_bytes)
1823 out << " max_bytes " << p.quota_max_bytes;
1824 if (p.quota_max_objects)
1825 out << " max_objects " << p.quota_max_objects;
1826 if (!p.tiers.empty())
1827 out << " tiers " << p.tiers;
1828 if (p.is_tier())
1829 out << " tier_of " << p.tier_of;
1830 if (p.has_read_tier())
1831 out << " read_tier " << p.read_tier;
1832 if (p.has_write_tier())
1833 out << " write_tier " << p.write_tier;
1834 if (p.cache_mode)
1835 out << " cache_mode " << p.get_cache_mode_name();
1836 if (p.target_max_bytes)
1837 out << " target_bytes " << p.target_max_bytes;
1838 if (p.target_max_objects)
1839 out << " target_objects " << p.target_max_objects;
1840 if (p.hit_set_params.get_type() != HitSet::TYPE_NONE) {
1841 out << " hit_set " << p.hit_set_params
1842 << " " << p.hit_set_period << "s"
1843 << " x" << p.hit_set_count << " decay_rate "
1844 << p.hit_set_grade_decay_rate
1845 << " search_last_n " << p.hit_set_search_last_n;
1846 }
1847 if (p.min_read_recency_for_promote)
1848 out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
1849 if (p.min_write_recency_for_promote)
1850 out << " min_write_recency_for_promote " << p.min_write_recency_for_promote;
1851 out << " stripe_width " << p.get_stripe_width();
1852 if (p.expected_num_objects)
1853 out << " expected_num_objects " << p.expected_num_objects;
1854 if (p.fast_read)
1855 out << " fast_read " << p.fast_read;
1856 out << p.opts;
1857 return out;
1858}
1859
1860
1861// -- object_stat_sum_t --
1862
// Dump every counter of this object_stat_sum_t to the Formatter as an
// integer value.
//
// Most keys match the member name.  The exceptions are the read/write
// counters, which are emitted as "num_read", "num_read_kb", "num_write"
// and "num_write_kb" for num_rd, num_rd_kb, num_wr and num_wr_kb.
1863void object_stat_sum_t::dump(Formatter *f) const
1864{
1865 f->dump_int("num_bytes", num_bytes);
1866 f->dump_int("num_objects", num_objects);
1867 f->dump_int("num_object_clones", num_object_clones);
1868 f->dump_int("num_object_copies", num_object_copies);
1869 f->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary);
1870 f->dump_int("num_objects_missing", num_objects_missing);
1871 f->dump_int("num_objects_degraded", num_objects_degraded);
1872 f->dump_int("num_objects_misplaced", num_objects_misplaced);
1873 f->dump_int("num_objects_unfound", num_objects_unfound);
1874 f->dump_int("num_objects_dirty", num_objects_dirty);
1875 f->dump_int("num_whiteouts", num_whiteouts);
1876 f->dump_int("num_read", num_rd);
1877 f->dump_int("num_read_kb", num_rd_kb);
1878 f->dump_int("num_write", num_wr);
1879 f->dump_int("num_write_kb", num_wr_kb);
1880 f->dump_int("num_scrub_errors", num_scrub_errors);
1881 f->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors);
1882 f->dump_int("num_deep_scrub_errors", num_deep_scrub_errors);
1883 f->dump_int("num_objects_recovered", num_objects_recovered);
1884 f->dump_int("num_bytes_recovered", num_bytes_recovered);
1885 f->dump_int("num_keys_recovered", num_keys_recovered);
1886 f->dump_int("num_objects_omap", num_objects_omap);
1887 f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
1888 f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
1889 f->dump_int("num_flush", num_flush);
1890 f->dump_int("num_flush_kb", num_flush_kb);
1891 f->dump_int("num_evict", num_evict);
1892 f->dump_int("num_evict_kb", num_evict_kb);
1893 f->dump_int("num_promote", num_promote);
1894 f->dump_int("num_flush_mode_high", num_flush_mode_high);
1895 f->dump_int("num_flush_mode_low", num_flush_mode_low);
1896 f->dump_int("num_evict_mode_some", num_evict_mode_some);
1897 f->dump_int("num_evict_mode_full", num_evict_mode_full);
1898 f->dump_int("num_objects_pinned", num_objects_pinned);
1899 f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
1900}
1901
// Encode the counters as version 16 (compat 14).
//
// When CEPH_LITTLE_ENDIAN is defined, sizeof(object_stat_sum_t) raw
// bytes starting at the address of num_bytes are appended in one go;
// otherwise each counter is encoded individually in the order listed.
//
// NOTE(review): the raw-copy path presumably relies on num_bytes being
// the first member and on the in-memory layout matching the per-field
// order below -- confirm against the struct declaration before adding
// or reordering members.
1902void object_stat_sum_t::encode(bufferlist& bl) const
1903{
1904 ENCODE_START(16, 14, bl);
1905#if defined(CEPH_LITTLE_ENDIAN)
1906 bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
1907#else
1908 ::encode(num_bytes, bl);
1909 ::encode(num_objects, bl);
1910 ::encode(num_object_clones, bl);
1911 ::encode(num_object_copies, bl);
1912 ::encode(num_objects_missing_on_primary, bl);
1913 ::encode(num_objects_degraded, bl);
1914 ::encode(num_objects_unfound, bl);
1915 ::encode(num_rd, bl);
1916 ::encode(num_rd_kb, bl);
1917 ::encode(num_wr, bl);
1918 ::encode(num_wr_kb, bl);
1919 ::encode(num_scrub_errors, bl);
1920 ::encode(num_objects_recovered, bl);
1921 ::encode(num_bytes_recovered, bl);
1922 ::encode(num_keys_recovered, bl);
1923 ::encode(num_shallow_scrub_errors, bl);
1924 ::encode(num_deep_scrub_errors, bl);
1925 ::encode(num_objects_dirty, bl);
1926 ::encode(num_whiteouts, bl);
1927 ::encode(num_objects_omap, bl);
1928 ::encode(num_objects_hit_set_archive, bl);
1929 ::encode(num_objects_misplaced, bl);
1930 ::encode(num_bytes_hit_set_archive, bl);
1931 ::encode(num_flush, bl);
1932 ::encode(num_flush_kb, bl);
1933 ::encode(num_evict, bl);
1934 ::encode(num_evict_kb, bl);
1935 ::encode(num_promote, bl);
1936 ::encode(num_flush_mode_high, bl);
1937 ::encode(num_flush_mode_low, bl);
1938 ::encode(num_evict_mode_some, bl);
1939 ::encode(num_evict_mode_full, bl);
1940 ::encode(num_objects_pinned, bl);
1941 ::encode(num_objects_missing, bl);
1942 ::encode(num_legacy_snapsets, bl);
1943#endif
1944 ENCODE_FINISH(bl);
1945}
1946
1947void object_stat_sum_t::decode(bufferlist::iterator& bl)
1948{
1949 bool decode_finish = false;
1950 DECODE_START(16, bl);
1951#if defined(CEPH_LITTLE_ENDIAN)
1952 if (struct_v >= 16) {
1953 bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
1954 decode_finish = true;
1955 }
1956#endif
1957 if (!decode_finish) {
1958 ::decode(num_bytes, bl);
1959 ::decode(num_objects, bl);
1960 ::decode(num_object_clones, bl);
1961 ::decode(num_object_copies, bl);
1962 ::decode(num_objects_missing_on_primary, bl);
1963 ::decode(num_objects_degraded, bl);
1964 ::decode(num_objects_unfound, bl);
1965 ::decode(num_rd, bl);
1966 ::decode(num_rd_kb, bl);
1967 ::decode(num_wr, bl);
1968 ::decode(num_wr_kb, bl);
1969 ::decode(num_scrub_errors, bl);
1970 ::decode(num_objects_recovered, bl);
1971 ::decode(num_bytes_recovered, bl);
1972 ::decode(num_keys_recovered, bl);
1973 ::decode(num_shallow_scrub_errors, bl);
1974 ::decode(num_deep_scrub_errors, bl);
1975 ::decode(num_objects_dirty, bl);
1976 ::decode(num_whiteouts, bl);
1977 ::decode(num_objects_omap, bl);
1978 ::decode(num_objects_hit_set_archive, bl);
1979 ::decode(num_objects_misplaced, bl);
1980 ::decode(num_bytes_hit_set_archive, bl);
1981 ::decode(num_flush, bl);
1982 ::decode(num_flush_kb, bl);
1983 ::decode(num_evict, bl);
1984 ::decode(num_evict_kb, bl);
1985 ::decode(num_promote, bl);
1986 ::decode(num_flush_mode_high, bl);
1987 ::decode(num_flush_mode_low, bl);
1988 ::decode(num_evict_mode_some, bl);
1989 ::decode(num_evict_mode_full, bl);
1990 ::decode(num_objects_pinned, bl);
1991 ::decode(num_objects_missing, bl);
1992 if (struct_v >= 16) {
1993 ::decode(num_legacy_snapsets, bl);
1994 } else {
1995 num_legacy_snapsets = num_object_clones; // upper bound
1996 }
1997 }
1998 DECODE_FINISH(bl);
1999}
2000
2001void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
2002{
2003 object_stat_sum_t a;
2004
2005 a.num_bytes = 1;
2006 a.num_objects = 3;
2007 a.num_object_clones = 4;
2008 a.num_object_copies = 5;
2009 a.num_objects_missing_on_primary = 6;
2010 a.num_objects_missing = 123;
2011 a.num_objects_degraded = 7;
2012 a.num_objects_unfound = 8;
2013 a.num_rd = 9; a.num_rd_kb = 10;
2014 a.num_wr = 11; a.num_wr_kb = 12;
2015 a.num_objects_recovered = 14;
2016 a.num_bytes_recovered = 15;
2017 a.num_keys_recovered = 16;
2018 a.num_deep_scrub_errors = 17;
2019 a.num_shallow_scrub_errors = 18;
2020 a.num_scrub_errors = a.num_deep_scrub_errors + a.num_shallow_scrub_errors;
2021 a.num_objects_dirty = 21;
2022 a.num_whiteouts = 22;
2023 a.num_objects_misplaced = 1232;
2024 a.num_objects_hit_set_archive = 2;
2025 a.num_bytes_hit_set_archive = 27;
2026 a.num_flush = 5;
2027 a.num_flush_kb = 6;
2028 a.num_evict = 7;
2029 a.num_evict_kb = 8;
2030 a.num_promote = 9;
2031 a.num_flush_mode_high = 0;
2032 a.num_flush_mode_low = 1;
2033 a.num_evict_mode_some = 1;
2034 a.num_evict_mode_full = 0;
2035 a.num_objects_pinned = 20;
2036 o.push_back(new object_stat_sum_t(a));
2037}
2038
// Accumulate another set of stats into this one: every counter of o is
// added to the counter of the same name in *this.  o is left unchanged.
2039void object_stat_sum_t::add(const object_stat_sum_t& o)
2040{
2041 num_bytes += o.num_bytes;
2042 num_objects += o.num_objects;
2043 num_object_clones += o.num_object_clones;
2044 num_object_copies += o.num_object_copies;
2045 num_objects_missing_on_primary += o.num_objects_missing_on_primary;
2046 num_objects_missing += o.num_objects_missing;
2047 num_objects_degraded += o.num_objects_degraded;
2048 num_objects_misplaced += o.num_objects_misplaced;
2049 num_rd += o.num_rd;
2050 num_rd_kb += o.num_rd_kb;
2051 num_wr += o.num_wr;
2052 num_wr_kb += o.num_wr_kb;
2053 num_objects_unfound += o.num_objects_unfound;
2054 num_scrub_errors += o.num_scrub_errors;
2055 num_shallow_scrub_errors += o.num_shallow_scrub_errors;
2056 num_deep_scrub_errors += o.num_deep_scrub_errors;
2057 num_objects_recovered += o.num_objects_recovered;
2058 num_bytes_recovered += o.num_bytes_recovered;
2059 num_keys_recovered += o.num_keys_recovered;
2060 num_objects_dirty += o.num_objects_dirty;
2061 num_whiteouts += o.num_whiteouts;
2062 num_objects_omap += o.num_objects_omap;
2063 num_objects_hit_set_archive += o.num_objects_hit_set_archive;
2064 num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
2065 num_flush += o.num_flush;
2066 num_flush_kb += o.num_flush_kb;
2067 num_evict += o.num_evict;
2068 num_evict_kb += o.num_evict_kb;
2069 num_promote += o.num_promote;
2070 num_flush_mode_high += o.num_flush_mode_high;
2071 num_flush_mode_low += o.num_flush_mode_low;
2072 num_evict_mode_some += o.num_evict_mode_some;
2073 num_evict_mode_full += o.num_evict_mode_full;
2074 num_objects_pinned += o.num_objects_pinned;
2075 num_legacy_snapsets += o.num_legacy_snapsets;
2076}
2077
// Subtract another set of stats from this one: every counter of o is
// subtracted from the counter of the same name in *this.  No clamping
// is applied, so a counter goes negative if o's value exceeds it.
2078void object_stat_sum_t::sub(const object_stat_sum_t& o)
2079{
2080 num_bytes -= o.num_bytes;
2081 num_objects -= o.num_objects;
2082 num_object_clones -= o.num_object_clones;
2083 num_object_copies -= o.num_object_copies;
2084 num_objects_missing_on_primary -= o.num_objects_missing_on_primary;
2085 num_objects_missing -= o.num_objects_missing;
2086 num_objects_degraded -= o.num_objects_degraded;
2087 num_objects_misplaced -= o.num_objects_misplaced;
2088 num_rd -= o.num_rd;
2089 num_rd_kb -= o.num_rd_kb;
2090 num_wr -= o.num_wr;
2091 num_wr_kb -= o.num_wr_kb;
2092 num_objects_unfound -= o.num_objects_unfound;
2093 num_scrub_errors -= o.num_scrub_errors;
2094 num_shallow_scrub_errors -= o.num_shallow_scrub_errors;
2095 num_deep_scrub_errors -= o.num_deep_scrub_errors;
2096 num_objects_recovered -= o.num_objects_recovered;
2097 num_bytes_recovered -= o.num_bytes_recovered;
2098 num_keys_recovered -= o.num_keys_recovered;
2099 num_objects_dirty -= o.num_objects_dirty;
2100 num_whiteouts -= o.num_whiteouts;
2101 num_objects_omap -= o.num_objects_omap;
2102 num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
2103 num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
2104 num_flush -= o.num_flush;
2105 num_flush_kb -= o.num_flush_kb;
2106 num_evict -= o.num_evict;
2107 num_evict_kb -= o.num_evict_kb;
2108 num_promote -= o.num_promote;
2109 num_flush_mode_high -= o.num_flush_mode_high;
2110 num_flush_mode_low -= o.num_flush_mode_low;
2111 num_evict_mode_some -= o.num_evict_mode_some;
2112 num_evict_mode_full -= o.num_evict_mode_full;
2113 num_objects_pinned -= o.num_objects_pinned;
2114 num_legacy_snapsets -= o.num_legacy_snapsets;
2115}
2116
2117bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
2118{
2119 return
2120 l.num_bytes == r.num_bytes &&
2121 l.num_objects == r.num_objects &&
2122 l.num_object_clones == r.num_object_clones &&
2123 l.num_object_copies == r.num_object_copies &&
2124 l.num_objects_missing_on_primary == r.num_objects_missing_on_primary &&
2125 l.num_objects_missing == r.num_objects_missing &&
2126 l.num_objects_degraded == r.num_objects_degraded &&
2127 l.num_objects_misplaced == r.num_objects_misplaced &&
2128 l.num_objects_unfound == r.num_objects_unfound &&
2129 l.num_rd == r.num_rd &&
2130 l.num_rd_kb == r.num_rd_kb &&
2131 l.num_wr == r.num_wr &&
2132 l.num_wr_kb == r.num_wr_kb &&
2133 l.num_scrub_errors == r.num_scrub_errors &&
2134 l.num_shallow_scrub_errors == r.num_shallow_scrub_errors &&
2135 l.num_deep_scrub_errors == r.num_deep_scrub_errors &&
2136 l.num_objects_recovered == r.num_objects_recovered &&
2137 l.num_bytes_recovered == r.num_bytes_recovered &&
2138 l.num_keys_recovered == r.num_keys_recovered &&
2139 l.num_objects_dirty == r.num_objects_dirty &&
2140 l.num_whiteouts == r.num_whiteouts &&
2141 l.num_objects_omap == r.num_objects_omap &&
2142 l.num_objects_hit_set_archive == r.num_objects_hit_set_archive &&
2143 l.num_bytes_hit_set_archive == r.num_bytes_hit_set_archive &&
2144 l.num_flush == r.num_flush &&
2145 l.num_flush_kb == r.num_flush_kb &&
2146 l.num_evict == r.num_evict &&
2147 l.num_evict_kb == r.num_evict_kb &&
2148 l.num_promote == r.num_promote &&
2149 l.num_flush_mode_high == r.num_flush_mode_high &&
2150 l.num_flush_mode_low == r.num_flush_mode_low &&
2151 l.num_evict_mode_some == r.num_evict_mode_some &&
2152 l.num_evict_mode_full == r.num_evict_mode_full &&
2153 l.num_objects_pinned == r.num_objects_pinned &&
2154 l.num_legacy_snapsets == r.num_legacy_snapsets;
2155}
2156
2157// -- object_stat_collection_t --
2158
2159void object_stat_collection_t::dump(Formatter *f) const
2160{
2161 f->open_object_section("stat_sum");
2162 sum.dump(f);
2163 f->close_section();
2164}
2165
2166void object_stat_collection_t::encode(bufferlist& bl) const
2167{
2168 ENCODE_START(2, 2, bl);
2169 ::encode(sum, bl);
2170 ::encode((__u32)0, bl);
2171 ENCODE_FINISH(bl);
2172}
2173
2174void object_stat_collection_t::decode(bufferlist::iterator& bl)
2175{
2176 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
2177 ::decode(sum, bl);
2178 {
2179 map<string,object_stat_sum_t> cat_sum;
2180 ::decode(cat_sum, bl);
2181 }
2182 DECODE_FINISH(bl);
2183}
2184
2185void object_stat_collection_t::generate_test_instances(list<object_stat_collection_t*>& o)
2186{
2187 object_stat_collection_t a;
2188 o.push_back(new object_stat_collection_t(a));
2189 list<object_stat_sum_t*> l;
2190 object_stat_sum_t::generate_test_instances(l);
2191 for (list<object_stat_sum_t*>::iterator p = l.begin(); p != l.end(); ++p) {
2192 a.add(**p);
2193 o.push_back(new object_stat_collection_t(a));
2194 }
2195}
2196
2197
2198// -- pg_stat_t --
2199
2200bool pg_stat_t::is_acting_osd(int32_t osd, bool primary) const
2201{
2202 if (primary && osd == acting_primary) {
2203 return true;
2204 } else if (!primary) {
2205 for(vector<int32_t>::const_iterator it = acting.begin();
2206 it != acting.end(); ++it)
2207 {
2208 if (*it == osd)
2209 return true;
2210 }
2211 }
2212 return false;
2213}
2214
2215void pg_stat_t::dump(Formatter *f) const
2216{
2217 f->dump_stream("version") << version;
2218 f->dump_stream("reported_seq") << reported_seq;
2219 f->dump_stream("reported_epoch") << reported_epoch;
2220 f->dump_string("state", pg_state_string(state));
2221 f->dump_stream("last_fresh") << last_fresh;
2222 f->dump_stream("last_change") << last_change;
2223 f->dump_stream("last_active") << last_active;
2224 f->dump_stream("last_peered") << last_peered;
2225 f->dump_stream("last_clean") << last_clean;
2226 f->dump_stream("last_became_active") << last_became_active;
2227 f->dump_stream("last_became_peered") << last_became_peered;
2228 f->dump_stream("last_unstale") << last_unstale;
2229 f->dump_stream("last_undegraded") << last_undegraded;
2230 f->dump_stream("last_fullsized") << last_fullsized;
2231 f->dump_unsigned("mapping_epoch", mapping_epoch);
2232 f->dump_stream("log_start") << log_start;
2233 f->dump_stream("ondisk_log_start") << ondisk_log_start;
2234 f->dump_unsigned("created", created);
2235 f->dump_unsigned("last_epoch_clean", last_epoch_clean);
2236 f->dump_stream("parent") << parent;
2237 f->dump_unsigned("parent_split_bits", parent_split_bits);
2238 f->dump_stream("last_scrub") << last_scrub;
2239 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2240 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2241 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2242 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2243 f->dump_int("log_size", log_size);
2244 f->dump_int("ondisk_log_size", ondisk_log_size);
2245 f->dump_bool("stats_invalid", stats_invalid);
2246 f->dump_bool("dirty_stats_invalid", dirty_stats_invalid);
2247 f->dump_bool("omap_stats_invalid", omap_stats_invalid);
2248 f->dump_bool("hitset_stats_invalid", hitset_stats_invalid);
2249 f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid);
2250 f->dump_bool("pin_stats_invalid", pin_stats_invalid);
2251 stats.dump(f);
2252 f->open_array_section("up");
2253 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2254 f->dump_int("osd", *p);
2255 f->close_section();
2256 f->open_array_section("acting");
2257 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2258 f->dump_int("osd", *p);
2259 f->close_section();
2260 f->open_array_section("blocked_by");
2261 for (vector<int32_t>::const_iterator p = blocked_by.begin();
2262 p != blocked_by.end(); ++p)
2263 f->dump_int("osd", *p);
2264 f->close_section();
2265 f->dump_int("up_primary", up_primary);
2266 f->dump_int("acting_primary", acting_primary);
2267}
2268
2269void pg_stat_t::dump_brief(Formatter *f) const
2270{
2271 f->dump_string("state", pg_state_string(state));
2272 f->open_array_section("up");
2273 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2274 f->dump_int("osd", *p);
2275 f->close_section();
2276 f->open_array_section("acting");
2277 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2278 f->dump_int("osd", *p);
2279 f->close_section();
2280 f->dump_int("up_primary", up_primary);
2281 f->dump_int("acting_primary", acting_primary);
2282}
2283
2284void pg_stat_t::encode(bufferlist &bl) const
2285{
2286 ENCODE_START(22, 22, bl);
2287 ::encode(version, bl);
2288 ::encode(reported_seq, bl);
2289 ::encode(reported_epoch, bl);
2290 ::encode(state, bl);
2291 ::encode(log_start, bl);
2292 ::encode(ondisk_log_start, bl);
2293 ::encode(created, bl);
2294 ::encode(last_epoch_clean, bl);
2295 ::encode(parent, bl);
2296 ::encode(parent_split_bits, bl);
2297 ::encode(last_scrub, bl);
2298 ::encode(last_scrub_stamp, bl);
2299 ::encode(stats, bl);
2300 ::encode(log_size, bl);
2301 ::encode(ondisk_log_size, bl);
2302 ::encode(up, bl);
2303 ::encode(acting, bl);
2304 ::encode(last_fresh, bl);
2305 ::encode(last_change, bl);
2306 ::encode(last_active, bl);
2307 ::encode(last_clean, bl);
2308 ::encode(last_unstale, bl);
2309 ::encode(mapping_epoch, bl);
2310 ::encode(last_deep_scrub, bl);
2311 ::encode(last_deep_scrub_stamp, bl);
2312 ::encode(stats_invalid, bl);
2313 ::encode(last_clean_scrub_stamp, bl);
2314 ::encode(last_became_active, bl);
2315 ::encode(dirty_stats_invalid, bl);
2316 ::encode(up_primary, bl);
2317 ::encode(acting_primary, bl);
2318 ::encode(omap_stats_invalid, bl);
2319 ::encode(hitset_stats_invalid, bl);
2320 ::encode(blocked_by, bl);
2321 ::encode(last_undegraded, bl);
2322 ::encode(last_fullsized, bl);
2323 ::encode(hitset_bytes_stats_invalid, bl);
2324 ::encode(last_peered, bl);
2325 ::encode(last_became_peered, bl);
2326 ::encode(pin_stats_invalid, bl);
2327 ENCODE_FINISH(bl);
2328}
2329
2330void pg_stat_t::decode(bufferlist::iterator &bl)
2331{
2332 bool tmp;
2333 DECODE_START(22, bl);
2334 ::decode(version, bl);
2335 ::decode(reported_seq, bl);
2336 ::decode(reported_epoch, bl);
2337 ::decode(state, bl);
2338 ::decode(log_start, bl);
2339 ::decode(ondisk_log_start, bl);
2340 ::decode(created, bl);
2341 ::decode(last_epoch_clean, bl);
2342 ::decode(parent, bl);
2343 ::decode(parent_split_bits, bl);
2344 ::decode(last_scrub, bl);
2345 ::decode(last_scrub_stamp, bl);
2346 ::decode(stats, bl);
2347 ::decode(log_size, bl);
2348 ::decode(ondisk_log_size, bl);
2349 ::decode(up, bl);
2350 ::decode(acting, bl);
2351 ::decode(last_fresh, bl);
2352 ::decode(last_change, bl);
2353 ::decode(last_active, bl);
2354 ::decode(last_clean, bl);
2355 ::decode(last_unstale, bl);
2356 ::decode(mapping_epoch, bl);
2357 ::decode(last_deep_scrub, bl);
2358 ::decode(last_deep_scrub_stamp, bl);
2359 ::decode(tmp, bl);
2360 stats_invalid = tmp;
2361 ::decode(last_clean_scrub_stamp, bl);
2362 ::decode(last_became_active, bl);
2363 ::decode(tmp, bl);
2364 dirty_stats_invalid = tmp;
2365 ::decode(up_primary, bl);
2366 ::decode(acting_primary, bl);
2367 ::decode(tmp, bl);
2368 omap_stats_invalid = tmp;
2369 ::decode(tmp, bl);
2370 hitset_stats_invalid = tmp;
2371 ::decode(blocked_by, bl);
2372 ::decode(last_undegraded, bl);
2373 ::decode(last_fullsized, bl);
2374 ::decode(tmp, bl);
2375 hitset_bytes_stats_invalid = tmp;
2376 ::decode(last_peered, bl);
2377 ::decode(last_became_peered, bl);
2378 ::decode(tmp, bl);
2379 pin_stats_invalid = tmp;
2380 DECODE_FINISH(bl);
2381}
2382
2383void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
2384{
2385 pg_stat_t a;
2386 o.push_back(new pg_stat_t(a));
2387
2388 a.version = eversion_t(1, 3);
2389 a.reported_epoch = 1;
2390 a.reported_seq = 2;
2391 a.state = 123;
2392 a.mapping_epoch = 998;
2393 a.last_fresh = utime_t(1002, 1);
2394 a.last_change = utime_t(1002, 2);
2395 a.last_active = utime_t(1002, 3);
2396 a.last_clean = utime_t(1002, 4);
2397 a.last_unstale = utime_t(1002, 5);
2398 a.last_undegraded = utime_t(1002, 7);
2399 a.last_fullsized = utime_t(1002, 8);
2400 a.log_start = eversion_t(1, 4);
2401 a.ondisk_log_start = eversion_t(1, 5);
2402 a.created = 6;
2403 a.last_epoch_clean = 7;
2404 a.parent = pg_t(1, 2, 3);
2405 a.parent_split_bits = 12;
2406 a.last_scrub = eversion_t(9, 10);
2407 a.last_scrub_stamp = utime_t(11, 12);
2408 a.last_deep_scrub = eversion_t(13, 14);
2409 a.last_deep_scrub_stamp = utime_t(15, 16);
2410 a.last_clean_scrub_stamp = utime_t(17, 18);
2411 list<object_stat_collection_t*> l;
2412 object_stat_collection_t::generate_test_instances(l);
2413 a.stats = *l.back();
2414 a.log_size = 99;
2415 a.ondisk_log_size = 88;
2416 a.up.push_back(123);
2417 a.up_primary = 123;
2418 a.acting.push_back(456);
2419 a.acting_primary = 456;
2420 o.push_back(new pg_stat_t(a));
2421
2422 a.up.push_back(124);
2423 a.up_primary = 124;
2424 a.acting.push_back(124);
2425 a.acting_primary = 124;
2426 a.blocked_by.push_back(155);
2427 a.blocked_by.push_back(156);
2428 o.push_back(new pg_stat_t(a));
2429}
2430
2431bool operator==(const pg_stat_t& l, const pg_stat_t& r)
2432{
2433 return
2434 l.version == r.version &&
2435 l.reported_seq == r.reported_seq &&
2436 l.reported_epoch == r.reported_epoch &&
2437 l.state == r.state &&
2438 l.last_fresh == r.last_fresh &&
2439 l.last_change == r.last_change &&
2440 l.last_active == r.last_active &&
2441 l.last_peered == r.last_peered &&
2442 l.last_clean == r.last_clean &&
2443 l.last_unstale == r.last_unstale &&
2444 l.last_undegraded == r.last_undegraded &&
2445 l.last_fullsized == r.last_fullsized &&
2446 l.log_start == r.log_start &&
2447 l.ondisk_log_start == r.ondisk_log_start &&
2448 l.created == r.created &&
2449 l.last_epoch_clean == r.last_epoch_clean &&
2450 l.parent == r.parent &&
2451 l.parent_split_bits == r.parent_split_bits &&
2452 l.last_scrub == r.last_scrub &&
2453 l.last_deep_scrub == r.last_deep_scrub &&
2454 l.last_scrub_stamp == r.last_scrub_stamp &&
2455 l.last_deep_scrub_stamp == r.last_deep_scrub_stamp &&
2456 l.last_clean_scrub_stamp == r.last_clean_scrub_stamp &&
2457 l.stats == r.stats &&
2458 l.stats_invalid == r.stats_invalid &&
2459 l.log_size == r.log_size &&
2460 l.ondisk_log_size == r.ondisk_log_size &&
2461 l.up == r.up &&
2462 l.acting == r.acting &&
2463 l.mapping_epoch == r.mapping_epoch &&
2464 l.blocked_by == r.blocked_by &&
2465 l.last_became_active == r.last_became_active &&
2466 l.last_became_peered == r.last_became_peered &&
2467 l.dirty_stats_invalid == r.dirty_stats_invalid &&
2468 l.omap_stats_invalid == r.omap_stats_invalid &&
2469 l.hitset_stats_invalid == r.hitset_stats_invalid &&
2470 l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid &&
2471 l.up_primary == r.up_primary &&
2472 l.acting_primary == r.acting_primary &&
2473 l.pin_stats_invalid == r.pin_stats_invalid;
2474}
2475
2476// -- pool_stat_t --
2477
2478void pool_stat_t::dump(Formatter *f) const
2479{
2480 stats.dump(f);
2481 f->dump_int("log_size", log_size);
2482 f->dump_int("ondisk_log_size", ondisk_log_size);
2483 f->dump_int("up", up);
2484 f->dump_int("acting", acting);
2485}
2486
2487void pool_stat_t::encode(bufferlist &bl, uint64_t features) const
2488{
2489 if ((features & CEPH_FEATURE_OSDENC) == 0) {
2490 __u8 v = 4;
2491 ::encode(v, bl);
2492 ::encode(stats, bl);
2493 ::encode(log_size, bl);
2494 ::encode(ondisk_log_size, bl);
2495 return;
2496 }
2497
2498 ENCODE_START(6, 5, bl);
2499 ::encode(stats, bl);
2500 ::encode(log_size, bl);
2501 ::encode(ondisk_log_size, bl);
2502 ::encode(up, bl);
2503 ::encode(acting, bl);
2504 ENCODE_FINISH(bl);
2505}
2506
2507void pool_stat_t::decode(bufferlist::iterator &bl)
2508{
2509 DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
2510 if (struct_v >= 4) {
2511 ::decode(stats, bl);
2512 ::decode(log_size, bl);
2513 ::decode(ondisk_log_size, bl);
2514 if (struct_v >= 6) {
2515 ::decode(up, bl);
2516 ::decode(acting, bl);
2517 } else {
2518 up = 0;
2519 acting = 0;
2520 }
2521 } else {
2522 ::decode(stats.sum.num_bytes, bl);
2523 uint64_t num_kb;
2524 ::decode(num_kb, bl);
2525 ::decode(stats.sum.num_objects, bl);
2526 ::decode(stats.sum.num_object_clones, bl);
2527 ::decode(stats.sum.num_object_copies, bl);
2528 ::decode(stats.sum.num_objects_missing_on_primary, bl);
2529 ::decode(stats.sum.num_objects_degraded, bl);
2530 ::decode(log_size, bl);
2531 ::decode(ondisk_log_size, bl);
2532 if (struct_v >= 2) {
2533 ::decode(stats.sum.num_rd, bl);
2534 ::decode(stats.sum.num_rd_kb, bl);
2535 ::decode(stats.sum.num_wr, bl);
2536 ::decode(stats.sum.num_wr_kb, bl);
2537 }
2538 if (struct_v >= 3) {
2539 ::decode(stats.sum.num_objects_unfound, bl);
2540 }
2541 }
2542 DECODE_FINISH(bl);
2543}
2544
2545void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
2546{
2547 pool_stat_t a;
2548 o.push_back(new pool_stat_t(a));
2549
2550 list<object_stat_collection_t*> l;
2551 object_stat_collection_t::generate_test_instances(l);
2552 a.stats = *l.back();
2553 a.log_size = 123;
2554 a.ondisk_log_size = 456;
2555 a.acting = 3;
2556 a.up = 4;
2557 o.push_back(new pool_stat_t(a));
2558}
2559
2560
2561// -- pg_history_t --
2562
2563void pg_history_t::encode(bufferlist &bl) const
2564{
31f18b77 2565 ENCODE_START(9, 4, bl);
7c673cae
FG
2566 ::encode(epoch_created, bl);
2567 ::encode(last_epoch_started, bl);
2568 ::encode(last_epoch_clean, bl);
2569 ::encode(last_epoch_split, bl);
2570 ::encode(same_interval_since, bl);
2571 ::encode(same_up_since, bl);
2572 ::encode(same_primary_since, bl);
2573 ::encode(last_scrub, bl);
2574 ::encode(last_scrub_stamp, bl);
2575 ::encode(last_deep_scrub, bl);
2576 ::encode(last_deep_scrub_stamp, bl);
2577 ::encode(last_clean_scrub_stamp, bl);
2578 ::encode(last_epoch_marked_full, bl);
2579 ::encode(last_interval_started, bl);
2580 ::encode(last_interval_clean, bl);
31f18b77 2581 ::encode(epoch_pool_created, bl);
7c673cae
FG
2582 ENCODE_FINISH(bl);
2583}
2584
2585void pg_history_t::decode(bufferlist::iterator &bl)
2586{
31f18b77 2587 DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl);
7c673cae
FG
2588 ::decode(epoch_created, bl);
2589 ::decode(last_epoch_started, bl);
2590 if (struct_v >= 3)
2591 ::decode(last_epoch_clean, bl);
2592 else
2593 last_epoch_clean = last_epoch_started; // careful, it's a lie!
2594 ::decode(last_epoch_split, bl);
2595 ::decode(same_interval_since, bl);
2596 ::decode(same_up_since, bl);
2597 ::decode(same_primary_since, bl);
2598 if (struct_v >= 2) {
2599 ::decode(last_scrub, bl);
2600 ::decode(last_scrub_stamp, bl);
2601 }
2602 if (struct_v >= 5) {
2603 ::decode(last_deep_scrub, bl);
2604 ::decode(last_deep_scrub_stamp, bl);
2605 }
2606 if (struct_v >= 6) {
2607 ::decode(last_clean_scrub_stamp, bl);
2608 }
2609 if (struct_v >= 7) {
2610 ::decode(last_epoch_marked_full, bl);
2611 }
2612 if (struct_v >= 8) {
2613 ::decode(last_interval_started, bl);
2614 ::decode(last_interval_clean, bl);
2615 } else {
2616 if (last_epoch_started >= same_interval_since) {
2617 last_interval_started = same_interval_since;
2618 } else {
2619 last_interval_started = last_epoch_started; // best guess
2620 }
2621 if (last_epoch_clean >= same_interval_since) {
2622 last_interval_clean = same_interval_since;
2623 } else {
2624 last_interval_clean = last_epoch_clean; // best guess
2625 }
2626 }
31f18b77
FG
2627 if (struct_v >= 9) {
2628 ::decode(epoch_pool_created, bl);
2629 } else {
2630 epoch_pool_created = epoch_created;
2631 }
7c673cae
FG
2632 DECODE_FINISH(bl);
2633}
2634
2635void pg_history_t::dump(Formatter *f) const
2636{
2637 f->dump_int("epoch_created", epoch_created);
31f18b77 2638 f->dump_int("epoch_pool_created", epoch_pool_created);
7c673cae
FG
2639 f->dump_int("last_epoch_started", last_epoch_started);
2640 f->dump_int("last_interval_started", last_interval_started);
2641 f->dump_int("last_epoch_clean", last_epoch_clean);
2642 f->dump_int("last_interval_clean", last_interval_clean);
2643 f->dump_int("last_epoch_split", last_epoch_split);
2644 f->dump_int("last_epoch_marked_full", last_epoch_marked_full);
2645 f->dump_int("same_up_since", same_up_since);
2646 f->dump_int("same_interval_since", same_interval_since);
2647 f->dump_int("same_primary_since", same_primary_since);
2648 f->dump_stream("last_scrub") << last_scrub;
2649 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2650 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2651 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2652 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2653}
2654
2655void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
2656{
2657 o.push_back(new pg_history_t);
2658 o.push_back(new pg_history_t);
2659 o.back()->epoch_created = 1;
31f18b77 2660 o.back()->epoch_pool_created = 1;
7c673cae
FG
2661 o.back()->last_epoch_started = 2;
2662 o.back()->last_interval_started = 2;
2663 o.back()->last_epoch_clean = 3;
2664 o.back()->last_interval_clean = 2;
2665 o.back()->last_epoch_split = 4;
2666 o.back()->same_up_since = 5;
2667 o.back()->same_interval_since = 6;
2668 o.back()->same_primary_since = 7;
2669 o.back()->last_scrub = eversion_t(8, 9);
2670 o.back()->last_scrub_stamp = utime_t(10, 11);
2671 o.back()->last_deep_scrub = eversion_t(12, 13);
2672 o.back()->last_deep_scrub_stamp = utime_t(14, 15);
2673 o.back()->last_clean_scrub_stamp = utime_t(16, 17);
2674 o.back()->last_epoch_marked_full = 18;
2675}
2676
2677
2678// -- pg_info_t --
2679
2680void pg_info_t::encode(bufferlist &bl) const
2681{
2682 ENCODE_START(32, 26, bl);
2683 ::encode(pgid.pgid, bl);
2684 ::encode(last_update, bl);
2685 ::encode(last_complete, bl);
2686 ::encode(log_tail, bl);
2687 if (last_backfill_bitwise && !last_backfill.is_max()) {
2688 ::encode(hobject_t(), bl);
2689 } else {
2690 ::encode(last_backfill, bl);
2691 }
2692 ::encode(stats, bl);
2693 history.encode(bl);
2694 ::encode(purged_snaps, bl);
2695 ::encode(last_epoch_started, bl);
2696 ::encode(last_user_version, bl);
2697 ::encode(hit_set, bl);
2698 ::encode(pgid.shard, bl);
2699 ::encode(last_backfill, bl);
2700 ::encode(last_backfill_bitwise, bl);
2701 ::encode(last_interval_started, bl);
2702 ENCODE_FINISH(bl);
2703}
2704
2705void pg_info_t::decode(bufferlist::iterator &bl)
2706{
2707 DECODE_START(32, bl);
2708 ::decode(pgid.pgid, bl);
2709 ::decode(last_update, bl);
2710 ::decode(last_complete, bl);
2711 ::decode(log_tail, bl);
2712 {
2713 hobject_t old_last_backfill;
2714 ::decode(old_last_backfill, bl);
2715 }
2716 ::decode(stats, bl);
2717 history.decode(bl);
2718 ::decode(purged_snaps, bl);
2719 ::decode(last_epoch_started, bl);
2720 ::decode(last_user_version, bl);
2721 ::decode(hit_set, bl);
2722 ::decode(pgid.shard, bl);
2723 ::decode(last_backfill, bl);
2724 ::decode(last_backfill_bitwise, bl);
2725 if (struct_v >= 32) {
2726 ::decode(last_interval_started, bl);
2727 } else {
2728 last_interval_started = last_epoch_started;
2729 }
2730 DECODE_FINISH(bl);
2731}
2732
2733// -- pg_info_t --
2734
2735void pg_info_t::dump(Formatter *f) const
2736{
2737 f->dump_stream("pgid") << pgid;
2738 f->dump_stream("last_update") << last_update;
2739 f->dump_stream("last_complete") << last_complete;
2740 f->dump_stream("log_tail") << log_tail;
2741 f->dump_int("last_user_version", last_user_version);
2742 f->dump_stream("last_backfill") << last_backfill;
2743 f->dump_int("last_backfill_bitwise", (int)last_backfill_bitwise);
2744 f->open_array_section("purged_snaps");
2745 for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
2746 i != purged_snaps.end();
2747 ++i) {
2748 f->open_object_section("purged_snap_interval");
2749 f->dump_stream("start") << i.get_start();
2750 f->dump_stream("length") << i.get_len();
2751 f->close_section();
2752 }
2753 f->close_section();
2754 f->open_object_section("history");
2755 history.dump(f);
2756 f->close_section();
2757 f->open_object_section("stats");
2758 stats.dump(f);
2759 f->close_section();
2760
2761 f->dump_int("empty", is_empty());
2762 f->dump_int("dne", dne());
2763 f->dump_int("incomplete", is_incomplete());
2764 f->dump_int("last_epoch_started", last_epoch_started);
2765
2766 f->open_object_section("hit_set_history");
2767 hit_set.dump(f);
2768 f->close_section();
2769}
2770
2771void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
2772{
2773 o.push_back(new pg_info_t);
2774 o.push_back(new pg_info_t);
2775 list<pg_history_t*> h;
2776 pg_history_t::generate_test_instances(h);
2777 o.back()->history = *h.back();
2778 o.back()->pgid = spg_t(pg_t(1, 2, -1), shard_id_t::NO_SHARD);
2779 o.back()->last_update = eversion_t(3, 4);
2780 o.back()->last_complete = eversion_t(5, 6);
2781 o.back()->last_user_version = 2;
2782 o.back()->log_tail = eversion_t(7, 8);
2783 o.back()->last_backfill = hobject_t(object_t("objname"), "key", 123, 456, -1, "");
2784 o.back()->last_backfill_bitwise = true;
2785 {
2786 list<pg_stat_t*> s;
2787 pg_stat_t::generate_test_instances(s);
2788 o.back()->stats = *s.back();
2789 }
2790 {
2791 list<pg_hit_set_history_t*> s;
2792 pg_hit_set_history_t::generate_test_instances(s);
2793 o.back()->hit_set = *s.back();
2794 }
2795}
2796
2797// -- pg_notify_t --
2798void pg_notify_t::encode(bufferlist &bl) const
2799{
2800 ENCODE_START(2, 2, bl);
2801 ::encode(query_epoch, bl);
2802 ::encode(epoch_sent, bl);
2803 ::encode(info, bl);
2804 ::encode(to, bl);
2805 ::encode(from, bl);
2806 ENCODE_FINISH(bl);
2807}
2808
2809void pg_notify_t::decode(bufferlist::iterator &bl)
2810{
2811 DECODE_START(2, bl);
2812 ::decode(query_epoch, bl);
2813 ::decode(epoch_sent, bl);
2814 ::decode(info, bl);
2815 ::decode(to, bl);
2816 ::decode(from, bl);
2817 DECODE_FINISH(bl);
2818}
2819
2820void pg_notify_t::dump(Formatter *f) const
2821{
2822 f->dump_int("from", from);
2823 f->dump_int("to", to);
2824 f->dump_unsigned("query_epoch", query_epoch);
2825 f->dump_unsigned("epoch_sent", epoch_sent);
2826 {
2827 f->open_object_section("info");
2828 info.dump(f);
2829 f->close_section();
2830 }
2831}
2832
2833void pg_notify_t::generate_test_instances(list<pg_notify_t*>& o)
2834{
2835 o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1, pg_info_t()));
2836 o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t()));
2837}
2838
2839ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
2840{
2841 lhs << "(query:" << notify.query_epoch
2842 << " sent:" << notify.epoch_sent
2843 << " " << notify.info;
2844 if (notify.from != shard_id_t::NO_SHARD ||
2845 notify.to != shard_id_t::NO_SHARD)
2846 lhs << " " << (unsigned)notify.from
2847 << "->" << (unsigned)notify.to;
2848 return lhs << ")";
2849}
2850
2851// -- pg_interval_t --
2852
2853void PastIntervals::pg_interval_t::encode(bufferlist& bl) const
2854{
2855 ENCODE_START(4, 2, bl);
2856 ::encode(first, bl);
2857 ::encode(last, bl);
2858 ::encode(up, bl);
2859 ::encode(acting, bl);
2860 ::encode(maybe_went_rw, bl);
2861 ::encode(primary, bl);
2862 ::encode(up_primary, bl);
2863 ENCODE_FINISH(bl);
2864}
2865
2866void PastIntervals::pg_interval_t::decode(bufferlist::iterator& bl)
2867{
2868 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
2869 ::decode(first, bl);
2870 ::decode(last, bl);
2871 ::decode(up, bl);
2872 ::decode(acting, bl);
2873 ::decode(maybe_went_rw, bl);
2874 if (struct_v >= 3) {
2875 ::decode(primary, bl);
2876 } else {
2877 if (acting.size())
2878 primary = acting[0];
2879 }
2880 if (struct_v >= 4) {
2881 ::decode(up_primary, bl);
2882 } else {
2883 if (up.size())
2884 up_primary = up[0];
2885 }
2886 DECODE_FINISH(bl);
2887}
2888
2889void PastIntervals::pg_interval_t::dump(Formatter *f) const
2890{
2891 f->dump_unsigned("first", first);
2892 f->dump_unsigned("last", last);
2893 f->dump_int("maybe_went_rw", maybe_went_rw ? 1 : 0);
2894 f->open_array_section("up");
2895 for (vector<int>::const_iterator p = up.begin(); p != up.end(); ++p)
2896 f->dump_int("osd", *p);
2897 f->close_section();
2898 f->open_array_section("acting");
2899 for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2900 f->dump_int("osd", *p);
2901 f->close_section();
2902 f->dump_int("primary", primary);
2903 f->dump_int("up_primary", up_primary);
2904}
2905
2906void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
2907{
2908 o.push_back(new pg_interval_t);
2909 o.push_back(new pg_interval_t);
2910 o.back()->up.push_back(1);
2911 o.back()->acting.push_back(2);
2912 o.back()->acting.push_back(3);
2913 o.back()->first = 4;
2914 o.back()->last = 5;
2915 o.back()->maybe_went_rw = true;
2916}
2917
// Encoder registration for PastIntervals::pg_interval_t. Presumably expands
// to the free ::encode/::decode overloads that forward to the member
// functions defined above, which is what lets the type be stored in encoded
// containers such as pi_simple_rep's interval_map -- TODO confirm against
// the macro definition.
2918WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
2919
2920class pi_simple_rep : public PastIntervals::interval_rep {
2921 map<epoch_t, PastIntervals::pg_interval_t> interval_map;
2922
2923 pi_simple_rep(
2924 bool ec_pool,
2925 std::list<PastIntervals::pg_interval_t> &&intervals) {
2926 for (auto &&i: intervals)
2927 add_interval(ec_pool, i);
2928 }
2929
2930public:
2931 pi_simple_rep() = default;
2932 pi_simple_rep(const pi_simple_rep &) = default;
2933 pi_simple_rep(pi_simple_rep &&) = default;
2934 pi_simple_rep &operator=(pi_simple_rep &&) = default;
2935 pi_simple_rep &operator=(const pi_simple_rep &) = default;
2936
2937 size_t size() const override { return interval_map.size(); }
2938 bool empty() const override { return interval_map.empty(); }
2939 void clear() override { interval_map.clear(); }
2940 pair<epoch_t, epoch_t> get_bounds() const override {
2941 auto iter = interval_map.begin();
2942 if (iter != interval_map.end()) {
2943 auto riter = interval_map.rbegin();
2944 return make_pair(
2945 iter->second.first,
2946 riter->second.last + 1);
2947 } else {
2948 return make_pair(0, 0);
2949 }
2950 }
2951 set<pg_shard_t> get_all_participants(
2952 bool ec_pool) const override {
2953 set<pg_shard_t> all_participants;
2954
2955 // We need to decide who might have unfound objects that we need
2956 auto p = interval_map.rbegin();
2957 auto end = interval_map.rend();
2958 for (; p != end; ++p) {
2959 const PastIntervals::pg_interval_t &interval(p->second);
2960 // If nothing changed, we don't care about this interval.
2961 if (!interval.maybe_went_rw)
2962 continue;
2963
2964 int i = 0;
2965 std::vector<int>::const_iterator a = interval.acting.begin();
2966 std::vector<int>::const_iterator a_end = interval.acting.end();
2967 for (; a != a_end; ++a, ++i) {
2968 pg_shard_t shard(*a, ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD);
2969 if (*a != CRUSH_ITEM_NONE)
2970 all_participants.insert(shard);
2971 }
2972 }
2973 return all_participants;
2974 }
2975 void add_interval(
2976 bool ec_pool,
2977 const PastIntervals::pg_interval_t &interval) override {
2978 interval_map[interval.first] = interval;
2979 }
2980 unique_ptr<PastIntervals::interval_rep> clone() const override {
2981 return unique_ptr<PastIntervals::interval_rep>(new pi_simple_rep(*this));
2982 }
2983 ostream &print(ostream &out) const override {
2984 return out << interval_map;
2985 }
2986 void encode(bufferlist &bl) const override {
2987 ::encode(interval_map, bl);
2988 }
2989 void decode(bufferlist::iterator &bl) override {
2990 ::decode(interval_map, bl);
2991 }
2992 void dump(Formatter *f) const override {
2993 f->open_array_section("PastIntervals::compat_rep");
2994 for (auto &&i: interval_map) {
2995 f->open_object_section("pg_interval_t");
2996 f->dump_int("epoch", i.first);
2997 f->open_object_section("interval");
2998 i.second.dump(f);
2999 f->close_section();
3000 f->close_section();
3001 }
3002 f->close_section();
3003 }
3004 bool is_classic() const override {
3005 return true;
3006 }
3007 static void generate_test_instances(list<pi_simple_rep*> &o) {
3008 using ival = PastIntervals::pg_interval_t;
3009 using ivallst = std::list<ival>;
3010 o.push_back(
3011 new pi_simple_rep(
3012 true, ivallst
3013 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3014 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3015 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3016 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3017 }));
3018 o.push_back(
3019 new pi_simple_rep(
3020 false, ivallst
3021 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3022 , ival{{ 1, 2}, { 1, 2}, 20, 30, true, 1, 1}
3023 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3024 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3025 }));
3026 o.push_back(
3027 new pi_simple_rep(
3028 true, ivallst
3029 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3030 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3031 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3032 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3033 }));
3034 return;
3035 }
3036 void iterate_mayberw_back_to(
3037 bool ec_pool,
3038 epoch_t les,
3039 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3040 for (auto i = interval_map.rbegin(); i != interval_map.rend(); ++i) {
3041 if (!i->second.maybe_went_rw)
3042 continue;
3043 if (i->second.last < les)
3044 break;
3045 set<pg_shard_t> actingset;
3046 for (unsigned j = 0; j < i->second.acting.size(); ++j) {
3047 if (i->second.acting[j] == CRUSH_ITEM_NONE)
3048 continue;
3049 actingset.insert(
3050 pg_shard_t(
3051 i->second.acting[j],
3052 ec_pool ? shard_id_t(j) : shard_id_t::NO_SHARD));
3053 }
3054 f(i->second.first, actingset);
3055 }
3056 }
3057
3058 bool has_full_intervals() const override { return true; }
3059 void iterate_all_intervals(
3060 std::function<void(const PastIntervals::pg_interval_t &)> &&f
3061 ) const override {
3062 for (auto &&i: interval_map) {
3063 f(i.second);
3064 }
3065 }
3066 virtual ~pi_simple_rep() override {}
3067};
3068
/**
 * pi_compact_rep
 *
 * PastIntervals only needs to be able to answer two questions:
 * 1) Where should the primary look for unfound objects?
 * 2) List a set of subsets of the OSDs such that contacting at least
 *    one from each subset guarantees we speak to at least one witness
 *    of any completed write.
 *
 * Crucially, 2) does not require keeping *all* past intervals.  Certainly,
 * we don't need to keep any where maybe_went_rw would be false.  We also
 * needn't keep two intervals where the actingset in one is a subset
 * of the other (only need to keep the smaller of the two sets).  In order
 * to accurately trim the set of intervals as last_epoch_started changes
 * without rebuilding the set from scratch, we'll retain the larger set
 * if it is in an older interval.
 */
3086struct compact_interval_t {
3087 epoch_t first;
3088 epoch_t last;
3089 set<pg_shard_t> acting;
3090 bool supersedes(const compact_interval_t &other) {
3091 for (auto &&i: acting) {
3092 if (!other.acting.count(i))
3093 return false;
3094 }
3095 return true;
3096 }
3097 void dump(Formatter *f) const {
3098 f->open_object_section("compact_interval_t");
3099 f->dump_stream("first") << first;
3100 f->dump_stream("last") << last;
3101 f->dump_stream("acting") << acting;
3102 f->close_section();
3103 }
3104 void encode(bufferlist &bl) const {
3105 ENCODE_START(1, 1, bl);
3106 ::encode(first, bl);
3107 ::encode(last, bl);
3108 ::encode(acting, bl);
3109 ENCODE_FINISH(bl);
3110 }
3111 void decode(bufferlist::iterator &bl) {
3112 DECODE_START(1, bl);
3113 ::decode(first, bl);
3114 ::decode(last, bl);
3115 ::decode(acting, bl);
3116 DECODE_FINISH(bl);
3117 }
3118 static void generate_test_instances(list<compact_interval_t*> & o) {
3119 /* Not going to be used, we'll generate pi_compact_rep directly */
3120 }
3121};
3122ostream &operator<<(ostream &o, const compact_interval_t &rhs)
3123{
3124 return o << "([" << rhs.first << "," << rhs.last
3125 << "] acting " << rhs.acting << ")";
3126}
3127WRITE_CLASS_ENCODER(compact_interval_t)
3128
3129class pi_compact_rep : public PastIntervals::interval_rep {
3130 epoch_t first = 0;
3131 epoch_t last = 0; // inclusive
3132 set<pg_shard_t> all_participants;
3133 list<compact_interval_t> intervals;
3134 pi_compact_rep(
3135 bool ec_pool,
3136 std::list<PastIntervals::pg_interval_t> &&intervals) {
3137 for (auto &&i: intervals)
3138 add_interval(ec_pool, i);
3139 }
3140public:
3141 pi_compact_rep() = default;
3142 pi_compact_rep(const pi_compact_rep &) = default;
3143 pi_compact_rep(pi_compact_rep &&) = default;
3144 pi_compact_rep &operator=(const pi_compact_rep &) = default;
3145 pi_compact_rep &operator=(pi_compact_rep &&) = default;
3146
3147 size_t size() const override { return intervals.size(); }
3148 bool empty() const override {
3149 return first > last || (first == 0 && last == 0);
3150 }
3151 void clear() override {
3152 *this = pi_compact_rep();
3153 }
3154 pair<epoch_t, epoch_t> get_bounds() const override {
3155 return make_pair(first, last + 1);
3156 }
3157 set<pg_shard_t> get_all_participants(
3158 bool ec_pool) const override {
3159 return all_participants;
3160 }
3161 void add_interval(
3162 bool ec_pool, const PastIntervals::pg_interval_t &interval) override {
3163 if (first == 0)
3164 first = interval.first;
3165 assert(interval.last > last);
3166 last = interval.last;
3167 set<pg_shard_t> acting;
3168 for (unsigned i = 0; i < interval.acting.size(); ++i) {
3169 if (interval.acting[i] == CRUSH_ITEM_NONE)
3170 continue;
3171 acting.insert(
3172 pg_shard_t(
3173 interval.acting[i],
3174 ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
3175 }
3176 all_participants.insert(acting.begin(), acting.end());
3177 if (!interval.maybe_went_rw)
3178 return;
3179 intervals.push_back(
3180 compact_interval_t{interval.first, interval.last, acting});
3181 auto plast = intervals.end();
3182 --plast;
3183 for (auto cur = intervals.begin(); cur != plast; ) {
3184 if (plast->supersedes(*cur)) {
3185 intervals.erase(cur++);
3186 } else {
3187 ++cur;
3188 }
3189 }
3190 }
3191 unique_ptr<PastIntervals::interval_rep> clone() const override {
3192 return unique_ptr<PastIntervals::interval_rep>(new pi_compact_rep(*this));
3193 }
3194 ostream &print(ostream &out) const override {
3195 return out << "([" << first << "," << last
3196 << "] intervals=" << intervals << ")";
3197 }
3198 void encode(bufferlist &bl) const override {
3199 ENCODE_START(1, 1, bl);
3200 ::encode(first, bl);
3201 ::encode(last, bl);
3202 ::encode(all_participants, bl);
3203 ::encode(intervals, bl);
3204 ENCODE_FINISH(bl);
3205 }
3206 void decode(bufferlist::iterator &bl) override {
3207 DECODE_START(1, bl);
3208 ::decode(first, bl);
3209 ::decode(last, bl);
3210 ::decode(all_participants, bl);
3211 ::decode(intervals, bl);
3212 DECODE_FINISH(bl);
3213 }
3214 void dump(Formatter *f) const override {
3215 f->open_object_section("PastIntervals::compact_rep");
3216 f->dump_stream("first") << first;
3217 f->dump_stream("last") << last;
3218 f->open_array_section("all_participants");
3219 for (auto& i : all_participants) {
3220 f->dump_object("pg_shard", i);
3221 }
3222 f->close_section();
3223 f->open_array_section("intervals");
3224 for (auto &&i: intervals) {
3225 i.dump(f);
3226 }
3227 f->close_section();
3228 f->close_section();
3229 }
3230 bool is_classic() const override {
3231 return false;
3232 }
3233 static void generate_test_instances(list<pi_compact_rep*> &o) {
3234 using ival = PastIntervals::pg_interval_t;
3235 using ivallst = std::list<ival>;
3236 o.push_back(
3237 new pi_compact_rep(
3238 true, ivallst
3239 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3240 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3241 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3242 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3243 }));
3244 o.push_back(
3245 new pi_compact_rep(
3246 false, ivallst
3247 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3248 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3249 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3250 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3251 }));
3252 o.push_back(
3253 new pi_compact_rep(
3254 true, ivallst
3255 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3256 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3257 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3258 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3259 }));
3260 }
3261 void iterate_mayberw_back_to(
3262 bool ec_pool,
3263 epoch_t les,
3264 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3265 for (auto i = intervals.rbegin(); i != intervals.rend(); ++i) {
3266 if (i->last < les)
3267 break;
3268 f(i->first, i->acting);
3269 }
3270 }
3271 virtual ~pi_compact_rep() override {}
3272};
3273WRITE_CLASS_ENCODER(pi_compact_rep)
3274
3275PastIntervals::PastIntervals(const PastIntervals &rhs)
3276 : past_intervals(rhs.past_intervals ?
3277 rhs.past_intervals->clone() :
3278 nullptr) {}
3279
3280PastIntervals &PastIntervals::operator=(const PastIntervals &rhs)
3281{
3282 PastIntervals other(rhs);
31f18b77 3283 swap(other);
7c673cae
FG
3284 return *this;
3285}
3286
3287ostream& operator<<(ostream& out, const PastIntervals &i)
3288{
3289 if (i.past_intervals) {
3290 return i.past_intervals->print(out);
3291 } else {
3292 return out << "(empty)";
3293 }
3294}
3295
3296ostream& operator<<(ostream& out, const PastIntervals::PriorSet &i)
3297{
3298 return out << "PriorSet("
3299 << "ec_pool: " << i.ec_pool
3300 << ", probe: " << i.probe
3301 << ", down: " << i.down
3302 << ", blocked_by: " << i.blocked_by
3303 << ", pg_down: " << i.pg_down
3304 << ")";
3305}
3306
3307void PastIntervals::decode(bufferlist::iterator &bl)
3308{
3309 DECODE_START(1, bl);
3310 __u8 type = 0;
3311 ::decode(type, bl);
3312 switch (type) {
3313 case 0:
3314 break;
3315 case 1:
3316 past_intervals.reset(new pi_simple_rep);
3317 past_intervals->decode(bl);
3318 break;
3319 case 2:
3320 past_intervals.reset(new pi_compact_rep);
3321 past_intervals->decode(bl);
3322 break;
3323 }
3324 DECODE_FINISH(bl);
3325}
3326
3327void PastIntervals::decode_classic(bufferlist::iterator &bl)
3328{
3329 past_intervals.reset(new pi_simple_rep);
3330 past_intervals->decode(bl);
3331}
3332
3333void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
3334{
3335 {
3336 list<pi_simple_rep *> simple;
3337 pi_simple_rep::generate_test_instances(simple);
3338 for (auto &&i: simple) {
3339 // takes ownership of contents
3340 o.push_back(new PastIntervals(i));
3341 }
3342 }
3343 {
3344 list<pi_compact_rep *> compact;
3345 pi_compact_rep::generate_test_instances(compact);
3346 for (auto &&i: compact) {
3347 // takes ownership of contents
3348 o.push_back(new PastIntervals(i));
3349 }
3350 }
3351 return;
3352}
3353
3354void PastIntervals::update_type(bool ec_pool, bool compact)
3355{
3356 if (!compact) {
3357 if (!past_intervals) {
3358 past_intervals.reset(new pi_simple_rep);
3359 } else {
3360 // we never convert from compact back to classic
3361 assert(is_classic());
3362 }
3363 } else {
3364 if (!past_intervals) {
3365 past_intervals.reset(new pi_compact_rep);
3366 } else if (is_classic()) {
3367 auto old = std::move(past_intervals);
3368 past_intervals.reset(new pi_compact_rep);
3369 assert(old->has_full_intervals());
3370 old->iterate_all_intervals([&](const pg_interval_t &i) {
3371 past_intervals->add_interval(ec_pool, i);
3372 });
3373 }
3374 }
3375}
3376
3377void PastIntervals::update_type_from_map(bool ec_pool, const OSDMap &osdmap)
3378{
31f18b77 3379 update_type(ec_pool, osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS);
7c673cae
FG
3380}
3381
3382bool PastIntervals::is_new_interval(
3383 int old_acting_primary,
3384 int new_acting_primary,
3385 const vector<int> &old_acting,
3386 const vector<int> &new_acting,
3387 int old_up_primary,
3388 int new_up_primary,
3389 const vector<int> &old_up,
3390 const vector<int> &new_up,
3391 int old_size,
3392 int new_size,
3393 int old_min_size,
3394 int new_min_size,
3395 unsigned old_pg_num,
3396 unsigned new_pg_num,
3397 bool old_sort_bitwise,
3398 bool new_sort_bitwise,
3399 pg_t pgid) {
3400 return old_acting_primary != new_acting_primary ||
3401 new_acting != old_acting ||
3402 old_up_primary != new_up_primary ||
3403 new_up != old_up ||
3404 old_min_size != new_min_size ||
3405 old_size != new_size ||
3406 pgid.is_split(old_pg_num, new_pg_num, 0) ||
3407 old_sort_bitwise != new_sort_bitwise;
3408}
3409
3410bool PastIntervals::is_new_interval(
3411 int old_acting_primary,
3412 int new_acting_primary,
3413 const vector<int> &old_acting,
3414 const vector<int> &new_acting,
3415 int old_up_primary,
3416 int new_up_primary,
3417 const vector<int> &old_up,
3418 const vector<int> &new_up,
3419 OSDMapRef osdmap,
3420 OSDMapRef lastmap,
3421 pg_t pgid) {
3422 return !(lastmap->get_pools().count(pgid.pool())) ||
3423 is_new_interval(old_acting_primary,
3424 new_acting_primary,
3425 old_acting,
3426 new_acting,
3427 old_up_primary,
3428 new_up_primary,
3429 old_up,
3430 new_up,
3431 lastmap->get_pools().find(pgid.pool())->second.size,
3432 osdmap->get_pools().find(pgid.pool())->second.size,
3433 lastmap->get_pools().find(pgid.pool())->second.min_size,
3434 osdmap->get_pools().find(pgid.pool())->second.min_size,
3435 lastmap->get_pg_num(pgid.pool()),
3436 osdmap->get_pg_num(pgid.pool()),
3437 lastmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3438 osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3439 pgid);
3440}
3441
3442bool PastIntervals::check_new_interval(
3443 int old_acting_primary,
3444 int new_acting_primary,
3445 const vector<int> &old_acting,
3446 const vector<int> &new_acting,
3447 int old_up_primary,
3448 int new_up_primary,
3449 const vector<int> &old_up,
3450 const vector<int> &new_up,
3451 epoch_t same_interval_since,
3452 epoch_t last_epoch_clean,
3453 OSDMapRef osdmap,
3454 OSDMapRef lastmap,
3455 pg_t pgid,
3456 IsPGRecoverablePredicate *could_have_gone_active,
3457 PastIntervals *past_intervals,
3458 std::ostream *out)
3459{
3460 /*
3461 * We have to be careful to gracefully deal with situations like
3462 * so. Say we have a power outage or something that takes out both
3463 * OSDs, but the monitor doesn't mark them down in the same epoch.
3464 * The history may look like
3465 *
3466 * 1: A B
3467 * 2: B
3468 * 3: let's say B dies for good, too (say, from the power spike)
3469 * 4: A
3470 *
3471 * which makes it look like B may have applied updates to the PG
3472 * that we need in order to proceed. This sucks...
3473 *
3474 * To minimize the risk of this happening, we CANNOT go active if
3475 * _any_ OSDs in the prior set are down until we send an MOSDAlive
3476 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
3477 * Then, we have something like
3478 *
3479 * 1: A B
3480 * 2: B up_thru[B]=0
3481 * 3:
3482 * 4: A
3483 *
3484 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
3485 *
3486 * or,
3487 *
3488 * 1: A B
3489 * 2: B up_thru[B]=0
3490 * 3: B up_thru[B]=2
3491 * 4:
3492 * 5: A
3493 *
3494 * -> we must wait for B, bc it was alive through 2, and could have
3495 * written to the pg.
3496 *
3497 * If B is really dead, then an administrator will need to manually
3498 * intervene by marking the OSD as "lost."
3499 */
3500
3501 // remember past interval
3502 // NOTE: a change in the up set primary triggers an interval
3503 // change, even though the interval members in the pg_interval_t
3504 // do not change.
3505 assert(past_intervals);
3506 assert(past_intervals->past_intervals);
3507 if (is_new_interval(
3508 old_acting_primary,
3509 new_acting_primary,
3510 old_acting,
3511 new_acting,
3512 old_up_primary,
3513 new_up_primary,
3514 old_up,
3515 new_up,
3516 osdmap,
3517 lastmap,
3518 pgid)) {
3519 pg_interval_t i;
3520 i.first = same_interval_since;
3521 i.last = osdmap->get_epoch() - 1;
3522 assert(i.first <= i.last);
3523 i.acting = old_acting;
3524 i.up = old_up;
3525 i.primary = old_acting_primary;
3526 i.up_primary = old_up_primary;
3527
3528 unsigned num_acting = 0;
3529 for (vector<int>::const_iterator p = i.acting.begin(); p != i.acting.end();
3530 ++p)
3531 if (*p != CRUSH_ITEM_NONE)
3532 ++num_acting;
3533
3534 assert(lastmap->get_pools().count(pgid.pool()));
3535 const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
3536 set<pg_shard_t> old_acting_shards;
3537 old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
3538
3539 if (num_acting &&
3540 i.primary != -1 &&
3541 num_acting >= old_pg_pool.min_size &&
3542 (*could_have_gone_active)(old_acting_shards)) {
3543 if (out)
3544 *out << __func__ << " " << i
3545 << ": not rw,"
3546 << " up_thru " << lastmap->get_up_thru(i.primary)
3547 << " up_from " << lastmap->get_up_from(i.primary)
3548 << " last_epoch_clean " << last_epoch_clean
3549 << std::endl;
3550 if (lastmap->get_up_thru(i.primary) >= i.first &&
3551 lastmap->get_up_from(i.primary) <= i.first) {
3552 i.maybe_went_rw = true;
3553 if (out)
3554 *out << __func__ << " " << i
3555 << " : primary up " << lastmap->get_up_from(i.primary)
3556 << "-" << lastmap->get_up_thru(i.primary)
3557 << " includes interval"
3558 << std::endl;
3559 } else if (last_epoch_clean >= i.first &&
3560 last_epoch_clean <= i.last) {
3561 // If the last_epoch_clean is included in this interval, then
3562 // the pg must have been rw (for recovery to have completed).
3563 // This is important because we won't know the _real_
3564 // first_epoch because we stop at last_epoch_clean, and we
3565 // don't want the oldest interval to randomly have
3566 // maybe_went_rw false depending on the relative up_thru vs
3567 // last_epoch_clean timing.
3568 i.maybe_went_rw = true;
3569 if (out)
3570 *out << __func__ << " " << i
3571 << " : includes last_epoch_clean " << last_epoch_clean
3572 << " and presumed to have been rw"
3573 << std::endl;
3574 } else {
3575 i.maybe_went_rw = false;
3576 if (out)
3577 *out << __func__ << " " << i
3578 << " : primary up " << lastmap->get_up_from(i.primary)
3579 << "-" << lastmap->get_up_thru(i.primary)
3580 << " does not include interval"
3581 << std::endl;
3582 }
3583 } else {
3584 i.maybe_went_rw = false;
3585 if (out)
3586 *out << __func__ << " " << i << " : acting set is too small" << std::endl;
3587 }
3588 past_intervals->past_intervals->add_interval(old_pg_pool.ec_pool(), i);
3589 return true;
3590 } else {
3591 return false;
3592 }
3593}
3594
3595
3596// true if the given map affects the prior set
3597bool PastIntervals::PriorSet::affected_by_map(
3598 const OSDMap &osdmap,
3599 const DoutPrefixProvider *dpp) const
3600{
3601 for (set<pg_shard_t>::iterator p = probe.begin();
3602 p != probe.end();
3603 ++p) {
3604 int o = p->osd;
3605
3606 // did someone in the prior set go down?
3607 if (osdmap.is_down(o) && down.count(o) == 0) {
3608 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
3609 return true;
3610 }
3611
3612 // did a down osd in cur get (re)marked as lost?
3613 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3614 if (r != blocked_by.end()) {
3615 if (!osdmap.exists(o)) {
3616 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3617 return true;
3618 }
3619 if (osdmap.get_info(o).lost_at != r->second) {
3620 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
3621 return true;
3622 }
3623 }
3624 }
3625
3626 // did someone in the prior down set go up?
3627 for (set<int>::const_iterator p = down.begin();
3628 p != down.end();
3629 ++p) {
3630 int o = *p;
3631
3632 if (osdmap.is_up(o)) {
3633 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
3634 return true;
3635 }
3636
3637 // did someone in the prior set get lost or destroyed?
3638 if (!osdmap.exists(o)) {
3639 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3640 return true;
3641 }
3642 // did a down osd in down get (re)marked as lost?
3643 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3644 if (r != blocked_by.end()) {
3645 if (osdmap.get_info(o).lost_at != r->second) {
3646 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
3647 return true;
3648 }
3649 }
3650 }
3651
3652 return false;
3653}
3654
3655ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
3656{
3657 out << "interval(" << i.first << "-" << i.last
3658 << " up " << i.up << "(" << i.up_primary << ")"
3659 << " acting " << i.acting << "(" << i.primary << ")";
3660 if (i.maybe_went_rw)
3661 out << " maybe_went_rw";
3662 out << ")";
3663 return out;
3664}
3665
3666
3667
3668// -- pg_query_t --
3669
3670void pg_query_t::encode(bufferlist &bl, uint64_t features) const {
3671 ENCODE_START(3, 3, bl);
3672 ::encode(type, bl);
3673 ::encode(since, bl);
3674 history.encode(bl);
3675 ::encode(epoch_sent, bl);
3676 ::encode(to, bl);
3677 ::encode(from, bl);
3678 ENCODE_FINISH(bl);
3679}
3680
3681void pg_query_t::decode(bufferlist::iterator &bl) {
3682 DECODE_START(3, bl);
3683 ::decode(type, bl);
3684 ::decode(since, bl);
3685 history.decode(bl);
3686 ::decode(epoch_sent, bl);
3687 ::decode(to, bl);
3688 ::decode(from, bl);
3689 DECODE_FINISH(bl);
3690}
3691
3692void pg_query_t::dump(Formatter *f) const
3693{
3694 f->dump_int("from", from);
3695 f->dump_int("to", to);
3696 f->dump_string("type", get_type_name());
3697 f->dump_stream("since") << since;
3698 f->dump_stream("epoch_sent") << epoch_sent;
3699 f->open_object_section("history");
3700 history.dump(f);
3701 f->close_section();
3702}
3703void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
3704{
3705 o.push_back(new pg_query_t());
3706 list<pg_history_t*> h;
3707 pg_history_t::generate_test_instances(h);
3708 o.push_back(new pg_query_t(pg_query_t::INFO, shard_id_t(1), shard_id_t(2), *h.back(), 4));
3709 o.push_back(new pg_query_t(pg_query_t::MISSING, shard_id_t(2), shard_id_t(3), *h.back(), 4));
3710 o.push_back(new pg_query_t(pg_query_t::LOG, shard_id_t(0), shard_id_t(0),
3711 eversion_t(4, 5), *h.back(), 4));
3712 o.push_back(new pg_query_t(pg_query_t::FULLLOG,
3713 shard_id_t::NO_SHARD, shard_id_t::NO_SHARD,
3714 *h.back(), 5));
3715}
3716
3717// -- ObjectModDesc --
3718void ObjectModDesc::visit(Visitor *visitor) const
3719{
3720 bufferlist::iterator bp = bl.begin();
3721 try {
3722 while (!bp.end()) {
3723 DECODE_START(max_required_version, bp);
3724 uint8_t code;
3725 ::decode(code, bp);
3726 switch (code) {
3727 case APPEND: {
3728 uint64_t size;
3729 ::decode(size, bp);
3730 visitor->append(size);
3731 break;
3732 }
3733 case SETATTRS: {
3734 map<string, boost::optional<bufferlist> > attrs;
3735 ::decode(attrs, bp);
3736 visitor->setattrs(attrs);
3737 break;
3738 }
3739 case DELETE: {
3740 version_t old_version;
3741 ::decode(old_version, bp);
3742 visitor->rmobject(old_version);
3743 break;
3744 }
3745 case CREATE: {
3746 visitor->create();
3747 break;
3748 }
3749 case UPDATE_SNAPS: {
3750 set<snapid_t> snaps;
3751 ::decode(snaps, bp);
3752 visitor->update_snaps(snaps);
3753 break;
3754 }
3755 case TRY_DELETE: {
3756 version_t old_version;
3757 ::decode(old_version, bp);
3758 visitor->try_rmobject(old_version);
3759 break;
3760 }
3761 case ROLLBACK_EXTENTS: {
3762 vector<pair<uint64_t, uint64_t> > extents;
3763 version_t gen;
3764 ::decode(gen, bp);
3765 ::decode(extents, bp);
3766 visitor->rollback_extents(gen,extents);
3767 break;
3768 }
3769 default:
3770 assert(0 == "Invalid rollback code");
3771 }
3772 DECODE_FINISH(bp);
3773 }
3774 } catch (...) {
3775 assert(0 == "Invalid encoding");
3776 }
3777}
3778
3779struct DumpVisitor : public ObjectModDesc::Visitor {
3780 Formatter *f;
3781 explicit DumpVisitor(Formatter *f) : f(f) {}
3782 void append(uint64_t old_size) override {
3783 f->open_object_section("op");
3784 f->dump_string("code", "APPEND");
3785 f->dump_unsigned("old_size", old_size);
3786 f->close_section();
3787 }
3788 void setattrs(map<string, boost::optional<bufferlist> > &attrs) override {
3789 f->open_object_section("op");
3790 f->dump_string("code", "SETATTRS");
3791 f->open_array_section("attrs");
3792 for (map<string, boost::optional<bufferlist> >::iterator i = attrs.begin();
3793 i != attrs.end();
3794 ++i) {
3795 f->dump_string("attr_name", i->first);
3796 }
3797 f->close_section();
3798 f->close_section();
3799 }
3800 void rmobject(version_t old_version) override {
3801 f->open_object_section("op");
3802 f->dump_string("code", "RMOBJECT");
3803 f->dump_unsigned("old_version", old_version);
3804 f->close_section();
3805 }
3806 void try_rmobject(version_t old_version) override {
3807 f->open_object_section("op");
3808 f->dump_string("code", "TRY_RMOBJECT");
3809 f->dump_unsigned("old_version", old_version);
3810 f->close_section();
3811 }
3812 void create() override {
3813 f->open_object_section("op");
3814 f->dump_string("code", "CREATE");
3815 f->close_section();
3816 }
3817 void update_snaps(const set<snapid_t> &snaps) override {
3818 f->open_object_section("op");
3819 f->dump_string("code", "UPDATE_SNAPS");
3820 f->dump_stream("snaps") << snaps;
3821 f->close_section();
3822 }
3823 void rollback_extents(
3824 version_t gen,
3825 const vector<pair<uint64_t, uint64_t> > &extents) override {
3826 f->open_object_section("op");
3827 f->dump_string("code", "ROLLBACK_EXTENTS");
3828 f->dump_unsigned("gen", gen);
3829 f->dump_stream("snaps") << extents;
3830 f->close_section();
3831 }
3832};
3833
3834void ObjectModDesc::dump(Formatter *f) const
3835{
3836 f->open_object_section("object_mod_desc");
3837 f->dump_bool("can_local_rollback", can_local_rollback);
3838 f->dump_bool("rollback_info_completed", rollback_info_completed);
3839 {
3840 f->open_array_section("ops");
3841 DumpVisitor vis(f);
3842 visit(&vis);
3843 f->close_section();
3844 }
3845 f->close_section();
3846}
3847
3848void ObjectModDesc::generate_test_instances(list<ObjectModDesc*>& o)
3849{
3850 map<string, boost::optional<bufferlist> > attrs;
3851 attrs[OI_ATTR];
3852 attrs[SS_ATTR];
3853 attrs["asdf"];
3854 o.push_back(new ObjectModDesc());
3855 o.back()->append(100);
3856 o.back()->setattrs(attrs);
3857 o.push_back(new ObjectModDesc());
3858 o.back()->rmobject(1001);
3859 o.push_back(new ObjectModDesc());
3860 o.back()->create();
3861 o.back()->setattrs(attrs);
3862 o.push_back(new ObjectModDesc());
3863 o.back()->create();
3864 o.back()->setattrs(attrs);
3865 o.back()->mark_unrollbackable();
3866 o.back()->append(1000);
3867}
3868
3869void ObjectModDesc::encode(bufferlist &_bl) const
3870{
3871 ENCODE_START(max_required_version, max_required_version, _bl);
3872 ::encode(can_local_rollback, _bl);
3873 ::encode(rollback_info_completed, _bl);
3874 ::encode(bl, _bl);
3875 ENCODE_FINISH(_bl);
3876}
3877void ObjectModDesc::decode(bufferlist::iterator &_bl)
3878{
3879 DECODE_START(2, _bl);
3880 max_required_version = struct_v;
3881 ::decode(can_local_rollback, _bl);
3882 ::decode(rollback_info_completed, _bl);
3883 ::decode(bl, _bl);
3884 // ensure bl does not pin a larger buffer in memory
3885 bl.rebuild();
31f18b77 3886 bl.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
3887 DECODE_FINISH(_bl);
3888}
3889
3890// -- pg_log_entry_t --
3891
3892string pg_log_entry_t::get_key_name() const
3893{
3894 return version.get_key_name();
3895}
3896
3897void pg_log_entry_t::encode_with_checksum(bufferlist& bl) const
3898{
3899 bufferlist ebl(sizeof(*this)*2);
3900 encode(ebl);
3901 __u32 crc = ebl.crc32c(0);
3902 ::encode(ebl, bl);
3903 ::encode(crc, bl);
3904}
3905
3906void pg_log_entry_t::decode_with_checksum(bufferlist::iterator& p)
3907{
3908 bufferlist bl;
3909 ::decode(bl, p);
3910 __u32 crc;
3911 ::decode(crc, p);
3912 if (crc != bl.crc32c(0))
3913 throw buffer::malformed_input("bad checksum on pg_log_entry_t");
3914 bufferlist::iterator q = bl.begin();
3915 decode(q);
3916}
3917
3918void pg_log_entry_t::encode(bufferlist &bl) const
3919{
3920 ENCODE_START(11, 4, bl);
3921 ::encode(op, bl);
3922 ::encode(soid, bl);
3923 ::encode(version, bl);
3924
3925 /**
3926 * Added with reverting_to:
3927 * Previous code used prior_version to encode
3928 * what we now call reverting_to. This will
3929 * allow older code to decode reverting_to
3930 * into prior_version as expected.
3931 */
3932 if (op == LOST_REVERT)
3933 ::encode(reverting_to, bl);
3934 else
3935 ::encode(prior_version, bl);
3936
3937 ::encode(reqid, bl);
3938 ::encode(mtime, bl);
3939 if (op == LOST_REVERT)
3940 ::encode(prior_version, bl);
3941 ::encode(snaps, bl);
3942 ::encode(user_version, bl);
3943 ::encode(mod_desc, bl);
3944 ::encode(extra_reqids, bl);
3945 if (op == ERROR)
3946 ::encode(return_code, bl);
3947 ENCODE_FINISH(bl);
3948}
3949
3950void pg_log_entry_t::decode(bufferlist::iterator &bl)
3951{
3952 DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl);
3953 ::decode(op, bl);
3954 if (struct_v < 2) {
3955 sobject_t old_soid;
3956 ::decode(old_soid, bl);
3957 soid.oid = old_soid.oid;
3958 soid.snap = old_soid.snap;
3959 invalid_hash = true;
3960 } else {
3961 ::decode(soid, bl);
3962 }
3963 if (struct_v < 3)
3964 invalid_hash = true;
3965 ::decode(version, bl);
3966
3967 if (struct_v >= 6 && op == LOST_REVERT)
3968 ::decode(reverting_to, bl);
3969 else
3970 ::decode(prior_version, bl);
3971
3972 ::decode(reqid, bl);
3973
3974 ::decode(mtime, bl);
3975 if (struct_v < 5)
3976 invalid_pool = true;
3977
3978 if (op == LOST_REVERT) {
3979 if (struct_v >= 6) {
3980 ::decode(prior_version, bl);
3981 } else {
3982 reverting_to = prior_version;
3983 }
3984 }
3985 if (struct_v >= 7 || // for v >= 7, this is for all ops.
3986 op == CLONE) { // for v < 7, it's only present for CLONE.
3987 ::decode(snaps, bl);
3988 // ensure snaps does not pin a larger buffer in memory
3989 snaps.rebuild();
31f18b77 3990 snaps.reassign_to_mempool(mempool::mempool_osd_pglog);
7c673cae
FG
3991 }
3992
3993 if (struct_v >= 8)
3994 ::decode(user_version, bl);
3995 else
3996 user_version = version.version;
3997
3998 if (struct_v >= 9)
3999 ::decode(mod_desc, bl);
4000 else
4001 mod_desc.mark_unrollbackable();
4002 if (struct_v >= 10)
4003 ::decode(extra_reqids, bl);
4004 if (struct_v >= 11 && op == ERROR)
4005 ::decode(return_code, bl);
4006 DECODE_FINISH(bl);
4007}
4008
4009void pg_log_entry_t::dump(Formatter *f) const
4010{
4011 f->dump_string("op", get_op_name());
4012 f->dump_stream("object") << soid;
4013 f->dump_stream("version") << version;
4014 f->dump_stream("prior_version") << prior_version;
4015 f->dump_stream("reqid") << reqid;
4016 f->open_array_section("extra_reqids");
31f18b77 4017 for (auto p = extra_reqids.begin();
7c673cae
FG
4018 p != extra_reqids.end();
4019 ++p) {
4020 f->open_object_section("extra_reqid");
4021 f->dump_stream("reqid") << p->first;
4022 f->dump_stream("user_version") << p->second;
4023 f->close_section();
4024 }
4025 f->close_section();
4026 f->dump_stream("mtime") << mtime;
4027 f->dump_int("return_code", return_code);
4028 if (snaps.length() > 0) {
4029 vector<snapid_t> v;
4030 bufferlist c = snaps;
4031 bufferlist::iterator p = c.begin();
4032 try {
4033 ::decode(v, p);
4034 } catch (...) {
4035 v.clear();
4036 }
4037 f->open_object_section("snaps");
4038 for (vector<snapid_t>::iterator p = v.begin(); p != v.end(); ++p)
4039 f->dump_unsigned("snap", *p);
4040 f->close_section();
4041 }
4042 {
4043 f->open_object_section("mod_desc");
4044 mod_desc.dump(f);
4045 f->close_section();
4046 }
4047}
4048
4049void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
4050{
4051 o.push_back(new pg_log_entry_t());
4052 hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
4053 o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
4054 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4055 utime_t(8,9), 0));
4056 o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
4057 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4058 utime_t(8,9), -ENOENT));
4059}
4060
4061ostream& operator<<(ostream& out, const pg_log_entry_t& e)
4062{
4063 out << e.version << " (" << e.prior_version << ") "
4064 << std::left << std::setw(8) << e.get_op_name() << ' '
4065 << e.soid << " by " << e.reqid << " " << e.mtime
4066 << " " << e.return_code;
4067 if (e.snaps.length()) {
4068 vector<snapid_t> snaps;
4069 bufferlist c = e.snaps;
4070 bufferlist::iterator p = c.begin();
4071 try {
4072 ::decode(snaps, p);
4073 } catch (...) {
4074 snaps.clear();
4075 }
4076 out << " snaps " << snaps;
4077 }
4078 return out;
4079}
4080
4081
4082// -- pg_log_t --
4083
4084// out: pg_log_t that only has entries that apply to import_pgid using curmap
4085// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4086void pg_log_t::filter_log(spg_t import_pgid, const OSDMap &curmap,
4087 const string &hit_set_namespace, const pg_log_t &in,
4088 pg_log_t &out, pg_log_t &reject)
4089{
4090 out = in;
4091 out.log.clear();
4092 reject.log.clear();
4093
4094 for (list<pg_log_entry_t>::const_iterator i = in.log.begin();
4095 i != in.log.end(); ++i) {
4096
4097 // Reject pg log entries for temporary objects
4098 if (i->soid.is_temp()) {
4099 reject.log.push_back(*i);
4100 continue;
4101 }
4102
4103 if (i->soid.nspace != hit_set_namespace) {
4104 object_t oid = i->soid.oid;
4105 object_locator_t loc(i->soid);
4106 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
4107 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
4108
4109 if (import_pgid.pgid == pgid) {
4110 out.log.push_back(*i);
4111 } else {
4112 reject.log.push_back(*i);
4113 }
4114 } else {
4115 out.log.push_back(*i);
4116 }
4117 }
4118}
4119
4120void pg_log_t::encode(bufferlist& bl) const
4121{
4122 ENCODE_START(6, 3, bl);
4123 ::encode(head, bl);
4124 ::encode(tail, bl);
4125 ::encode(log, bl);
4126 ::encode(can_rollback_to, bl);
4127 ::encode(rollback_info_trimmed_to, bl);
4128 ENCODE_FINISH(bl);
4129}
4130
4131void pg_log_t::decode(bufferlist::iterator &bl, int64_t pool)
4132{
4133 DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, bl);
4134 ::decode(head, bl);
4135 ::decode(tail, bl);
4136 if (struct_v < 2) {
4137 bool backlog;
4138 ::decode(backlog, bl);
4139 }
4140 ::decode(log, bl);
4141 if (struct_v >= 5)
4142 ::decode(can_rollback_to, bl);
4143
4144 if (struct_v >= 6)
4145 ::decode(rollback_info_trimmed_to, bl);
4146 else
4147 rollback_info_trimmed_to = tail;
4148 DECODE_FINISH(bl);
4149
4150 // handle hobject_t format change
4151 if (struct_v < 4) {
4152 for (list<pg_log_entry_t>::iterator i = log.begin();
4153 i != log.end();
4154 ++i) {
4155 if (!i->soid.is_max() && i->soid.pool == -1)
4156 i->soid.pool = pool;
4157 }
4158 }
4159}
4160
4161void pg_log_t::dump(Formatter *f) const
4162{
4163 f->dump_stream("head") << head;
4164 f->dump_stream("tail") << tail;
4165 f->open_array_section("log");
4166 for (list<pg_log_entry_t>::const_iterator p = log.begin(); p != log.end(); ++p) {
4167 f->open_object_section("entry");
4168 p->dump(f);
4169 f->close_section();
4170 }
4171 f->close_section();
4172}
4173
4174void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
4175{
4176 o.push_back(new pg_log_t);
4177
4178 // this is nonsensical:
4179 o.push_back(new pg_log_t);
4180 o.back()->head = eversion_t(1,2);
4181 o.back()->tail = eversion_t(3,4);
4182 list<pg_log_entry_t*> e;
4183 pg_log_entry_t::generate_test_instances(e);
4184 for (list<pg_log_entry_t*>::iterator p = e.begin(); p != e.end(); ++p)
4185 o.back()->log.push_back(**p);
4186}
4187
4188void pg_log_t::copy_after(const pg_log_t &other, eversion_t v)
4189{
4190 can_rollback_to = other.can_rollback_to;
4191 head = other.head;
4192 tail = other.tail;
4193 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4194 i != other.log.rend();
4195 ++i) {
4196 assert(i->version > other.tail);
4197 if (i->version <= v) {
4198 // make tail accurate.
4199 tail = i->version;
4200 break;
4201 }
4202 log.push_front(*i);
4203 }
4204}
4205
4206void pg_log_t::copy_range(const pg_log_t &other, eversion_t from, eversion_t to)
4207{
4208 can_rollback_to = other.can_rollback_to;
4209 list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4210 assert(i != other.log.rend());
4211 while (i->version > to) {
4212 ++i;
4213 assert(i != other.log.rend());
4214 }
4215 assert(i->version == to);
4216 head = to;
4217 for ( ; i != other.log.rend(); ++i) {
4218 if (i->version <= from) {
4219 tail = i->version;
4220 break;
4221 }
4222 log.push_front(*i);
4223 }
4224}
4225
4226void pg_log_t::copy_up_to(const pg_log_t &other, int max)
4227{
4228 can_rollback_to = other.can_rollback_to;
4229 int n = 0;
4230 head = other.head;
4231 tail = other.tail;
4232 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4233 i != other.log.rend();
4234 ++i) {
4235 if (n++ >= max) {
4236 tail = i->version;
4237 break;
4238 }
4239 log.push_front(*i);
4240 }
4241}
4242
4243ostream& pg_log_t::print(ostream& out) const
4244{
4245 out << *this << std::endl;
4246 for (list<pg_log_entry_t>::const_iterator p = log.begin();
4247 p != log.end();
4248 ++p)
4249 out << *p << std::endl;
4250 return out;
4251}
4252
4253// -- pg_missing_t --
4254
4255ostream& operator<<(ostream& out, const pg_missing_item& i)
4256{
4257 out << i.need;
4258 if (i.have != eversion_t())
4259 out << "(" << i.have << ")";
4260 return out;
4261}
4262
4263// -- object_copy_cursor_t --
4264
4265void object_copy_cursor_t::encode(bufferlist& bl) const
4266{
4267 ENCODE_START(1, 1, bl);
4268 ::encode(attr_complete, bl);
4269 ::encode(data_offset, bl);
4270 ::encode(data_complete, bl);
4271 ::encode(omap_offset, bl);
4272 ::encode(omap_complete, bl);
4273 ENCODE_FINISH(bl);
4274}
4275
4276void object_copy_cursor_t::decode(bufferlist::iterator &bl)
4277{
4278 DECODE_START(1, bl);
4279 ::decode(attr_complete, bl);
4280 ::decode(data_offset, bl);
4281 ::decode(data_complete, bl);
4282 ::decode(omap_offset, bl);
4283 ::decode(omap_complete, bl);
4284 DECODE_FINISH(bl);
4285}
4286
4287void object_copy_cursor_t::dump(Formatter *f) const
4288{
4289 f->dump_unsigned("attr_complete", (int)attr_complete);
4290 f->dump_unsigned("data_offset", data_offset);
4291 f->dump_unsigned("data_complete", (int)data_complete);
4292 f->dump_string("omap_offset", omap_offset);
4293 f->dump_unsigned("omap_complete", (int)omap_complete);
4294}
4295
4296void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
4297{
4298 o.push_back(new object_copy_cursor_t);
4299 o.push_back(new object_copy_cursor_t);
4300 o.back()->attr_complete = true;
4301 o.back()->data_offset = 123;
4302 o.push_back(new object_copy_cursor_t);
4303 o.back()->attr_complete = true;
4304 o.back()->data_complete = true;
4305 o.back()->omap_offset = "foo";
4306 o.push_back(new object_copy_cursor_t);
4307 o.back()->attr_complete = true;
4308 o.back()->data_complete = true;
4309 o.back()->omap_complete = true;
4310}
4311
4312// -- object_copy_data_t --
4313
4314void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const
4315{
4316 ENCODE_START(7, 5, bl);
4317 ::encode(size, bl);
4318 ::encode(mtime, bl);
4319 ::encode(attrs, bl);
4320 ::encode(data, bl);
4321 ::encode(omap_data, bl);
4322 ::encode(cursor, bl);
4323 ::encode(omap_header, bl);
4324 ::encode(snaps, bl);
4325 ::encode(snap_seq, bl);
4326 ::encode(flags, bl);
4327 ::encode(data_digest, bl);
4328 ::encode(omap_digest, bl);
4329 ::encode(reqids, bl);
4330 ::encode(truncate_seq, bl);
4331 ::encode(truncate_size, bl);
4332 ENCODE_FINISH(bl);
4333}
4334
4335void object_copy_data_t::decode(bufferlist::iterator& bl)
4336{
4337 DECODE_START(7, bl);
4338 if (struct_v < 5) {
4339 // old
4340 ::decode(size, bl);
4341 ::decode(mtime, bl);
4342 {
4343 string category;
4344 ::decode(category, bl); // no longer used
4345 }
4346 ::decode(attrs, bl);
4347 ::decode(data, bl);
4348 {
4349 map<string,bufferlist> omap;
4350 ::decode(omap, bl);
4351 omap_data.clear();
4352 if (!omap.empty())
4353 ::encode(omap, omap_data);
4354 }
4355 ::decode(cursor, bl);
4356 if (struct_v >= 2)
4357 ::decode(omap_header, bl);
4358 if (struct_v >= 3) {
4359 ::decode(snaps, bl);
4360 ::decode(snap_seq, bl);
4361 } else {
4362 snaps.clear();
4363 snap_seq = 0;
4364 }
4365 if (struct_v >= 4) {
4366 ::decode(flags, bl);
4367 ::decode(data_digest, bl);
4368 ::decode(omap_digest, bl);
4369 }
4370 } else {
4371 // current
4372 ::decode(size, bl);
4373 ::decode(mtime, bl);
4374 ::decode(attrs, bl);
4375 ::decode(data, bl);
4376 ::decode(omap_data, bl);
4377 ::decode(cursor, bl);
4378 ::decode(omap_header, bl);
4379 ::decode(snaps, bl);
4380 ::decode(snap_seq, bl);
4381 if (struct_v >= 4) {
4382 ::decode(flags, bl);
4383 ::decode(data_digest, bl);
4384 ::decode(omap_digest, bl);
4385 }
4386 if (struct_v >= 6) {
4387 ::decode(reqids, bl);
4388 }
4389 if (struct_v >= 7) {
4390 ::decode(truncate_seq, bl);
4391 ::decode(truncate_size, bl);
4392 }
4393 }
4394 DECODE_FINISH(bl);
4395}
4396
4397void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o)
4398{
4399 o.push_back(new object_copy_data_t());
4400
4401 list<object_copy_cursor_t*> cursors;
4402 object_copy_cursor_t::generate_test_instances(cursors);
4403 list<object_copy_cursor_t*>::iterator ci = cursors.begin();
4404 o.back()->cursor = **(ci++);
4405
4406 o.push_back(new object_copy_data_t());
4407 o.back()->cursor = **(ci++);
4408
4409 o.push_back(new object_copy_data_t());
4410 o.back()->size = 1234;
4411 o.back()->mtime.set_from_double(1234);
4412 bufferptr bp("there", 5);
4413 bufferlist bl;
4414 bl.push_back(bp);
4415 o.back()->attrs["hello"] = bl;
4416 bufferptr bp2("not", 3);
4417 bufferlist bl2;
4418 bl2.push_back(bp2);
4419 map<string,bufferlist> omap;
4420 omap["why"] = bl2;
4421 ::encode(omap, o.back()->omap_data);
4422 bufferptr databp("iamsomedatatocontain", 20);
4423 o.back()->data.push_back(databp);
4424 o.back()->omap_header.append("this is an omap header");
4425 o.back()->snaps.push_back(123);
4426 o.back()->reqids.push_back(make_pair(osd_reqid_t(), version_t()));
4427}
4428
4429void object_copy_data_t::dump(Formatter *f) const
4430{
4431 f->open_object_section("cursor");
4432 cursor.dump(f);
4433 f->close_section(); // cursor
4434 f->dump_int("size", size);
4435 f->dump_stream("mtime") << mtime;
4436 /* we should really print out the attrs here, but bufferlist
4437 const-correctness prevents that */
4438 f->dump_int("attrs_size", attrs.size());
4439 f->dump_int("flags", flags);
4440 f->dump_unsigned("data_digest", data_digest);
4441 f->dump_unsigned("omap_digest", omap_digest);
4442 f->dump_int("omap_data_length", omap_data.length());
4443 f->dump_int("omap_header_length", omap_header.length());
4444 f->dump_int("data_length", data.length());
4445 f->open_array_section("snaps");
4446 for (vector<snapid_t>::const_iterator p = snaps.begin();
4447 p != snaps.end(); ++p)
4448 f->dump_unsigned("snap", *p);
4449 f->close_section();
4450 f->open_array_section("reqids");
31f18b77 4451 for (auto p = reqids.begin();
7c673cae
FG
4452 p != reqids.end();
4453 ++p) {
4454 f->open_object_section("extra_reqid");
4455 f->dump_stream("reqid") << p->first;
4456 f->dump_stream("user_version") << p->second;
4457 f->close_section();
4458 }
4459 f->close_section();
4460}
4461
4462// -- pg_create_t --
4463
4464void pg_create_t::encode(bufferlist &bl) const
4465{
4466 ENCODE_START(1, 1, bl);
4467 ::encode(created, bl);
4468 ::encode(parent, bl);
4469 ::encode(split_bits, bl);
4470 ENCODE_FINISH(bl);
4471}
4472
4473void pg_create_t::decode(bufferlist::iterator &bl)
4474{
4475 DECODE_START(1, bl);
4476 ::decode(created, bl);
4477 ::decode(parent, bl);
4478 ::decode(split_bits, bl);
4479 DECODE_FINISH(bl);
4480}
4481
4482void pg_create_t::dump(Formatter *f) const
4483{
4484 f->dump_unsigned("created", created);
4485 f->dump_stream("parent") << parent;
4486 f->dump_int("split_bits", split_bits);
4487}
4488
4489void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
4490{
4491 o.push_back(new pg_create_t);
4492 o.push_back(new pg_create_t(1, pg_t(3, 4, -1), 2));
4493}
4494
4495
4496// -- pg_hit_set_info_t --
4497
4498void pg_hit_set_info_t::encode(bufferlist& bl) const
4499{
4500 ENCODE_START(2, 1, bl);
4501 ::encode(begin, bl);
4502 ::encode(end, bl);
4503 ::encode(version, bl);
4504 ::encode(using_gmt, bl);
4505 ENCODE_FINISH(bl);
4506}
4507
4508void pg_hit_set_info_t::decode(bufferlist::iterator& p)
4509{
4510 DECODE_START(2, p);
4511 ::decode(begin, p);
4512 ::decode(end, p);
4513 ::decode(version, p);
4514 if (struct_v >= 2) {
4515 ::decode(using_gmt, p);
4516 } else {
4517 using_gmt = false;
4518 }
4519 DECODE_FINISH(p);
4520}
4521
4522void pg_hit_set_info_t::dump(Formatter *f) const
4523{
4524 f->dump_stream("begin") << begin;
4525 f->dump_stream("end") << end;
4526 f->dump_stream("version") << version;
4527 f->dump_stream("using_gmt") << using_gmt;
4528}
4529
4530void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
4531{
4532 ls.push_back(new pg_hit_set_info_t);
4533 ls.push_back(new pg_hit_set_info_t);
4534 ls.back()->begin = utime_t(1, 2);
4535 ls.back()->end = utime_t(3, 4);
4536}
4537
4538
4539// -- pg_hit_set_history_t --
4540
4541void pg_hit_set_history_t::encode(bufferlist& bl) const
4542{
4543 ENCODE_START(1, 1, bl);
4544 ::encode(current_last_update, bl);
4545 {
4546 utime_t dummy_stamp;
4547 ::encode(dummy_stamp, bl);
4548 }
4549 {
4550 pg_hit_set_info_t dummy_info;
4551 ::encode(dummy_info, bl);
4552 }
4553 ::encode(history, bl);
4554 ENCODE_FINISH(bl);
4555}
4556
4557void pg_hit_set_history_t::decode(bufferlist::iterator& p)
4558{
4559 DECODE_START(1, p);
4560 ::decode(current_last_update, p);
4561 {
4562 utime_t dummy_stamp;
4563 ::decode(dummy_stamp, p);
4564 }
4565 {
4566 pg_hit_set_info_t dummy_info;
4567 ::decode(dummy_info, p);
4568 }
4569 ::decode(history, p);
4570 DECODE_FINISH(p);
4571}
4572
4573void pg_hit_set_history_t::dump(Formatter *f) const
4574{
4575 f->dump_stream("current_last_update") << current_last_update;
4576 f->open_array_section("history");
4577 for (list<pg_hit_set_info_t>::const_iterator p = history.begin();
4578 p != history.end(); ++p) {
4579 f->open_object_section("info");
4580 p->dump(f);
4581 f->close_section();
4582 }
4583 f->close_section();
4584}
4585
4586void pg_hit_set_history_t::generate_test_instances(list<pg_hit_set_history_t*>& ls)
4587{
4588 ls.push_back(new pg_hit_set_history_t);
4589 ls.push_back(new pg_hit_set_history_t);
4590 ls.back()->current_last_update = eversion_t(1, 2);
4591 ls.back()->history.push_back(pg_hit_set_info_t());
4592}
4593
4594// -- osd_peer_stat_t --
4595
4596void osd_peer_stat_t::encode(bufferlist& bl) const
4597{
4598 ENCODE_START(1, 1, bl);
4599 ::encode(stamp, bl);
4600 ENCODE_FINISH(bl);
4601}
4602
4603void osd_peer_stat_t::decode(bufferlist::iterator& bl)
4604{
4605 DECODE_START(1, bl);
4606 ::decode(stamp, bl);
4607 DECODE_FINISH(bl);
4608}
4609
4610void osd_peer_stat_t::dump(Formatter *f) const
4611{
4612 f->dump_stream("stamp") << stamp;
4613}
4614
4615void osd_peer_stat_t::generate_test_instances(list<osd_peer_stat_t*>& o)
4616{
4617 o.push_back(new osd_peer_stat_t);
4618 o.push_back(new osd_peer_stat_t);
4619 o.back()->stamp = utime_t(1, 2);
4620}
4621
4622ostream& operator<<(ostream& out, const osd_peer_stat_t &stat)
4623{
4624 return out << "stat(" << stat.stamp << ")";
4625}
4626
4627
4628// -- OSDSuperblock --
4629
4630void OSDSuperblock::encode(bufferlist &bl) const
4631{
4632 ENCODE_START(8, 5, bl);
4633 ::encode(cluster_fsid, bl);
4634 ::encode(whoami, bl);
4635 ::encode(current_epoch, bl);
4636 ::encode(oldest_map, bl);
4637 ::encode(newest_map, bl);
4638 ::encode(weight, bl);
4639 compat_features.encode(bl);
4640 ::encode(clean_thru, bl);
4641 ::encode(mounted, bl);
4642 ::encode(osd_fsid, bl);
4643 ::encode((epoch_t)0, bl); // epoch_t last_epoch_marked_full
4644 ::encode((uint32_t)0, bl); // map<int64_t,epoch_t> pool_last_epoch_marked_full
4645 ENCODE_FINISH(bl);
4646}
4647
4648void OSDSuperblock::decode(bufferlist::iterator &bl)
4649{
4650 DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl);
4651 if (struct_v < 3) {
4652 string magic;
4653 ::decode(magic, bl);
4654 }
4655 ::decode(cluster_fsid, bl);
4656 ::decode(whoami, bl);
4657 ::decode(current_epoch, bl);
4658 ::decode(oldest_map, bl);
4659 ::decode(newest_map, bl);
4660 ::decode(weight, bl);
4661 if (struct_v >= 2) {
4662 compat_features.decode(bl);
4663 } else { //upgrade it!
4664 compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
4665 }
4666 ::decode(clean_thru, bl);
4667 ::decode(mounted, bl);
4668 if (struct_v >= 4)
4669 ::decode(osd_fsid, bl);
4670 if (struct_v >= 6) {
4671 epoch_t last_map_marked_full;
4672 ::decode(last_map_marked_full, bl);
4673 }
4674 if (struct_v >= 7) {
4675 map<int64_t,epoch_t> pool_last_map_marked_full;
4676 ::decode(pool_last_map_marked_full, bl);
4677 }
4678 DECODE_FINISH(bl);
4679}
4680
4681void OSDSuperblock::dump(Formatter *f) const
4682{
4683 f->dump_stream("cluster_fsid") << cluster_fsid;
4684 f->dump_stream("osd_fsid") << osd_fsid;
4685 f->dump_int("whoami", whoami);
4686 f->dump_int("current_epoch", current_epoch);
4687 f->dump_int("oldest_map", oldest_map);
4688 f->dump_int("newest_map", newest_map);
4689 f->dump_float("weight", weight);
4690 f->open_object_section("compat");
4691 compat_features.dump(f);
4692 f->close_section();
4693 f->dump_int("clean_thru", clean_thru);
4694 f->dump_int("last_epoch_mounted", mounted);
4695}
4696
4697void OSDSuperblock::generate_test_instances(list<OSDSuperblock*>& o)
4698{
4699 OSDSuperblock z;
4700 o.push_back(new OSDSuperblock(z));
4701 memset(&z.cluster_fsid, 1, sizeof(z.cluster_fsid));
4702 memset(&z.osd_fsid, 2, sizeof(z.osd_fsid));
4703 z.whoami = 3;
4704 z.current_epoch = 4;
4705 z.oldest_map = 5;
4706 z.newest_map = 9;
4707 z.mounted = 8;
4708 z.clean_thru = 7;
4709 o.push_back(new OSDSuperblock(z));
4710 o.push_back(new OSDSuperblock(z));
4711}
4712
4713// -- SnapSet --
4714
4715void SnapSet::encode(bufferlist& bl) const
4716{
4717 ENCODE_START(3, 2, bl);
4718 ::encode(seq, bl);
4719 ::encode(head_exists, bl);
4720 ::encode(snaps, bl);
4721 ::encode(clones, bl);
4722 ::encode(clone_overlap, bl);
4723 ::encode(clone_size, bl);
4724 ::encode(clone_snaps, bl);
4725 ENCODE_FINISH(bl);
4726}
4727
4728void SnapSet::decode(bufferlist::iterator& bl)
4729{
4730 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
4731 ::decode(seq, bl);
4732 ::decode(head_exists, bl);
4733 ::decode(snaps, bl);
4734 ::decode(clones, bl);
4735 ::decode(clone_overlap, bl);
4736 ::decode(clone_size, bl);
4737 if (struct_v >= 3) {
4738 ::decode(clone_snaps, bl);
4739 } else {
4740 clone_snaps.clear();
4741 }
4742 DECODE_FINISH(bl);
4743}
4744
4745void SnapSet::dump(Formatter *f) const
4746{
4747 SnapContext sc(seq, snaps);
4748 f->open_object_section("snap_context");
4749 sc.dump(f);
4750 f->close_section();
4751 f->dump_int("head_exists", head_exists);
4752 f->open_array_section("clones");
4753 for (vector<snapid_t>::const_iterator p = clones.begin(); p != clones.end(); ++p) {
4754 f->open_object_section("clone");
4755 f->dump_unsigned("snap", *p);
4756 f->dump_unsigned("size", clone_size.find(*p)->second);
4757 f->dump_stream("overlap") << clone_overlap.find(*p)->second;
4758 auto q = clone_snaps.find(*p);
4759 if (q != clone_snaps.end()) {
4760 f->open_array_section("snaps");
4761 for (auto s : q->second) {
4762 f->dump_unsigned("snap", s);
4763 }
4764 f->close_section();
4765 }
4766 f->close_section();
4767 }
4768 f->close_section();
4769}
4770
4771void SnapSet::generate_test_instances(list<SnapSet*>& o)
4772{
4773 o.push_back(new SnapSet);
4774 o.push_back(new SnapSet);
4775 o.back()->head_exists = true;
4776 o.back()->seq = 123;
4777 o.back()->snaps.push_back(123);
4778 o.back()->snaps.push_back(12);
4779 o.push_back(new SnapSet);
4780 o.back()->head_exists = true;
4781 o.back()->seq = 123;
4782 o.back()->snaps.push_back(123);
4783 o.back()->snaps.push_back(12);
4784 o.back()->clones.push_back(12);
4785 o.back()->clone_size[12] = 12345;
4786 o.back()->clone_overlap[12];
4787 o.back()->clone_snaps[12] = {12, 10, 8};
4788}
4789
4790ostream& operator<<(ostream& out, const SnapSet& cs)
4791{
4792 if (cs.is_legacy()) {
4793 out << cs.seq << "=" << cs.snaps << ":"
4794 << cs.clones
4795 << (cs.head_exists ? "+head":"");
4796 if (!cs.clone_snaps.empty()) {
4797 out << "+stray_clone_snaps=" << cs.clone_snaps;
4798 }
4799 return out;
4800 } else {
4801 return out << cs.seq << "=" << cs.snaps << ":"
4802 << cs.clone_snaps;
4803 }
4804}
4805
4806void SnapSet::from_snap_set(const librados::snap_set_t& ss, bool legacy)
4807{
4808 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
4809 // correct: it will not include snaps that still logically exist
4810 // but for which there was no clone that is defined. For all
4811 // practical purposes this doesn't matter, since we only use that
4812 // information to clone on the OSD, and we have already moved
4813 // forward past that part of the object history.
4814
4815 seq = ss.seq;
4816 set<snapid_t> _snaps;
4817 set<snapid_t> _clones;
4818 head_exists = false;
4819 for (vector<librados::clone_info_t>::const_iterator p = ss.clones.begin();
4820 p != ss.clones.end();
4821 ++p) {
4822 if (p->cloneid == librados::SNAP_HEAD) {
4823 head_exists = true;
4824 } else {
4825 _clones.insert(p->cloneid);
4826 _snaps.insert(p->snaps.begin(), p->snaps.end());
4827 clone_size[p->cloneid] = p->size;
4828 clone_overlap[p->cloneid]; // the entry must exist, even if it's empty.
4829 for (vector<pair<uint64_t, uint64_t> >::const_iterator q =
4830 p->overlap.begin(); q != p->overlap.end(); ++q)
4831 clone_overlap[p->cloneid].insert(q->first, q->second);
4832 if (!legacy) {
4833 // p->snaps is ascending; clone_snaps is descending
4834 vector<snapid_t>& v = clone_snaps[p->cloneid];
4835 for (auto q = p->snaps.rbegin(); q != p->snaps.rend(); ++q) {
4836 v.push_back(*q);
4837 }
4838 }
4839 }
4840 }
4841
4842 // ascending
4843 clones.clear();
4844 clones.reserve(_clones.size());
4845 for (set<snapid_t>::iterator p = _clones.begin(); p != _clones.end(); ++p)
4846 clones.push_back(*p);
4847
4848 // descending
4849 snaps.clear();
4850 snaps.reserve(_snaps.size());
4851 for (set<snapid_t>::reverse_iterator p = _snaps.rbegin();
4852 p != _snaps.rend(); ++p)
4853 snaps.push_back(*p);
4854}
4855
4856uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
4857{
4858 assert(clone_size.count(clone));
4859 uint64_t size = clone_size.find(clone)->second;
4860 assert(clone_overlap.count(clone));
4861 const interval_set<uint64_t> &overlap = clone_overlap.find(clone)->second;
4862 for (interval_set<uint64_t>::const_iterator i = overlap.begin();
4863 i != overlap.end();
4864 ++i) {
4865 assert(size >= i.get_len());
4866 size -= i.get_len();
4867 }
4868 return size;
4869}
4870
4871void SnapSet::filter(const pg_pool_t &pinfo)
4872{
4873 vector<snapid_t> oldsnaps;
4874 oldsnaps.swap(snaps);
4875 for (vector<snapid_t>::const_iterator i = oldsnaps.begin();
4876 i != oldsnaps.end();
4877 ++i) {
4878 if (!pinfo.is_removed_snap(*i))
4879 snaps.push_back(*i);
4880 }
4881}
4882
4883SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
4884{
4885 SnapSet ss = *this;
4886 ss.filter(pinfo);
4887 return ss;
4888}
4889
4890// -- watch_info_t --
4891
4892void watch_info_t::encode(bufferlist& bl, uint64_t features) const
4893{
4894 ENCODE_START(4, 3, bl);
4895 ::encode(cookie, bl);
4896 ::encode(timeout_seconds, bl);
4897 ::encode(addr, bl, features);
4898 ENCODE_FINISH(bl);
4899}
4900
4901void watch_info_t::decode(bufferlist::iterator& bl)
4902{
4903 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
4904 ::decode(cookie, bl);
4905 if (struct_v < 2) {
4906 uint64_t ver;
4907 ::decode(ver, bl);
4908 }
4909 ::decode(timeout_seconds, bl);
4910 if (struct_v >= 4) {
4911 ::decode(addr, bl);
4912 }
4913 DECODE_FINISH(bl);
4914}
4915
4916void watch_info_t::dump(Formatter *f) const
4917{
4918 f->dump_unsigned("cookie", cookie);
4919 f->dump_unsigned("timeout_seconds", timeout_seconds);
4920 f->open_object_section("addr");
4921 addr.dump(f);
4922 f->close_section();
4923}
4924
4925void watch_info_t::generate_test_instances(list<watch_info_t*>& o)
4926{
4927 o.push_back(new watch_info_t);
4928 o.push_back(new watch_info_t);
4929 o.back()->cookie = 123;
4930 o.back()->timeout_seconds = 99;
4931 entity_addr_t ea;
4932 ea.set_type(entity_addr_t::TYPE_LEGACY);
4933 ea.set_nonce(1);
4934 ea.set_family(AF_INET);
4935 ea.set_in4_quad(0, 127);
4936 ea.set_in4_quad(1, 0);
4937 ea.set_in4_quad(2, 1);
4938 ea.set_in4_quad(3, 2);
4939 ea.set_port(2);
4940 o.back()->addr = ea;
4941}
4942
31f18b77
FG
4943// -- object_manifest_t --
4944
4945void object_manifest_t::encode(bufferlist& bl) const
4946{
4947 ENCODE_START(1, 1, bl);
4948 ::encode(type, bl);
4949 switch (type) {
4950 case TYPE_NONE: break;
4951 case TYPE_REDIRECT:
4952 ::encode(redirect_target, bl);
4953 break;
4954 default:
4955 ceph_abort();
4956 }
4957 ENCODE_FINISH(bl);
4958}
4959
4960void object_manifest_t::decode(bufferlist::iterator& bl)
4961{
4962 DECODE_START(1, bl);
4963 ::decode(type, bl);
4964 switch (type) {
4965 case TYPE_NONE: break;
4966 case TYPE_REDIRECT:
4967 ::decode(redirect_target, bl);
4968 break;
4969 default:
4970 ceph_abort();
4971 }
4972 DECODE_FINISH(bl);
4973}
4974
4975void object_manifest_t::dump(Formatter *f) const
4976{
4977 f->dump_unsigned("type", type);
4978 f->open_object_section("redirect_target");
4979 redirect_target.dump(f);
4980 f->close_section();
4981}
4982
4983void object_manifest_t::generate_test_instances(list<object_manifest_t*>& o)
4984{
4985 o.push_back(new object_manifest_t());
4986 o.back()->type = TYPE_REDIRECT;
4987}
4988
4989ostream& operator<<(ostream& out, const object_manifest_t& om)
4990{
4991 return out << "type:" << om.type << " redirect_target:" << om.redirect_target;
4992}
7c673cae
FG
4993
4994// -- object_info_t --
4995
4996void object_info_t::copy_user_bits(const object_info_t& other)
4997{
4998 // these bits are copied from head->clone.
4999 size = other.size;
5000 mtime = other.mtime;
5001 local_mtime = other.local_mtime;
5002 last_reqid = other.last_reqid;
5003 truncate_seq = other.truncate_seq;
5004 truncate_size = other.truncate_size;
5005 flags = other.flags;
5006 user_version = other.user_version;
5007 data_digest = other.data_digest;
5008 omap_digest = other.omap_digest;
5009}
5010
5011ps_t object_info_t::legacy_object_locator_to_ps(const object_t &oid,
5012 const object_locator_t &loc) {
5013 ps_t ps;
5014 if (loc.key.length())
5015 // Hack, we don't have the osd map, so we don't really know the hash...
5016 ps = ceph_str_hash(CEPH_STR_HASH_RJENKINS, loc.key.c_str(),
5017 loc.key.length());
5018 else
5019 ps = ceph_str_hash(CEPH_STR_HASH_RJENKINS, oid.name.c_str(),
5020 oid.name.length());
5021 return ps;
5022}
5023
5024void object_info_t::encode(bufferlist& bl, uint64_t features) const
5025{
5026 object_locator_t myoloc(soid);
5027 map<entity_name_t, watch_info_t> old_watchers;
5028 for (map<pair<uint64_t, entity_name_t>, watch_info_t>::const_iterator i =
5029 watchers.begin();
5030 i != watchers.end();
5031 ++i) {
5032 old_watchers.insert(make_pair(i->first.second, i->second));
5033 }
31f18b77 5034 ENCODE_START(17, 8, bl);
7c673cae
FG
5035 ::encode(soid, bl);
5036 ::encode(myoloc, bl); //Retained for compatibility
5037 ::encode((__u32)0, bl); // was category, no longer used
5038 ::encode(version, bl);
5039 ::encode(prior_version, bl);
5040 ::encode(last_reqid, bl);
5041 ::encode(size, bl);
5042 ::encode(mtime, bl);
5043 if (soid.snap == CEPH_NOSNAP)
5044 ::encode(osd_reqid_t(), bl); // used to be wrlock_by
5045 else
5046 ::encode(legacy_snaps, bl);
5047 ::encode(truncate_seq, bl);
5048 ::encode(truncate_size, bl);
5049 ::encode(is_lost(), bl);
5050 ::encode(old_watchers, bl, features);
5051 /* shenanigans to avoid breaking backwards compatibility in the disk format.
5052 * When we can, switch this out for simply putting the version_t on disk. */
5053 eversion_t user_eversion(0, user_version);
5054 ::encode(user_eversion, bl);
5055 ::encode(test_flag(FLAG_USES_TMAP), bl);
5056 ::encode(watchers, bl, features);
5057 __u32 _flags = flags;
5058 ::encode(_flags, bl);
5059 ::encode(local_mtime, bl);
5060 ::encode(data_digest, bl);
5061 ::encode(omap_digest, bl);
5062 ::encode(expected_object_size, bl);
5063 ::encode(expected_write_size, bl);
5064 ::encode(alloc_hint_flags, bl);
31f18b77
FG
5065 if (has_manifest()) {
5066 ::encode(manifest, bl);
5067 }
7c673cae
FG
5068 ENCODE_FINISH(bl);
5069}
5070
5071void object_info_t::decode(bufferlist::iterator& bl)
5072{
5073 object_locator_t myoloc;
31f18b77 5074 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl);
7c673cae
FG
5075 map<entity_name_t, watch_info_t> old_watchers;
5076 ::decode(soid, bl);
5077 ::decode(myoloc, bl);
5078 {
5079 string category;
5080 ::decode(category, bl); // no longer used
5081 }
5082 ::decode(version, bl);
5083 ::decode(prior_version, bl);
5084 ::decode(last_reqid, bl);
5085 ::decode(size, bl);
5086 ::decode(mtime, bl);
5087 if (soid.snap == CEPH_NOSNAP) {
5088 osd_reqid_t wrlock_by;
5089 ::decode(wrlock_by, bl);
5090 } else {
5091 ::decode(legacy_snaps, bl);
5092 }
5093 ::decode(truncate_seq, bl);
5094 ::decode(truncate_size, bl);
5095
5096 // if this is struct_v >= 13, we will overwrite this
5097 // below since this field is just here for backwards
5098 // compatibility
5099 __u8 lo;
5100 ::decode(lo, bl);
5101 flags = (flag_t)lo;
5102
5103 ::decode(old_watchers, bl);
5104 eversion_t user_eversion;
5105 ::decode(user_eversion, bl);
5106 user_version = user_eversion.version;
5107
5108 if (struct_v >= 9) {
5109 bool uses_tmap = false;
5110 ::decode(uses_tmap, bl);
5111 if (uses_tmap)
5112 set_flag(FLAG_USES_TMAP);
5113 } else {
5114 set_flag(FLAG_USES_TMAP);
5115 }
5116 if (struct_v < 10)
5117 soid.pool = myoloc.pool;
5118 if (struct_v >= 11) {
5119 ::decode(watchers, bl);
5120 } else {
5121 for (map<entity_name_t, watch_info_t>::iterator i = old_watchers.begin();
5122 i != old_watchers.end();
5123 ++i) {
5124 watchers.insert(
5125 make_pair(
5126 make_pair(i->second.cookie, i->first), i->second));
5127 }
5128 }
5129 if (struct_v >= 13) {
5130 __u32 _flags;
5131 ::decode(_flags, bl);
5132 flags = (flag_t)_flags;
5133 }
5134 if (struct_v >= 14) {
5135 ::decode(local_mtime, bl);
5136 } else {
5137 local_mtime = utime_t();
5138 }
5139 if (struct_v >= 15) {
5140 ::decode(data_digest, bl);
5141 ::decode(omap_digest, bl);
5142 } else {
5143 data_digest = omap_digest = -1;
5144 clear_flag(FLAG_DATA_DIGEST);
5145 clear_flag(FLAG_OMAP_DIGEST);
5146 }
5147 if (struct_v >= 16) {
5148 ::decode(expected_object_size, bl);
5149 ::decode(expected_write_size, bl);
5150 ::decode(alloc_hint_flags, bl);
5151 } else {
5152 expected_object_size = 0;
5153 expected_write_size = 0;
5154 alloc_hint_flags = 0;
5155 }
31f18b77
FG
5156 if (struct_v >= 17) {
5157 if (has_manifest()) {
5158 ::decode(manifest, bl);
5159 }
5160 }
7c673cae
FG
5161 DECODE_FINISH(bl);
5162}
5163
5164void object_info_t::dump(Formatter *f) const
5165{
5166 f->open_object_section("oid");
5167 soid.dump(f);
5168 f->close_section();
5169 f->dump_stream("version") << version;
5170 f->dump_stream("prior_version") << prior_version;
5171 f->dump_stream("last_reqid") << last_reqid;
5172 f->dump_unsigned("user_version", user_version);
5173 f->dump_unsigned("size", size);
5174 f->dump_stream("mtime") << mtime;
5175 f->dump_stream("local_mtime") << local_mtime;
5176 f->dump_unsigned("lost", (int)is_lost());
5177 f->dump_unsigned("flags", (int)flags);
5178 f->open_array_section("legacy_snaps");
5179 for (auto s : legacy_snaps) {
5180 f->dump_unsigned("snap", s);
5181 }
5182 f->close_section();
5183 f->dump_unsigned("truncate_seq", truncate_seq);
5184 f->dump_unsigned("truncate_size", truncate_size);
5185 f->dump_unsigned("data_digest", data_digest);
5186 f->dump_unsigned("omap_digest", omap_digest);
5187 f->dump_unsigned("expected_object_size", expected_object_size);
5188 f->dump_unsigned("expected_write_size", expected_write_size);
5189 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
31f18b77 5190 f->dump_object("manifest", manifest);
7c673cae
FG
5191 f->open_object_section("watchers");
5192 for (map<pair<uint64_t, entity_name_t>,watch_info_t>::const_iterator p =
5193 watchers.begin(); p != watchers.end(); ++p) {
5194 stringstream ss;
5195 ss << p->first.second;
5196 f->open_object_section(ss.str().c_str());
5197 p->second.dump(f);
5198 f->close_section();
5199 }
5200 f->close_section();
5201}
5202
5203void object_info_t::generate_test_instances(list<object_info_t*>& o)
5204{
5205 o.push_back(new object_info_t());
5206
5207 // fixme
5208}
5209
5210
5211ostream& operator<<(ostream& out, const object_info_t& oi)
5212{
5213 out << oi.soid << "(" << oi.version
5214 << " " << oi.last_reqid;
5215 if (oi.soid.snap != CEPH_NOSNAP && !oi.legacy_snaps.empty())
5216 out << " " << oi.legacy_snaps;
5217 if (oi.flags)
5218 out << " " << oi.get_flag_string();
5219 out << " s " << oi.size;
5220 out << " uv " << oi.user_version;
5221 if (oi.is_data_digest())
5222 out << " dd " << std::hex << oi.data_digest << std::dec;
5223 if (oi.is_omap_digest())
5224 out << " od " << std::hex << oi.omap_digest << std::dec;
5225 out << " alloc_hint [" << oi.expected_object_size
5226 << " " << oi.expected_write_size
5227 << " " << oi.alloc_hint_flags << "]";
31f18b77
FG
5228 if (oi.has_manifest())
5229 out << " " << oi.manifest;
7c673cae
FG
5230
5231 out << ")";
5232 return out;
5233}
5234
5235// -- ObjectRecovery --
5236void ObjectRecoveryProgress::encode(bufferlist &bl) const
5237{
5238 ENCODE_START(1, 1, bl);
5239 ::encode(first, bl);
5240 ::encode(data_complete, bl);
5241 ::encode(data_recovered_to, bl);
5242 ::encode(omap_recovered_to, bl);
5243 ::encode(omap_complete, bl);
5244 ENCODE_FINISH(bl);
5245}
5246
5247void ObjectRecoveryProgress::decode(bufferlist::iterator &bl)
5248{
5249 DECODE_START(1, bl);
5250 ::decode(first, bl);
5251 ::decode(data_complete, bl);
5252 ::decode(data_recovered_to, bl);
5253 ::decode(omap_recovered_to, bl);
5254 ::decode(omap_complete, bl);
5255 DECODE_FINISH(bl);
5256}
5257
5258ostream &operator<<(ostream &out, const ObjectRecoveryProgress &prog)
5259{
5260 return prog.print(out);
5261}
5262
5263void ObjectRecoveryProgress::generate_test_instances(
5264 list<ObjectRecoveryProgress*>& o)
5265{
5266 o.push_back(new ObjectRecoveryProgress);
5267 o.back()->first = false;
5268 o.back()->data_complete = true;
5269 o.back()->omap_complete = true;
5270 o.back()->data_recovered_to = 100;
5271
5272 o.push_back(new ObjectRecoveryProgress);
5273 o.back()->first = true;
5274 o.back()->data_complete = false;
5275 o.back()->omap_complete = false;
5276 o.back()->data_recovered_to = 0;
5277}
5278
5279ostream &ObjectRecoveryProgress::print(ostream &out) const
5280{
5281 return out << "ObjectRecoveryProgress("
5282 << ( first ? "" : "!" ) << "first, "
5283 << "data_recovered_to:" << data_recovered_to
5284 << ", data_complete:" << ( data_complete ? "true" : "false" )
5285 << ", omap_recovered_to:" << omap_recovered_to
5286 << ", omap_complete:" << ( omap_complete ? "true" : "false" )
224ce89b 5287 << ", error:" << ( error ? "true" : "false" )
7c673cae
FG
5288 << ")";
5289}
5290
5291void ObjectRecoveryProgress::dump(Formatter *f) const
5292{
5293 f->dump_int("first?", first);
5294 f->dump_int("data_complete?", data_complete);
5295 f->dump_unsigned("data_recovered_to", data_recovered_to);
5296 f->dump_int("omap_complete?", omap_complete);
5297 f->dump_string("omap_recovered_to", omap_recovered_to);
5298}
5299
// Serialize this recovery info into `bl`.
// `features` is forwarded only to the encoding of `oi`; the field order is
// the wire layout and must match decode().
void ObjectRecoveryInfo::encode(bufferlist &bl, uint64_t features) const
{
  ENCODE_START(2, 1, bl);
  ::encode(soid, bl);
  ::encode(version, bl);
  ::encode(size, bl);
  ::encode(oi, bl, features);
  ::encode(ss, bl);
  ::encode(copy_subset, bl);
  ::encode(clone_subset, bl);
  ENCODE_FINISH(bl);
}
5312
5313void ObjectRecoveryInfo::decode(bufferlist::iterator &bl,
5314 int64_t pool)
5315{
5316 DECODE_START(2, bl);
5317 ::decode(soid, bl);
5318 ::decode(version, bl);
5319 ::decode(size, bl);
5320 ::decode(oi, bl);
5321 ::decode(ss, bl);
5322 ::decode(copy_subset, bl);
5323 ::decode(clone_subset, bl);
5324 DECODE_FINISH(bl);
5325
5326 if (struct_v < 2) {
5327 if (!soid.is_max() && soid.pool == -1)
5328 soid.pool = pool;
5329 map<hobject_t, interval_set<uint64_t>> tmp;
5330 tmp.swap(clone_subset);
5331 for (map<hobject_t, interval_set<uint64_t>>::iterator i = tmp.begin();
5332 i != tmp.end();
5333 ++i) {
5334 hobject_t first(i->first);
5335 if (!first.is_max() && first.pool == -1)
5336 first.pool = pool;
5337 clone_subset[first].swap(i->second);
5338 }
5339 }
5340}
5341
5342void ObjectRecoveryInfo::generate_test_instances(
5343 list<ObjectRecoveryInfo*>& o)
5344{
5345 o.push_back(new ObjectRecoveryInfo);
5346 o.back()->soid = hobject_t(sobject_t("key", CEPH_NOSNAP));
5347 o.back()->version = eversion_t(0,0);
5348 o.back()->size = 100;
5349}
5350
5351
5352void ObjectRecoveryInfo::dump(Formatter *f) const
5353{
5354 f->dump_stream("object") << soid;
5355 f->dump_stream("at_version") << version;
5356 f->dump_stream("size") << size;
5357 {
5358 f->open_object_section("object_info");
5359 oi.dump(f);
5360 f->close_section();
5361 }
5362 {
5363 f->open_object_section("snapset");
5364 ss.dump(f);
5365 f->close_section();
5366 }
5367 f->dump_stream("copy_subset") << copy_subset;
5368 f->dump_stream("clone_subset") << clone_subset;
5369}
5370
5371ostream& operator<<(ostream& out, const ObjectRecoveryInfo &inf)
5372{
5373 return inf.print(out);
5374}
5375
5376ostream &ObjectRecoveryInfo::print(ostream &out) const
5377{
5378 return out << "ObjectRecoveryInfo("
5379 << soid << "@" << version
5380 << ", size: " << size
5381 << ", copy_subset: " << copy_subset
5382 << ", clone_subset: " << clone_subset
5383 << ", snapset: " << ss
5384 << ")";
5385}
5386
5387// -- PushReplyOp --
5388void PushReplyOp::generate_test_instances(list<PushReplyOp*> &o)
5389{
5390 o.push_back(new PushReplyOp);
5391 o.push_back(new PushReplyOp);
5392 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5393 o.push_back(new PushReplyOp);
5394 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5395}
5396
// Serialize this reply into `bl`; the only encoded field is `soid`.
void PushReplyOp::encode(bufferlist &bl) const
{
  ENCODE_START(1, 1, bl);
  ::encode(soid, bl);
  ENCODE_FINISH(bl);
}
5403
// Read a reply from `bl`; mirrors encode().
void PushReplyOp::decode(bufferlist::iterator &bl)
{
  DECODE_START(1, bl);
  ::decode(soid, bl);
  DECODE_FINISH(bl);
}
5410
5411void PushReplyOp::dump(Formatter *f) const
5412{
5413 f->dump_stream("soid") << soid;
5414}
5415
5416ostream &PushReplyOp::print(ostream &out) const
5417{
5418 return out
5419 << "PushReplyOp(" << soid
5420 << ")";
5421}
5422
5423ostream& operator<<(ostream& out, const PushReplyOp &op)
5424{
5425 return op.print(out);
5426}
5427
5428uint64_t PushReplyOp::cost(CephContext *cct) const
5429{
5430
5431 return cct->_conf->osd_push_per_object_cost +
5432 cct->_conf->osd_recovery_max_chunk;
5433}
5434
5435// -- PullOp --
5436void PullOp::generate_test_instances(list<PullOp*> &o)
5437{
5438 o.push_back(new PullOp);
5439 o.push_back(new PullOp);
5440 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5441 o.back()->recovery_info.version = eversion_t(3, 10);
5442 o.push_back(new PullOp);
5443 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5444 o.back()->recovery_info.version = eversion_t(0, 0);
5445}
5446
// Serialize this pull request into `bl`.
// `features` is forwarded only to the encoding of `recovery_info`.
void PullOp::encode(bufferlist &bl, uint64_t features) const
{
  ENCODE_START(1, 1, bl);
  ::encode(soid, bl);
  ::encode(recovery_info, bl, features);
  ::encode(recovery_progress, bl);
  ENCODE_FINISH(bl);
}
5455
// Read a pull request from `bl`, in the same field order as encode().
void PullOp::decode(bufferlist::iterator &bl)
{
  DECODE_START(1, bl);
  ::decode(soid, bl);
  ::decode(recovery_info, bl);
  ::decode(recovery_progress, bl);
  DECODE_FINISH(bl);
}
5464
5465void PullOp::dump(Formatter *f) const
5466{
5467 f->dump_stream("soid") << soid;
5468 {
5469 f->open_object_section("recovery_info");
5470 recovery_info.dump(f);
5471 f->close_section();
5472 }
5473 {
5474 f->open_object_section("recovery_progress");
5475 recovery_progress.dump(f);
5476 f->close_section();
5477 }
5478}
5479
5480ostream &PullOp::print(ostream &out) const
5481{
5482 return out
5483 << "PullOp(" << soid
5484 << ", recovery_info: " << recovery_info
5485 << ", recovery_progress: " << recovery_progress
5486 << ")";
5487}
5488
5489ostream& operator<<(ostream& out, const PullOp &op)
5490{
5491 return op.print(out);
5492}
5493
5494uint64_t PullOp::cost(CephContext *cct) const
5495{
5496 return cct->_conf->osd_push_per_object_cost +
5497 cct->_conf->osd_recovery_max_chunk;
5498}
5499
5500// -- PushOp --
5501void PushOp::generate_test_instances(list<PushOp*> &o)
5502{
5503 o.push_back(new PushOp);
5504 o.push_back(new PushOp);
5505 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5506 o.back()->version = eversion_t(3, 10);
5507 o.push_back(new PushOp);
5508 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5509 o.back()->version = eversion_t(0, 0);
5510}
5511
// Serialize this push into `bl`.
// `features` is forwarded only to the encoding of `recovery_info`.
// Note the progress fields are written as after_progress first, then
// before_progress; decode() reads them in the same order.
void PushOp::encode(bufferlist &bl, uint64_t features) const
{
  ENCODE_START(1, 1, bl);
  ::encode(soid, bl);
  ::encode(version, bl);
  ::encode(data, bl);
  ::encode(data_included, bl);
  ::encode(omap_header, bl);
  ::encode(omap_entries, bl);
  ::encode(attrset, bl);
  ::encode(recovery_info, bl, features);
  ::encode(after_progress, bl);
  ::encode(before_progress, bl);
  ENCODE_FINISH(bl);
}
5527
// Read a push from `bl`, in the same field order as encode()
// (after_progress precedes before_progress).
void PushOp::decode(bufferlist::iterator &bl)
{
  DECODE_START(1, bl);
  ::decode(soid, bl);
  ::decode(version, bl);
  ::decode(data, bl);
  ::decode(data_included, bl);
  ::decode(omap_header, bl);
  ::decode(omap_entries, bl);
  ::decode(attrset, bl);
  ::decode(recovery_info, bl);
  ::decode(after_progress, bl);
  ::decode(before_progress, bl);
  DECODE_FINISH(bl);
}
5543
5544void PushOp::dump(Formatter *f) const
5545{
5546 f->dump_stream("soid") << soid;
5547 f->dump_stream("version") << version;
5548 f->dump_int("data_len", data.length());
5549 f->dump_stream("data_included") << data_included;
5550 f->dump_int("omap_header_len", omap_header.length());
5551 f->dump_int("omap_entries_len", omap_entries.size());
5552 f->dump_int("attrset_len", attrset.size());
5553 {
5554 f->open_object_section("recovery_info");
5555 recovery_info.dump(f);
5556 f->close_section();
5557 }
5558 {
5559 f->open_object_section("after_progress");
5560 after_progress.dump(f);
5561 f->close_section();
5562 }
5563 {
5564 f->open_object_section("before_progress");
5565 before_progress.dump(f);
5566 f->close_section();
5567 }
5568}
5569
5570ostream &PushOp::print(ostream &out) const
5571{
5572 return out
5573 << "PushOp(" << soid
5574 << ", version: " << version
5575 << ", data_included: " << data_included
5576 << ", data_size: " << data.length()
5577 << ", omap_header_size: " << omap_header.length()
5578 << ", omap_entries_size: " << omap_entries.size()
5579 << ", attrset_size: " << attrset.size()
5580 << ", recovery_info: " << recovery_info
5581 << ", after_progress: " << after_progress
5582 << ", before_progress: " << before_progress
5583 << ")";
5584}
5585
5586ostream& operator<<(ostream& out, const PushOp &op)
5587{
5588 return op.print(out);
5589}
5590
5591uint64_t PushOp::cost(CephContext *cct) const
5592{
5593 uint64_t cost = data_included.size();
5594 for (map<string, bufferlist>::const_iterator i =
5595 omap_entries.begin();
5596 i != omap_entries.end();
5597 ++i) {
5598 cost += i->second.length();
5599 }
5600 cost += cct->_conf->osd_push_per_object_cost;
5601 return cost;
5602}
5603
5604// -- ScrubMap --
5605
5606void ScrubMap::merge_incr(const ScrubMap &l)
5607{
5608 assert(valid_through == l.incr_since);
5609 valid_through = l.valid_through;
5610
5611 for (map<hobject_t,object>::const_iterator p = l.objects.begin();
5612 p != l.objects.end();
5613 ++p){
5614 if (p->second.negative) {
5615 map<hobject_t,object>::iterator q = objects.find(p->first);
5616 if (q != objects.end()) {
5617 objects.erase(q);
5618 }
5619 } else {
5620 objects[p->first] = p->second;
5621 }
5622 }
5623}
5624
// Serialize this scrub map into `bl`.
// Two deprecated fields are still written as empty placeholders so the
// layout stays readable by decode(): a zero count where attrs used to be,
// and an empty bufferlist for the old log.
void ScrubMap::encode(bufferlist& bl) const
{
  ENCODE_START(3, 2, bl);
  ::encode(objects, bl);
  ::encode((__u32)0, bl); // used to be attrs; now deprecated
  bufferlist old_logbl;  // not used
  ::encode(old_logbl, bl);
  ::encode(valid_through, bl);
  ::encode(incr_since, bl);
  ENCODE_FINISH(bl);
}
5636
5637void ScrubMap::decode(bufferlist::iterator& bl, int64_t pool)
5638{
5639 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
5640 ::decode(objects, bl);
5641 {
5642 map<string,string> attrs; // deprecated
5643 ::decode(attrs, bl);
5644 }
5645 bufferlist old_logbl; // not used
5646 ::decode(old_logbl, bl);
5647 ::decode(valid_through, bl);
5648 ::decode(incr_since, bl);
5649 DECODE_FINISH(bl);
5650
5651 // handle hobject_t upgrade
5652 if (struct_v < 3) {
5653 map<hobject_t, object> tmp;
5654 tmp.swap(objects);
5655 for (map<hobject_t, object>::iterator i = tmp.begin();
5656 i != tmp.end();
5657 ++i) {
5658 hobject_t first(i->first);
5659 if (!first.is_max() && first.pool == -1)
5660 first.pool = pool;
5661 objects[first] = i->second;
5662 }
5663 }
5664}
5665
5666void ScrubMap::dump(Formatter *f) const
5667{
5668 f->dump_stream("valid_through") << valid_through;
5669 f->dump_stream("incremental_since") << incr_since;
5670 f->open_array_section("objects");
5671 for (map<hobject_t,object>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
5672 f->open_object_section("object");
5673 f->dump_string("name", p->first.oid.name);
5674 f->dump_unsigned("hash", p->first.get_hash());
5675 f->dump_string("key", p->first.get_key());
5676 f->dump_int("snapid", p->first.snap);
5677 p->second.dump(f);
5678 f->close_section();
5679 }
5680 f->close_section();
5681}
5682
5683void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
5684{
5685 o.push_back(new ScrubMap);
5686 o.push_back(new ScrubMap);
5687 o.back()->valid_through = eversion_t(1, 2);
5688 o.back()->incr_since = eversion_t(3, 4);
5689 list<object*> obj;
5690 object::generate_test_instances(obj);
5691 o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
5692 obj.pop_back();
5693 o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
5694}
5695
5696// -- ScrubMap::object --
5697
// Serialize this scrubbed-object record into `bl`.
// A combined `compat_read_error` flag (set when any of read_error,
// ec_hash_mismatch or ec_size_mismatch is set) is written ahead of
// stat_error; the three individual flags follow at the end. decode()
// reads the individual flags only for struct_v >= 8 and otherwise falls
// back to the combined flag.
void ScrubMap::object::encode(bufferlist& bl) const
{
  bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
  ENCODE_START(8, 7, bl);
  ::encode(size, bl);
  ::encode(negative, bl);
  ::encode(attrs, bl);
  ::encode(digest, bl);
  ::encode(digest_present, bl);
  ::encode((uint32_t)0, bl);  // obsolete nlinks
  ::encode((uint32_t)0, bl);  // snapcolls
  ::encode(omap_digest, bl);
  ::encode(omap_digest_present, bl);
  ::encode(compat_read_error, bl);
  ::encode(stat_error, bl);
  ::encode(read_error, bl);
  ::encode(ec_hash_mismatch, bl);
  ::encode(ec_size_mismatch, bl);
  ENCODE_FINISH(bl);
}
5718
// Read a scrubbed-object record from `bl`.
// Boolean members are decoded through the local `tmp` and then assigned.
// The obsolete nlinks and snapcolls fields are decoded and discarded.
void ScrubMap::object::decode(bufferlist::iterator& bl)
{
  DECODE_START(8, bl);
  ::decode(size, bl);
  bool tmp, compat_read_error = false;
  ::decode(tmp, bl);
  negative = tmp;
  ::decode(attrs, bl);
  ::decode(digest, bl);
  ::decode(tmp, bl);
  digest_present = tmp;
  {
    // obsolete fields: consumed to keep the iterator aligned, then dropped
    uint32_t nlinks;
    ::decode(nlinks, bl);
    set<snapid_t> snapcolls;
    ::decode(snapcolls, bl);
  }
  ::decode(omap_digest, bl);
  ::decode(tmp, bl);
  omap_digest_present = tmp;
  ::decode(compat_read_error, bl);
  ::decode(tmp, bl);
  stat_error = tmp;
  // the individual error flags only exist from struct version 8 on
  if (struct_v >= 8) {
    ::decode(tmp, bl);
    read_error = tmp;
    ::decode(tmp, bl);
    ec_hash_mismatch = tmp;
    ::decode(tmp, bl);
    ec_size_mismatch = tmp;
  }
  // If older encoder found a read_error, set read_error
  if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
    read_error = true;
  DECODE_FINISH(bl);
}
5755
5756void ScrubMap::object::dump(Formatter *f) const
5757{
5758 f->dump_int("size", size);
5759 f->dump_int("negative", negative);
5760 f->open_array_section("attrs");
5761 for (map<string,bufferptr>::const_iterator p = attrs.begin(); p != attrs.end(); ++p) {
5762 f->open_object_section("attr");
5763 f->dump_string("name", p->first);
5764 f->dump_int("length", p->second.length());
5765 f->close_section();
5766 }
5767 f->close_section();
5768}
5769
5770void ScrubMap::object::generate_test_instances(list<object*>& o)
5771{
5772 o.push_back(new object);
5773 o.push_back(new object);
5774 o.back()->negative = true;
5775 o.push_back(new object);
5776 o.back()->size = 123;
5777 o.back()->attrs["foo"] = buffer::copy("foo", 3);
5778 o.back()->attrs["bar"] = buffer::copy("barval", 6);
5779}
5780
5781// -- OSDOp --
5782
5783ostream& operator<<(ostream& out, const OSDOp& op)
5784{
5785 out << ceph_osd_op_name(op.op.op);
5786 if (ceph_osd_op_type_data(op.op.op)) {
5787 // data extent
5788 switch (op.op.op) {
5789 case CEPH_OSD_OP_ASSERT_VER:
5790 out << " v" << op.op.assert_ver.ver;
5791 break;
5792 case CEPH_OSD_OP_TRUNCATE:
5793 out << " " << op.op.extent.offset;
5794 break;
5795 case CEPH_OSD_OP_MASKTRUNC:
5796 case CEPH_OSD_OP_TRIMTRUNC:
5797 out << " " << op.op.extent.truncate_seq << "@"
5798 << (int64_t)op.op.extent.truncate_size;
5799 break;
5800 case CEPH_OSD_OP_ROLLBACK:
5801 out << " " << snapid_t(op.op.snap.snapid);
5802 break;
5803 case CEPH_OSD_OP_WATCH:
5804 out << " " << ceph_osd_watch_op_name(op.op.watch.op)
5805 << " cookie " << op.op.watch.cookie;
5806 if (op.op.watch.gen)
5807 out << " gen " << op.op.watch.gen;
5808 break;
5809 case CEPH_OSD_OP_NOTIFY:
5810 case CEPH_OSD_OP_NOTIFY_ACK:
5811 out << " cookie " << op.op.notify.cookie;
5812 break;
5813 case CEPH_OSD_OP_COPY_GET:
5814 out << " max " << op.op.copy_get.max;
5815 break;
5816 case CEPH_OSD_OP_COPY_FROM:
5817 out << " ver " << op.op.copy_from.src_version;
5818 break;
5819 case CEPH_OSD_OP_SETALLOCHINT:
5820 out << " object_size " << op.op.alloc_hint.expected_object_size
5821 << " write_size " << op.op.alloc_hint.expected_write_size;
5822 break;
5823 case CEPH_OSD_OP_READ:
5824 case CEPH_OSD_OP_SPARSE_READ:
5825 case CEPH_OSD_OP_SYNC_READ:
5826 case CEPH_OSD_OP_WRITE:
5827 case CEPH_OSD_OP_WRITEFULL:
5828 case CEPH_OSD_OP_ZERO:
5829 case CEPH_OSD_OP_APPEND:
5830 case CEPH_OSD_OP_MAPEXT:
5831 out << " " << op.op.extent.offset << "~" << op.op.extent.length;
5832 if (op.op.extent.truncate_seq)
5833 out << " [" << op.op.extent.truncate_seq << "@"
5834 << (int64_t)op.op.extent.truncate_size << "]";
5835 if (op.op.flags)
5836 out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
5837 default:
5838 // don't show any arg info
5839 break;
5840 }
5841 } else if (ceph_osd_op_type_attr(op.op.op)) {
5842 // xattr name
5843 if (op.op.xattr.name_len && op.indata.length()) {
5844 out << " ";
5845 op.indata.write(0, op.op.xattr.name_len, out);
5846 }
5847 if (op.op.xattr.value_len)
5848 out << " (" << op.op.xattr.value_len << ")";
5849 if (op.op.op == CEPH_OSD_OP_CMPXATTR)
5850 out << " op " << (int)op.op.xattr.cmp_op
5851 << " mode " << (int)op.op.xattr.cmp_mode;
5852 } else if (ceph_osd_op_type_exec(op.op.op)) {
5853 // class.method
5854 if (op.op.cls.class_len && op.indata.length()) {
5855 out << " ";
5856 op.indata.write(0, op.op.cls.class_len, out);
5857 out << ".";
5858 op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
5859 }
5860 } else if (ceph_osd_op_type_pg(op.op.op)) {
5861 switch (op.op.op) {
5862 case CEPH_OSD_OP_PGLS:
5863 case CEPH_OSD_OP_PGLS_FILTER:
5864 case CEPH_OSD_OP_PGNLS:
5865 case CEPH_OSD_OP_PGNLS_FILTER:
5866 out << " start_epoch " << op.op.pgls.start_epoch;
5867 break;
5868 case CEPH_OSD_OP_PG_HITSET_LS:
5869 break;
5870 case CEPH_OSD_OP_PG_HITSET_GET:
5871 out << " " << utime_t(op.op.hit_set_get.stamp);
5872 break;
5873 case CEPH_OSD_OP_SCRUBLS:
5874 break;
5875 }
5876 }
5877 return out;
5878}
5879
5880
5881void OSDOp::split_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& in)
5882{
5883 bufferlist::iterator datap = in.begin();
5884 for (unsigned i = 0; i < ops.size(); i++) {
5885 if (ops[i].op.payload_len) {
5886 datap.copy(ops[i].op.payload_len, ops[i].indata);
5887 }
5888 }
5889}
5890
5891void OSDOp::merge_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& out)
5892{
5893 for (unsigned i = 0; i < ops.size(); i++) {
5894 if (ops[i].indata.length()) {
5895 ops[i].op.payload_len = ops[i].indata.length();
5896 out.append(ops[i].indata);
5897 }
5898 }
5899}
5900
5901void OSDOp::split_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& in)
5902{
5903 bufferlist::iterator datap = in.begin();
5904 for (unsigned i = 0; i < ops.size(); i++) {
5905 if (ops[i].op.payload_len) {
5906 datap.copy(ops[i].op.payload_len, ops[i].outdata);
5907 }
5908 }
5909}
5910
5911void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& out)
5912{
5913 for (unsigned i = 0; i < ops.size(); i++) {
5914 if (ops[i].outdata.length()) {
5915 ops[i].op.payload_len = ops[i].outdata.length();
5916 out.append(ops[i].outdata);
5917 }
5918 }
5919}
5920
5921bool store_statfs_t::operator==(const store_statfs_t& other) const
5922{
5923 return total == other.total
5924 && available == other.available
5925 && allocated == other.allocated
5926 && stored == other.stored
5927 && compressed == other.compressed
5928 && compressed_allocated == other.compressed_allocated
5929 && compressed_original == other.compressed_original;
5930}
5931
5932void store_statfs_t::dump(Formatter *f) const
5933{
5934 f->dump_int("total", total);
5935 f->dump_int("available", available);
5936 f->dump_int("allocated", allocated);
5937 f->dump_int("stored", stored);
5938 f->dump_int("compressed", compressed);
5939 f->dump_int("compressed_allocated", compressed_allocated);
5940 f->dump_int("compressed_original", compressed_original);
5941}
5942
5943ostream& operator<<(ostream& out, const store_statfs_t &s)
5944{
5945 out << std::hex
5946 << "store_statfs(0x" << s.available
5947 << "/0x" << s.total
5948 << ", stored 0x" << s.stored
5949 << "/0x" << s.allocated
5950 << ", compress 0x" << s.compressed
5951 << "/0x" << s.compressed_allocated
5952 << "/0x" << s.compressed_original
5953 << std::dec
5954 << ")";
5955 return out;
5956}
224ce89b
WB
5957
5958void OSDOp::clear_data(vector<OSDOp>& ops)
5959{
5960 for (unsigned i = 0; i < ops.size(); i++) {
5961 OSDOp& op = ops[i];
5962 op.outdata.clear();
5963 if (ceph_osd_op_type_attr(op.op.op) &&
5964 op.op.xattr.name_len &&
5965 op.indata.length() >= op.op.xattr.name_len) {
5966 bufferptr bp(op.op.xattr.name_len);
5967 bufferlist bl;
5968 bl.append(bp);
5969 bl.copy_in(0, op.op.xattr.name_len, op.indata);
5970 op.indata.claim(bl);
5971 } else if (ceph_osd_op_type_exec(op.op.op) &&
5972 op.op.cls.class_len &&
5973 op.indata.length() >
5974 (op.op.cls.class_len + op.op.cls.method_len)) {
5975 __u8 len = op.op.cls.class_len + op.op.cls.method_len;
5976 bufferptr bp(len);
5977 bufferlist bl;
5978 bl.append(bp);
5979 bl.copy_in(0, len, op.indata);
5980 op.indata.claim(bl);
5981 } else {
5982 op.indata.clear();
5983 }
5984 }
5985}
5986