]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/osd_types.cc
import ceph nautilus 14.2.2
[ceph.git] / ceph / src / osd / osd_types.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18 #include <boost/assign/list_of.hpp>
19
20 #include "osd_types.h"
21 #include "include/ceph_features.h"
22 #include "include/stringify.h"
23 extern "C" {
24 #include "crush/hash.h"
25 }
26 #include "OSDMap.h"
27
28 const char *ceph_osd_flag_name(unsigned flag)
29 {
30 switch (flag) {
31 case CEPH_OSD_FLAG_ACK: return "ack";
32 case CEPH_OSD_FLAG_ONNVRAM: return "onnvram";
33 case CEPH_OSD_FLAG_ONDISK: return "ondisk";
34 case CEPH_OSD_FLAG_RETRY: return "retry";
35 case CEPH_OSD_FLAG_READ: return "read";
36 case CEPH_OSD_FLAG_WRITE: return "write";
37 case CEPH_OSD_FLAG_ORDERSNAP: return "ordersnap";
38 case CEPH_OSD_FLAG_PEERSTAT_OLD: return "peerstat_old";
39 case CEPH_OSD_FLAG_BALANCE_READS: return "balance_reads";
40 case CEPH_OSD_FLAG_PARALLELEXEC: return "parallelexec";
41 case CEPH_OSD_FLAG_PGOP: return "pgop";
42 case CEPH_OSD_FLAG_EXEC: return "exec";
43 case CEPH_OSD_FLAG_EXEC_PUBLIC: return "exec_public";
44 case CEPH_OSD_FLAG_LOCALIZE_READS: return "localize_reads";
45 case CEPH_OSD_FLAG_RWORDERED: return "rwordered";
46 case CEPH_OSD_FLAG_IGNORE_CACHE: return "ignore_cache";
47 case CEPH_OSD_FLAG_SKIPRWLOCKS: return "skiprwlocks";
48 case CEPH_OSD_FLAG_IGNORE_OVERLAY: return "ignore_overlay";
49 case CEPH_OSD_FLAG_FLUSH: return "flush";
50 case CEPH_OSD_FLAG_MAP_SNAP_CLONE: return "map_snap_clone";
51 case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
52 case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
53 case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
54 case CEPH_OSD_FLAG_FULL_TRY: return "full_try";
55 case CEPH_OSD_FLAG_FULL_FORCE: return "full_force";
56 case CEPH_OSD_FLAG_IGNORE_REDIRECT: return "ignore_redirect";
57 default: return "???";
58 }
59 }
60
61 string ceph_osd_flag_string(unsigned flags)
62 {
63 string s;
64 for (unsigned i=0; i<32; ++i) {
65 if (flags & (1u<<i)) {
66 if (s.length())
67 s += "+";
68 s += ceph_osd_flag_name(1u << i);
69 }
70 }
71 if (s.length())
72 return s;
73 return string("-");
74 }
75
76 const char * ceph_osd_op_flag_name(unsigned flag)
77 {
78 const char *name;
79
80 switch(flag) {
81 case CEPH_OSD_OP_FLAG_EXCL:
82 name = "excl";
83 break;
84 case CEPH_OSD_OP_FLAG_FAILOK:
85 name = "failok";
86 break;
87 case CEPH_OSD_OP_FLAG_FADVISE_RANDOM:
88 name = "fadvise_random";
89 break;
90 case CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL:
91 name = "fadvise_sequential";
92 break;
93 case CEPH_OSD_OP_FLAG_FADVISE_WILLNEED:
94 name = "favise_willneed";
95 break;
96 case CEPH_OSD_OP_FLAG_FADVISE_DONTNEED:
97 name = "fadvise_dontneed";
98 break;
99 case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
100 name = "fadvise_nocache";
101 break;
102 case CEPH_OSD_OP_FLAG_WITH_REFERENCE:
103 name = "with_reference";
104 break;
105 case CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE:
106 name = "bypass_clean_cache";
107 break;
108 default:
109 name = "???";
110 };
111
112 return name;
113 }
114
115 string ceph_osd_op_flag_string(unsigned flags)
116 {
117 string s;
118 for (unsigned i=0; i<32; ++i) {
119 if (flags & (1u<<i)) {
120 if (s.length())
121 s += "+";
122 s += ceph_osd_op_flag_name(1u << i);
123 }
124 }
125 if (s.length())
126 return s;
127 return string("-");
128 }
129
130 string ceph_osd_alloc_hint_flag_string(unsigned flags)
131 {
132 string s;
133 for (unsigned i=0; i<32; ++i) {
134 if (flags & (1u<<i)) {
135 if (s.length())
136 s += "+";
137 s += ceph_osd_alloc_hint_flag_name(1u << i);
138 }
139 }
140 if (s.length())
141 return s;
142 return string("-");
143 }
144
/**
 * Append this (osd, shard) pair to @p bl.
 *
 * The payload is framed by ENCODE_START/ENCODE_FINISH with version 1 and
 * compat version 1; fields are written in the order osd, shard, which
 * decode() reads back in the same order.
 */
void pg_shard_t::encode(bufferlist &bl) const
{
  ENCODE_START(1, 1, bl);
  encode(osd, bl);
  encode(shard, bl);
  ENCODE_FINISH(bl);
}
/**
 * Read an (osd, shard) pair from @p bl, in the order written by encode().
 *
 * The payload is expected to be framed as produced by ENCODE_START with
 * version 1.
 */
void pg_shard_t::decode(bufferlist::const_iterator &bl)
{
  DECODE_START(1, bl);
  decode(osd, bl);
  decode(shard, bl);
  DECODE_FINISH(bl);
}
159
160 ostream &operator<<(ostream &lhs, const pg_shard_t &rhs)
161 {
162 if (rhs.is_undefined())
163 return lhs << "?";
164 if (rhs.shard == shard_id_t::NO_SHARD)
165 return lhs << rhs.get_osd();
166 return lhs << rhs.get_osd() << '(' << (unsigned)(rhs.shard) << ')';
167 }
168
169 void dump(Formatter* f, const osd_alerts_t& alerts)
170 {
171 for (auto& a : alerts) {
172 string s0 = " osd: ";
173 s0 += stringify(a.first);
174 string s;
175 for (auto& aa : a.second) {
176 s = s0;
177 s += " ";
178 s += aa.first;
179 s += ":";
180 s += aa.second;
181 f->dump_string("alert", s);
182 }
183 }
184 }
185
186 // -- osd_reqid_t --
/**
 * Write this request id to @p f as three fields: "name" (streamed),
 * "inc" (signed integer) and "tid" (unsigned integer).
 */
void osd_reqid_t::dump(Formatter *f) const
{
  f->dump_stream("name") << name;
  f->dump_int("inc", inc);
  f->dump_unsigned("tid", tid);
}
193
194 void osd_reqid_t::generate_test_instances(list<osd_reqid_t*>& o)
195 {
196 o.push_back(new osd_reqid_t);
197 o.push_back(new osd_reqid_t(entity_name_t::CLIENT(123), 1, 45678));
198 }
199
200 // -- object_locator_t --
201
/**
 * Append this locator to @p bl as encoding version 6.
 *
 * Field order: pool, a fixed "preferred" placeholder of -1, key, nspace,
 * hash.  The compat version starts at 3 and is raised to 6 when an explicit
 * hash is present, and is then written by ENCODE_FINISH_NEW_COMPAT.
 */
void object_locator_t::encode(bufferlist& bl) const
{
  // verify that nobody's corrupted the locator
  ceph_assert(hash == -1 || key.empty());
  __u8 encode_compat = 3;
  ENCODE_START(6, encode_compat, bl);
  encode(pool, bl);
  int32_t preferred = -1;  // tell old code there is no preferred osd (-1).
  encode(preferred, bl);
  encode(key, bl);
  encode(nspace, bl);
  encode(hash, bl);
  if (hash != -1)
    encode_compat = std::max<std::uint8_t>(encode_compat, 6); // need to interpret the hash
  ENCODE_FINISH_NEW_COMPAT(bl, encode_compat);
}
218
/**
 * Read a locator from @p p, accepting every layout up to version 6.
 *
 * - struct_v < 2: pool is a 32-bit value followed by a 16-bit "preferred"
 *   field that is read and discarded.
 * - struct_v >= 2: pool in its native width followed by a 32-bit
 *   "preferred" field that is read and discarded.
 * - nspace is only present from version 5, hash only from version 6
 *   (hash defaults to -1 otherwise).
 */
void object_locator_t::decode(bufferlist::const_iterator& p)
{
  DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, p);
  if (struct_v < 2) {
    int32_t op;
    decode(op, p);
    pool = op;
    int16_t pref;
    decode(pref, p);  // legacy field, value unused
  } else {
    decode(pool, p);
    int32_t preferred;
    decode(preferred, p);  // legacy field, value unused
  }
  decode(key, p);
  if (struct_v >= 5)
    decode(nspace, p);
  if (struct_v >= 6)
    decode(hash, p);
  else
    hash = -1;
  DECODE_FINISH(p);
  // verify that nobody's corrupted the locator
  ceph_assert(hash == -1 || key.empty());
}
244
/**
 * Write this locator to @p f as the fields "pool", "key", "namespace"
 * and "hash", in that order.
 */
void object_locator_t::dump(Formatter *f) const
{
  f->dump_int("pool", pool);
  f->dump_string("key", key);
  f->dump_string("namespace", nspace);
  f->dump_int("hash", hash);
}
252
253 void object_locator_t::generate_test_instances(list<object_locator_t*>& o)
254 {
255 o.push_back(new object_locator_t);
256 o.push_back(new object_locator_t(123));
257 o.push_back(new object_locator_t(123, 876));
258 o.push_back(new object_locator_t(1, "n2"));
259 o.push_back(new object_locator_t(1234, "", "key"));
260 o.push_back(new object_locator_t(12, "n1", "key2"));
261 }
262
263 // -- request_redirect_t --
/**
 * Append this redirect to @p bl as encoding version 1.
 *
 * Field order: redirect_locator, redirect_object, then a 32-bit zero that
 * stands in for the length of the removed osd_instructions member
 * (decode() reads it back as a length and skips that many bytes).
 */
void request_redirect_t::encode(bufferlist& bl) const
{
  ENCODE_START(1, 1, bl);
  encode(redirect_locator, bl);
  encode(redirect_object, bl);
  // legacy of the removed osd_instructions member
  encode((uint32_t)0, bl);
  ENCODE_FINISH(bl);
}
273
/**
 * Read a redirect from @p bl.
 *
 * After the locator and object name, a 32-bit length for the legacy
 * osd_instructions payload is read; if it is non-zero, that many bytes are
 * skipped without being interpreted.
 */
void request_redirect_t::decode(bufferlist::const_iterator& bl)
{
  DECODE_START(1, bl);
  uint32_t legacy_osd_instructions_len;
  decode(redirect_locator, bl);
  decode(redirect_object, bl);
  decode(legacy_osd_instructions_len, bl);
  if (legacy_osd_instructions_len) {
    // skip over the payload of the removed member
    bl.advance(legacy_osd_instructions_len);
  }
  DECODE_FINISH(bl);
}
286
/**
 * Write this redirect to @p f: the "object" string followed by a nested
 * "locator" object section produced by redirect_locator.dump().
 */
void request_redirect_t::dump(Formatter *f) const
{
  f->dump_string("object", redirect_object);
  f->open_object_section("locator");
  redirect_locator.dump(f);
  f->close_section(); // locator
}
294
295 void request_redirect_t::generate_test_instances(list<request_redirect_t*>& o)
296 {
297 object_locator_t loc(1, "redir_obj");
298 o.push_back(new request_redirect_t());
299 o.push_back(new request_redirect_t(loc, 0));
300 o.push_back(new request_redirect_t(loc, "redir_obj"));
301 o.push_back(new request_redirect_t(loc));
302 }
303
/**
 * Write the commit/apply latencies to @p f.
 *
 * Both values are emitted twice: as floating-point milliseconds
 * (nanoseconds divided by 1e6) and as raw unsigned nanoseconds.
 */
void objectstore_perf_stat_t::dump(Formatter *f) const
{
  // *_ms values just for compatibility.
  f->dump_float("commit_latency_ms", os_commit_latency_ns / 1000000.0);
  f->dump_float("apply_latency_ms", os_apply_latency_ns / 1000000.0);
  f->dump_unsigned("commit_latency_ns", os_commit_latency_ns);
  f->dump_unsigned("apply_latency_ns", os_apply_latency_ns);
}
312
/**
 * Append the latencies to @p bl, choosing the layout from @p features.
 *
 * With the OS_PERF_STAT_NS feature the struct is written as version 2
 * (nanosecond values as stored).  Without it, version 1 is written: each
 * latency is divided down to whole milliseconds and stored in 32 bits.
 */
void objectstore_perf_stat_t::encode(bufferlist &bl, uint64_t features) const
{
  uint8_t target_v = 2;
  if (!HAVE_FEATURE(features, OS_PERF_STAT_NS)) {
    target_v = 1;
  }
  ENCODE_START(target_v, target_v, bl);
  if (target_v >= 2) {
    encode(os_commit_latency_ns, bl);
    encode(os_apply_latency_ns, bl);
  } else {
    // integer division: sub-millisecond remainders are dropped
    constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
    uint32_t commit_latency_ms = os_commit_latency_ns / NS_PER_MS;
    uint32_t apply_latency_ms = os_apply_latency_ns / NS_PER_MS;
    encode(commit_latency_ms, bl); // for compatibility with older monitor.
    encode(apply_latency_ms, bl); // for compatibility with older monitor.
  }
  ENCODE_FINISH(bl);
}
332
/**
 * Read the latencies from @p bl.
 *
 * Version 2 and later carry nanosecond values directly.  Older encodings
 * carry two 32-bit millisecond values, which are scaled up to nanoseconds.
 */
void objectstore_perf_stat_t::decode(bufferlist::const_iterator &bl)
{
  DECODE_START(2, bl);
  if (struct_v >= 2) {
    decode(os_commit_latency_ns, bl);
    decode(os_apply_latency_ns, bl);
  } else {
    uint32_t commit_latency_ms;
    uint32_t apply_latency_ms;
    decode(commit_latency_ms, bl);
    decode(apply_latency_ms, bl);
    constexpr auto NS_PER_MS = std::chrono::nanoseconds(1ms).count();
    os_commit_latency_ns = commit_latency_ms * NS_PER_MS;
    os_apply_latency_ns = apply_latency_ms * NS_PER_MS;
  }
  DECODE_FINISH(bl);
}
350
351 void objectstore_perf_stat_t::generate_test_instances(std::list<objectstore_perf_stat_t*>& o)
352 {
353 o.push_back(new objectstore_perf_stat_t());
354 o.push_back(new objectstore_perf_stat_t());
355 o.back()->os_commit_latency_ns = 20000000;
356 o.back()->os_apply_latency_ns = 30000000;
357 }
358
359 // -- osd_stat_t --
/**
 * Write this OSD's statistics to @p f.
 *
 * Emits the scalar counters, the legacy kilobyte fields derived from
 * statfs, then nested sections for "statfs", "hb_peers" (array of osd
 * ids), "op_queue_age_hist", "perf_stat" and "alerts".
 */
void osd_stat_t::dump(Formatter *f) const
{
  f->dump_unsigned("up_from", up_from);
  f->dump_unsigned("seq", seq);
  f->dump_unsigned("num_pgs", num_pgs);
  f->dump_unsigned("num_osds", num_osds);
  f->dump_unsigned("num_per_pool_osds", num_per_pool_osds);

  /// dump legacy stats fields to ensure backward compatibility.
  f->dump_unsigned("kb", statfs.kb());
  f->dump_unsigned("kb_used", statfs.kb_used_raw());
  f->dump_unsigned("kb_used_data", statfs.kb_used_data());
  f->dump_unsigned("kb_used_omap", statfs.kb_used_omap());
  f->dump_unsigned("kb_used_meta", statfs.kb_used_internal_metadata());
  f->dump_unsigned("kb_avail", statfs.kb_avail());
  ////////////////////

  f->open_object_section("statfs");
  statfs.dump(f);
  f->close_section();
  f->open_array_section("hb_peers");
  for (auto p : hb_peers)
    f->dump_int("osd", p);
  f->close_section();
  f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
  f->dump_int("num_snap_trimming", num_snap_trimming);
  f->dump_int("num_shards_repaired", num_shards_repaired);
  f->open_object_section("op_queue_age_hist");
  op_queue_age_hist.dump(f);
  f->close_section();
  f->open_object_section("perf_stat");
  os_perf_stat.dump(f);
  f->close_section();
  f->open_array_section("alerts");
  // file-scope helper: one "alert" string per (osd, alert) pair
  ::dump(f, os_alerts);
  f->close_section();
}
397
/**
 * Append this OSD's statistics to @p bl as encoding version 12 (compat 2).
 *
 * The legacy kilobyte counters are still written (computed from statfs) in
 * addition to statfs itself.  Fields are appended in the order in which
 * successive struct versions introduced them; decode() reads them back
 * guarded by struct_v checks.  @p features is only forwarded to the
 * os_perf_stat encoder.
 */
void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
{
  ENCODE_START(12, 2, bl);

  //////// for compatibility ////////
  int64_t kb = statfs.kb();
  int64_t kb_used = statfs.kb_used_raw();
  int64_t kb_avail = statfs.kb_avail();
  encode(kb, bl);
  encode(kb_used, bl);
  encode(kb_avail, bl);
  ///////////////////////////////////

  encode(snap_trim_queue_len, bl);
  encode(num_snap_trimming, bl);
  encode(hb_peers, bl);
  // placeholder for the slot that decode() reads into its unused
  // num_hb_out vector
  encode((uint32_t)0, bl);
  encode(op_queue_age_hist, bl);
  encode(os_perf_stat, bl, features);
  encode(up_from, bl);
  encode(seq, bl);
  encode(num_pgs, bl);

  //////// for compatibility ////////
  int64_t kb_used_data = statfs.kb_used_data();
  int64_t kb_used_omap = statfs.kb_used_omap();
  int64_t kb_used_meta = statfs.kb_used_internal_metadata();
  encode(kb_used_data, bl);
  encode(kb_used_omap, bl);
  encode(kb_used_meta, bl);
  encode(statfs, bl);
  ///////////////////////////////////
  encode(os_alerts, bl);
  encode(num_shards_repaired, bl);
  encode(num_osds, bl);
  encode(num_per_pool_osds, bl);
  ENCODE_FINISH(bl);
}
436
/**
 * Read OSD statistics from @p bl, accepting struct versions up to 12.
 *
 * Fields added in later versions are read only when struct_v is high
 * enough; otherwise they are reset to a default.  For encodings older than
 * version 9, which carry no statfs, statfs is reconstructed from the
 * legacy kilobyte counters.
 */
void osd_stat_t::decode(bufferlist::const_iterator &bl)
{
  int64_t kb, kb_used,kb_avail;
  int64_t kb_used_data, kb_used_omap, kb_used_meta;
  DECODE_START_LEGACY_COMPAT_LEN(12, 2, 2, bl);
  decode(kb, bl);
  decode(kb_used, bl);
  decode(kb_avail, bl);
  decode(snap_trim_queue_len, bl);
  decode(num_snap_trimming, bl);
  decode(hb_peers, bl);
  // read and discarded; encode() writes a zero count in this slot
  vector<int> num_hb_out;
  decode(num_hb_out, bl);
  if (struct_v >= 3)
    decode(op_queue_age_hist, bl);
  if (struct_v >= 4)
    decode(os_perf_stat, bl);
  if (struct_v >= 6) {
    decode(up_from, bl);
    decode(seq, bl);
  }
  if (struct_v >= 7) {
    decode(num_pgs, bl);
  }
  if (struct_v >= 8) {
    decode(kb_used_data, bl);
    decode(kb_used_omap, bl);
    decode(kb_used_meta, bl);
  } else {
    // older encodings only have a single "used" figure
    kb_used_data = kb_used;
    kb_used_omap = 0;
    kb_used_meta = 0;
  }
  if (struct_v >= 9) {
    decode(statfs, bl);
  } else {
    // rebuild statfs from the legacy counters; "<< 10" converts KiB to bytes
    statfs.reset();
    statfs.total = kb << 10;
    statfs.available = kb_avail << 10;
    // actually it's totally unexpected to have statfs.total < statfs.available
    // here but unfortunately legacy generate_test_instances produced such a
    // case hence inserting some handling rather than assert
    statfs.internally_reserved =
      statfs.total > statfs.available ? statfs.total - statfs.available : 0;
    kb_used <<= 10;
    // whatever is neither available nor reported as used counts as reserved
    if ((int64_t)statfs.internally_reserved > kb_used) {
      statfs.internally_reserved -= kb_used;
    } else {
      statfs.internally_reserved = 0;
    }
    statfs.allocated = kb_used_data << 10;
    statfs.omap_allocated = kb_used_omap << 10;
    statfs.internal_metadata = kb_used_meta << 10;
  }
  if (struct_v >= 10) {
    decode(os_alerts, bl);
  } else {
    os_alerts.clear();
  }
  if (struct_v >= 11) {
    decode(num_shards_repaired, bl);
  } else {
    num_shards_repaired = 0;
  }
  if (struct_v >= 12) {
    decode(num_osds, bl);
    decode(num_per_pool_osds, bl);
  } else {
    num_osds = 0;
    num_per_pool_osds = 0;
  }
  DECODE_FINISH(bl);
}
510
511 void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
512 {
513 o.push_back(new osd_stat_t);
514
515 o.push_back(new osd_stat_t);
516 list<store_statfs_t*> ll;
517 store_statfs_t::generate_test_instances(ll);
518 o.back()->statfs = *ll.back();
519 o.back()->hb_peers.push_back(7);
520 o.back()->snap_trim_queue_len = 8;
521 o.back()->num_snap_trimming = 99;
522 o.back()->num_shards_repaired = 101;
523 o.back()->os_alerts[0].emplace(
524 "some alert", "some alert details");
525 o.back()->os_alerts[1].emplace(
526 "some alert2", "some alert2 details");
527 }
528
529 // -- pg_t --
530
531 int pg_t::print(char *o, int maxlen) const
532 {
533 return snprintf(o, maxlen, "%llu.%x", (unsigned long long)pool(), ps());
534 }
535
536 bool pg_t::parse(const char *s)
537 {
538 uint64_t ppool;
539 uint32_t pseed;
540 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
541 if (r < 2)
542 return false;
543 m_pool = ppool;
544 m_seed = pseed;
545 return true;
546 }
547
548 bool spg_t::parse(const char *s)
549 {
550 shard = shard_id_t::NO_SHARD;
551 uint64_t ppool;
552 uint32_t pseed;
553 uint32_t pshard;
554 int r = sscanf(s, "%llu.%x", (long long unsigned *)&ppool, &pseed);
555 if (r < 2)
556 return false;
557 pgid.set_pool(ppool);
558 pgid.set_ps(pseed);
559
560 const char *p = strchr(s, 's');
561 if (p) {
562 r = sscanf(p, "s%u", &pshard);
563 if (r == 1) {
564 shard = shard_id_t(pshard);
565 } else {
566 return false;
567 }
568 }
569 return true;
570 }
571
572 char *spg_t::calc_name(char *buf, const char *suffix_backwords) const
573 {
574 while (*suffix_backwords)
575 *--buf = *suffix_backwords++;
576
577 if (!is_no_shard()) {
578 buf = ritoa<uint8_t, 10>((uint8_t)shard.id, buf);
579 *--buf = 's';
580 }
581
582 return pgid.calc_name(buf, "");
583 }
584
585 ostream& operator<<(ostream& out, const spg_t &pg)
586 {
587 char buf[spg_t::calc_name_buf_size];
588 buf[spg_t::calc_name_buf_size - 1] = '\0';
589 out << pg.calc_name(buf + spg_t::calc_name_buf_size - 1, "");
590 return out;
591 }
592
593 pg_t pg_t::get_ancestor(unsigned old_pg_num) const
594 {
595 int old_bits = cbits(old_pg_num);
596 int old_mask = (1 << old_bits) - 1;
597 pg_t ret = *this;
598 ret.m_seed = ceph_stable_mod(m_seed, old_pg_num, old_mask);
599 return ret;
600 }
601
/**
 * Determine whether this pg gains children when the pool's pg count grows
 * from @p old_pg_num to @p new_pg_num.
 *
 * @param old_pg_num  pg count before the change
 * @param new_pg_num  pg count after the change
 * @param children    if non-null, receives every new pg whose seed maps
 *                    back to this pg under old_pg_num
 * @return true if at least one such child exists; false if this pg's seed
 *         is not below old_pg_num or the pg count did not grow
 */
bool pg_t::is_split(unsigned old_pg_num, unsigned new_pg_num, set<pg_t> *children) const
{
  //ceph_assert(m_seed < old_pg_num);
  if (m_seed >= old_pg_num) {
    // degenerate case
    return false;
  }
  if (new_pg_num <= old_pg_num)
    return false;

  bool split = false;
  if (true) {
    unsigned old_bits = cbits(old_pg_num);
    unsigned old_mask = (1 << old_bits) - 1;
    // enumerate candidate seeds formed by OR-ing higher bits onto m_seed
    for (unsigned n = 1; ; n++) {
      unsigned next_bit = (n << (old_bits-1));
      unsigned s = next_bit | m_seed;

      // skip candidates that already existed or are this pg itself
      if (s < old_pg_num || s == m_seed)
	continue;
      // candidates only grow from here on; none beyond new_pg_num exist
      if (s >= new_pg_num)
	break;
      if ((unsigned)ceph_stable_mod(s, old_pg_num, old_mask) == m_seed) {
	split = true;
	if (children)
	  children->insert(pg_t(s, m_pool));
      }
    }
  }
  if (false) {
    // brute force
    // (disabled reference implementation; note it would dereference
    // children without a null check if it were enabled)
    int old_bits = cbits(old_pg_num);
    int old_mask = (1 << old_bits) - 1;
    for (unsigned x = old_pg_num; x < new_pg_num; ++x) {
      unsigned o = ceph_stable_mod(x, old_pg_num, old_mask);
      if (o == m_seed) {
	split = true;
	children->insert(pg_t(x, m_pool));
      }
    }
  }
  return split;
}
645
646 unsigned pg_t::get_split_bits(unsigned pg_num) const {
647 if (pg_num == 1)
648 return 0;
649 ceph_assert(pg_num > 1);
650
651 // Find unique p such that pg_num \in [2^(p-1), 2^p)
652 unsigned p = cbits(pg_num);
653 ceph_assert(p); // silence coverity #751330
654
655 if ((m_seed % (1<<(p-1))) < (pg_num % (1<<(p-1))))
656 return p;
657 else
658 return p - 1;
659 }
660
661 bool pg_t::is_merge_source(
662 unsigned old_pg_num,
663 unsigned new_pg_num,
664 pg_t *parent) const
665 {
666 if (m_seed < old_pg_num &&
667 m_seed >= new_pg_num) {
668 if (parent) {
669 pg_t t = *this;
670 while (t.m_seed >= new_pg_num) {
671 t = t.get_parent();
672 }
673 *parent = t;
674 }
675 return true;
676 }
677 return false;
678 }
679
680 pg_t pg_t::get_parent() const
681 {
682 unsigned bits = cbits(m_seed);
683 ceph_assert(bits);
684 pg_t retval = *this;
685 retval.m_seed &= ~((~0)<<(bits - 1));
686 return retval;
687 }
688
689 hobject_t pg_t::get_hobj_start() const
690 {
691 return hobject_t(object_t(), string(), 0, m_seed, m_pool,
692 string());
693 }
694
695 hobject_t pg_t::get_hobj_end(unsigned pg_num) const
696 {
697 // note: this assumes a bitwise sort; with the legacy nibblewise
698 // sort a PG did not always cover a single contiguous range of the
699 // (bit-reversed) hash range.
700 unsigned bits = get_split_bits(pg_num);
701 uint64_t rev_start = hobject_t::_reverse_bits(m_seed);
702 uint64_t rev_end = (rev_start | (0xffffffff >> bits)) + 1;
703 if (rev_end >= 0x100000000) {
704 ceph_assert(rev_end == 0x100000000);
705 return hobject_t::get_max();
706 } else {
707 return hobject_t(object_t(), string(), CEPH_NOSNAP,
708 hobject_t::_reverse_bits(rev_end), m_pool,
709 string());
710 }
711 }
712
/**
 * Write this pg to @p f as the unsigned fields "pool" and "seed".
 */
void pg_t::dump(Formatter *f) const
{
  f->dump_unsigned("pool", m_pool);
  f->dump_unsigned("seed", m_seed);
}
718
719 void pg_t::generate_test_instances(list<pg_t*>& o)
720 {
721 o.push_back(new pg_t);
722 o.push_back(new pg_t(1, 2));
723 o.push_back(new pg_t(13123, 3));
724 o.push_back(new pg_t(131223, 4));
725 }
726
727 char *pg_t::calc_name(char *buf, const char *suffix_backwords) const
728 {
729 while (*suffix_backwords)
730 *--buf = *suffix_backwords++;
731
732 buf = ritoa<uint32_t, 16>(m_seed, buf);
733
734 *--buf = '.';
735
736 return ritoa<uint64_t, 10>(m_pool, buf);
737 }
738
739 ostream& operator<<(ostream& out, const pg_t &pg)
740 {
741 char buf[pg_t::calc_name_buf_size];
742 buf[pg_t::calc_name_buf_size - 1] = '\0';
743 out << pg.calc_name(buf + pg_t::calc_name_buf_size - 1, "");
744 return out;
745 }
746
747
748 // -- coll_t --
749
750 void coll_t::calc_str()
751 {
752 switch (type) {
753 case TYPE_META:
754 strcpy(_str_buff, "meta");
755 _str = _str_buff;
756 break;
757 case TYPE_PG:
758 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
759 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "daeh_");
760 break;
761 case TYPE_PG_TEMP:
762 _str_buff[spg_t::calc_name_buf_size - 1] = '\0';
763 _str = pgid.calc_name(_str_buff + spg_t::calc_name_buf_size - 1, "PMET_");
764 break;
765 default:
766 ceph_abort_msg("unknown collection type");
767 }
768 }
769
770 bool coll_t::parse(const std::string& s)
771 {
772 if (s == "meta") {
773 type = TYPE_META;
774 pgid = spg_t();
775 removal_seq = 0;
776 calc_str();
777 ceph_assert(s == _str);
778 return true;
779 }
780 if (s.find("_head") == s.length() - 5 &&
781 pgid.parse(s.substr(0, s.length() - 5))) {
782 type = TYPE_PG;
783 removal_seq = 0;
784 calc_str();
785 ceph_assert(s == _str);
786 return true;
787 }
788 if (s.find("_TEMP") == s.length() - 5 &&
789 pgid.parse(s.substr(0, s.length() - 5))) {
790 type = TYPE_PG_TEMP;
791 removal_seq = 0;
792 calc_str();
793 ceph_assert(s == _str);
794 return true;
795 }
796 return false;
797 }
798
/**
 * Append this collection id to @p bl.
 *
 * Temp collections are written as version 3: a version byte followed by
 * the textual name.  Everything else is written as version 2: a version
 * byte, the type byte, the pgid, and a snapid fixed to CEPH_NOSNAP.
 * No ENCODE_START/ENCODE_FINISH framing is used here.
 */
void coll_t::encode(bufferlist& bl) const
{
  using ceph::encode;
  // when changing this, remember to update encoded_size() too.
  if (is_temp()) {
    // can't express this as v2...
    __u8 struct_v = 3;
    encode(struct_v, bl);
    encode(to_str(), bl);
  } else {
    __u8 struct_v = 2;
    encode(struct_v, bl);
    encode((__u8)type, bl);
    encode(pgid, bl);
    snapid_t snap = CEPH_NOSNAP;
    encode(snap, bl);
  }
}
817
818 size_t coll_t::encoded_size() const
819 {
820 size_t r = sizeof(__u8);
821 if (is_temp()) {
822 // v3
823 r += sizeof(__u32);
824 if (_str) {
825 r += strlen(_str);
826 }
827 } else {
828 // v2
829 // 1. type
830 r += sizeof(__u8);
831 // 2. pgid
832 // - encoding header
833 r += sizeof(ceph_le32) + 2 * sizeof(__u8);
834 // - pg_t
835 r += sizeof(__u8) + sizeof(uint64_t) + 2 * sizeof(uint32_t);
836 // - shard_id_t
837 r += sizeof(int8_t);
838 // 3. snapid_t
839 r += sizeof(uint64_t);
840 }
841
842 return r;
843 }
844
845 void coll_t::decode(bufferlist::const_iterator& bl)
846 {
847 using ceph::decode;
848 __u8 struct_v;
849 decode(struct_v, bl);
850 switch (struct_v) {
851 case 1:
852 {
853 snapid_t snap;
854 decode(pgid, bl);
855 decode(snap, bl);
856
857 // infer the type
858 if (pgid == spg_t() && snap == 0) {
859 type = TYPE_META;
860 } else {
861 type = TYPE_PG;
862 }
863 removal_seq = 0;
864 }
865 break;
866
867 case 2:
868 {
869 __u8 _type;
870 snapid_t snap;
871 decode(_type, bl);
872 decode(pgid, bl);
873 decode(snap, bl);
874 type = (type_t)_type;
875 removal_seq = 0;
876 }
877 break;
878
879 case 3:
880 {
881 string str;
882 decode(str, bl);
883 bool ok = parse(str);
884 if (!ok)
885 throw std::domain_error(std::string("unable to parse pg ") + str);
886 }
887 break;
888
889 default:
890 {
891 ostringstream oss;
892 oss << "coll_t::decode(): don't know how to decode version "
893 << struct_v;
894 throw std::domain_error(oss.str());
895 }
896 }
897 }
898
/**
 * Write this collection id to @p f: the numeric "type_id", the "pgid"
 * (omitted for the meta collection), and the textual "name".
 */
void coll_t::dump(Formatter *f) const
{
  f->dump_unsigned("type_id", (unsigned)type);
  if (type != TYPE_META)
    f->dump_stream("pgid") << pgid;
  f->dump_string("name", to_str());
}
906
907 void coll_t::generate_test_instances(list<coll_t*>& o)
908 {
909 o.push_back(new coll_t());
910 o.push_back(new coll_t(spg_t(pg_t(1, 0), shard_id_t::NO_SHARD)));
911 o.push_back(new coll_t(o.back()->get_temp()));
912 o.push_back(new coll_t(spg_t(pg_t(3, 2), shard_id_t(12))));
913 o.push_back(new coll_t(o.back()->get_temp()));
914 o.push_back(new coll_t());
915 }
916
917 // ---
918
919 std::string pg_vector_string(const vector<int32_t> &a)
920 {
921 ostringstream oss;
922 oss << "[";
923 for (vector<int32_t>::const_iterator i = a.begin(); i != a.end(); ++i) {
924 if (i != a.begin())
925 oss << ",";
926 if (*i != CRUSH_ITEM_NONE)
927 oss << *i;
928 else
929 oss << "NONE";
930 }
931 oss << "]";
932 return oss.str();
933 }
934
935 std::string pg_state_string(uint64_t state)
936 {
937 ostringstream oss;
938 if (state & PG_STATE_STALE)
939 oss << "stale+";
940 if (state & PG_STATE_CREATING)
941 oss << "creating+";
942 if (state & PG_STATE_ACTIVE)
943 oss << "active+";
944 if (state & PG_STATE_ACTIVATING)
945 oss << "activating+";
946 if (state & PG_STATE_CLEAN)
947 oss << "clean+";
948 if (state & PG_STATE_RECOVERY_WAIT)
949 oss << "recovery_wait+";
950 if (state & PG_STATE_RECOVERY_TOOFULL)
951 oss << "recovery_toofull+";
952 if (state & PG_STATE_RECOVERING)
953 oss << "recovering+";
954 if (state & PG_STATE_FORCED_RECOVERY)
955 oss << "forced_recovery+";
956 if (state & PG_STATE_DOWN)
957 oss << "down+";
958 if (state & PG_STATE_RECOVERY_UNFOUND)
959 oss << "recovery_unfound+";
960 if (state & PG_STATE_BACKFILL_UNFOUND)
961 oss << "backfill_unfound+";
962 if (state & PG_STATE_UNDERSIZED)
963 oss << "undersized+";
964 if (state & PG_STATE_DEGRADED)
965 oss << "degraded+";
966 if (state & PG_STATE_REMAPPED)
967 oss << "remapped+";
968 if (state & PG_STATE_PREMERGE)
969 oss << "premerge+";
970 if (state & PG_STATE_SCRUBBING)
971 oss << "scrubbing+";
972 if (state & PG_STATE_DEEP_SCRUB)
973 oss << "deep+";
974 if (state & PG_STATE_INCONSISTENT)
975 oss << "inconsistent+";
976 if (state & PG_STATE_PEERING)
977 oss << "peering+";
978 if (state & PG_STATE_REPAIR)
979 oss << "repair+";
980 if (state & PG_STATE_BACKFILL_WAIT)
981 oss << "backfill_wait+";
982 if (state & PG_STATE_BACKFILLING)
983 oss << "backfilling+";
984 if (state & PG_STATE_FORCED_BACKFILL)
985 oss << "forced_backfill+";
986 if (state & PG_STATE_BACKFILL_TOOFULL)
987 oss << "backfill_toofull+";
988 if (state & PG_STATE_INCOMPLETE)
989 oss << "incomplete+";
990 if (state & PG_STATE_PEERED)
991 oss << "peered+";
992 if (state & PG_STATE_SNAPTRIM)
993 oss << "snaptrim+";
994 if (state & PG_STATE_SNAPTRIM_WAIT)
995 oss << "snaptrim_wait+";
996 if (state & PG_STATE_SNAPTRIM_ERROR)
997 oss << "snaptrim_error+";
998 if (state & PG_STATE_FAILED_REPAIR)
999 oss << "failed_repair+";
1000 string ret(oss.str());
1001 if (ret.length() > 0)
1002 ret.resize(ret.length() - 1);
1003 else
1004 ret = "unknown";
1005 return ret;
1006 }
1007
1008 boost::optional<uint64_t> pg_string_state(const std::string& state)
1009 {
1010 boost::optional<uint64_t> type;
1011 if (state == "active")
1012 type = PG_STATE_ACTIVE;
1013 else if (state == "clean")
1014 type = PG_STATE_CLEAN;
1015 else if (state == "down")
1016 type = PG_STATE_DOWN;
1017 else if (state == "recovery_unfound")
1018 type = PG_STATE_RECOVERY_UNFOUND;
1019 else if (state == "backfill_unfound")
1020 type = PG_STATE_BACKFILL_UNFOUND;
1021 else if (state == "premerge")
1022 type = PG_STATE_PREMERGE;
1023 else if (state == "scrubbing")
1024 type = PG_STATE_SCRUBBING;
1025 else if (state == "degraded")
1026 type = PG_STATE_DEGRADED;
1027 else if (state == "inconsistent")
1028 type = PG_STATE_INCONSISTENT;
1029 else if (state == "peering")
1030 type = PG_STATE_PEERING;
1031 else if (state == "repair")
1032 type = PG_STATE_REPAIR;
1033 else if (state == "recovering")
1034 type = PG_STATE_RECOVERING;
1035 else if (state == "forced_recovery")
1036 type = PG_STATE_FORCED_RECOVERY;
1037 else if (state == "backfill_wait")
1038 type = PG_STATE_BACKFILL_WAIT;
1039 else if (state == "incomplete")
1040 type = PG_STATE_INCOMPLETE;
1041 else if (state == "stale")
1042 type = PG_STATE_STALE;
1043 else if (state == "remapped")
1044 type = PG_STATE_REMAPPED;
1045 else if (state == "deep")
1046 type = PG_STATE_DEEP_SCRUB;
1047 else if (state == "backfilling")
1048 type = PG_STATE_BACKFILLING;
1049 else if (state == "forced_backfill")
1050 type = PG_STATE_FORCED_BACKFILL;
1051 else if (state == "backfill_toofull")
1052 type = PG_STATE_BACKFILL_TOOFULL;
1053 else if (state == "recovery_wait")
1054 type = PG_STATE_RECOVERY_WAIT;
1055 else if (state == "recovery_toofull")
1056 type = PG_STATE_RECOVERY_TOOFULL;
1057 else if (state == "undersized")
1058 type = PG_STATE_UNDERSIZED;
1059 else if (state == "activating")
1060 type = PG_STATE_ACTIVATING;
1061 else if (state == "peered")
1062 type = PG_STATE_PEERED;
1063 else if (state == "snaptrim")
1064 type = PG_STATE_SNAPTRIM;
1065 else if (state == "snaptrim_wait")
1066 type = PG_STATE_SNAPTRIM_WAIT;
1067 else if (state == "snaptrim_error")
1068 type = PG_STATE_SNAPTRIM_ERROR;
1069 else if (state == "creating")
1070 type = PG_STATE_CREATING;
1071 else if (state == "failed_repair")
1072 type = PG_STATE_FAILED_REPAIR;
1073 else if (state == "unknown")
1074 type = 0;
1075 else
1076 type = boost::none;
1077 return type;
1078 }
1079
1080 // -- eversion_t --
1081 string eversion_t::get_key_name() const
1082 {
1083 std::string key(32, ' ');
1084 get_key_name(&key[0]);
1085 key.resize(31); // remove the null terminator
1086 return key;
1087 }
1088
1089 // -- pool_snap_info_t --
/**
 * Write this pool snapshot record to @p f as "snapid", "stamp" (streamed)
 * and "name".
 */
void pool_snap_info_t::dump(Formatter *f) const
{
  f->dump_unsigned("snapid", snapid);
  f->dump_stream("stamp") << stamp;
  f->dump_string("name", name);
}
1096
/**
 * Append this pool snapshot record to @p bl.
 *
 * If @p features lacks CEPH_FEATURE_PGPOOL3, the legacy layout is used: a
 * bare version byte (1) followed by the fields, with no
 * ENCODE_START/ENCODE_FINISH framing.  Otherwise the framed version 2
 * layout is written.  The field order (snapid, stamp, name) is the same in
 * both.
 */
void pool_snap_info_t::encode(bufferlist& bl, uint64_t features) const
{
  using ceph::encode;
  if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
    __u8 struct_v = 1;
    encode(struct_v, bl);
    encode(snapid, bl);
    encode(stamp, bl);
    encode(name, bl);
    return;
  }
  ENCODE_START(2, 2, bl);
  encode(snapid, bl);
  encode(stamp, bl);
  encode(name, bl);
  ENCODE_FINISH(bl);
}
1114
/**
 * Read a pool snapshot record (snapid, stamp, name) from @p bl.
 *
 * Uses the legacy-aware DECODE_START_LEGACY_COMPAT_LEN macro, matching the
 * two layouts that encode() can produce.
 */
void pool_snap_info_t::decode(bufferlist::const_iterator& bl)
{
  DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
  decode(snapid, bl);
  decode(stamp, bl);
  decode(name, bl);
  DECODE_FINISH(bl);
}
1123
1124 void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
1125 {
1126 o.push_back(new pool_snap_info_t);
1127 o.push_back(new pool_snap_info_t);
1128 o.back()->snapid = 1;
1129 o.back()->stamp = utime_t(1, 2);
1130 o.back()->name = "foo";
1131 }
1132
1133 // -- pool_opts_t --
1134
1135 typedef std::map<std::string, pool_opts_t::opt_desc_t> opt_mapping_t;
1136 static opt_mapping_t opt_mapping = boost::assign::map_list_of
1137 ("scrub_min_interval", pool_opts_t::opt_desc_t(
1138 pool_opts_t::SCRUB_MIN_INTERVAL, pool_opts_t::DOUBLE))
1139 ("scrub_max_interval", pool_opts_t::opt_desc_t(
1140 pool_opts_t::SCRUB_MAX_INTERVAL, pool_opts_t::DOUBLE))
1141 ("deep_scrub_interval", pool_opts_t::opt_desc_t(
1142 pool_opts_t::DEEP_SCRUB_INTERVAL, pool_opts_t::DOUBLE))
1143 ("recovery_priority", pool_opts_t::opt_desc_t(
1144 pool_opts_t::RECOVERY_PRIORITY, pool_opts_t::INT))
1145 ("recovery_op_priority", pool_opts_t::opt_desc_t(
1146 pool_opts_t::RECOVERY_OP_PRIORITY, pool_opts_t::INT))
1147 ("scrub_priority", pool_opts_t::opt_desc_t(
1148 pool_opts_t::SCRUB_PRIORITY, pool_opts_t::INT))
1149 ("compression_mode", pool_opts_t::opt_desc_t(
1150 pool_opts_t::COMPRESSION_MODE, pool_opts_t::STR))
1151 ("compression_algorithm", pool_opts_t::opt_desc_t(
1152 pool_opts_t::COMPRESSION_ALGORITHM, pool_opts_t::STR))
1153 ("compression_required_ratio", pool_opts_t::opt_desc_t(
1154 pool_opts_t::COMPRESSION_REQUIRED_RATIO, pool_opts_t::DOUBLE))
1155 ("compression_max_blob_size", pool_opts_t::opt_desc_t(
1156 pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, pool_opts_t::INT))
1157 ("compression_min_blob_size", pool_opts_t::opt_desc_t(
1158 pool_opts_t::COMPRESSION_MIN_BLOB_SIZE, pool_opts_t::INT))
1159 ("csum_type", pool_opts_t::opt_desc_t(
1160 pool_opts_t::CSUM_TYPE, pool_opts_t::INT))
1161 ("csum_max_block", pool_opts_t::opt_desc_t(
1162 pool_opts_t::CSUM_MAX_BLOCK, pool_opts_t::INT))
1163 ("csum_min_block", pool_opts_t::opt_desc_t(
1164 pool_opts_t::CSUM_MIN_BLOCK, pool_opts_t::INT))
1165 ("fingerprint_algorithm", pool_opts_t::opt_desc_t(
1166 pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
1167 ("pg_num_min", pool_opts_t::opt_desc_t(
1168 pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
1169 ("target_size_bytes", pool_opts_t::opt_desc_t(
1170 pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
1171 ("target_size_ratio", pool_opts_t::opt_desc_t(
1172 pool_opts_t::TARGET_SIZE_RATIO, pool_opts_t::DOUBLE))
1173 ("pg_autoscale_bias", pool_opts_t::opt_desc_t(
1174 pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE));
1175
1176 bool pool_opts_t::is_opt_name(const std::string& name)
1177 {
1178 return opt_mapping.count(name);
1179 }
1180
1181 pool_opts_t::opt_desc_t pool_opts_t::get_opt_desc(const std::string& name)
1182 {
1183 opt_mapping_t::iterator i = opt_mapping.find(name);
1184 ceph_assert(i != opt_mapping.end());
1185 return i->second;
1186 }
1187
1188 bool pool_opts_t::is_set(pool_opts_t::key_t key) const
1189 {
1190 return opts.count(key);
1191 }
1192
1193 const pool_opts_t::value_t& pool_opts_t::get(pool_opts_t::key_t key) const
1194 {
1195 opts_t::const_iterator i = opts.find(key);
1196 ceph_assert(i != opts.end());
1197 return i->second;
1198 }
1199
1200 bool pool_opts_t::unset(pool_opts_t::key_t key) {
1201 return opts.erase(key) > 0;
1202 }
1203
1204 class pool_opts_dumper_t : public boost::static_visitor<> {
1205 public:
1206 pool_opts_dumper_t(const std::string& name_, Formatter* f_) :
1207 name(name_.c_str()), f(f_) {}
1208
1209 void operator()(std::string s) const {
1210 f->dump_string(name, s);
1211 }
1212 void operator()(int64_t i) const {
1213 f->dump_int(name, i);
1214 }
1215 void operator()(double d) const {
1216 f->dump_float(name, d);
1217 }
1218
1219 private:
1220 const char* name;
1221 Formatter* f;
1222 };
1223
1224 void pool_opts_t::dump(const std::string& name, Formatter* f) const
1225 {
1226 const opt_desc_t& desc = get_opt_desc(name);
1227 opts_t::const_iterator i = opts.find(desc.key);
1228 if (i == opts.end()) {
1229 return;
1230 }
1231 boost::apply_visitor(pool_opts_dumper_t(name, f), i->second);
1232 }
1233
1234 void pool_opts_t::dump(Formatter* f) const
1235 {
1236 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1237 ++i) {
1238 const std::string& name = i->first;
1239 const opt_desc_t& desc = i->second;
1240 opts_t::const_iterator j = opts.find(desc.key);
1241 if (j == opts.end()) {
1242 continue;
1243 }
1244 boost::apply_visitor(pool_opts_dumper_t(name, f), j->second);
1245 }
1246 }
1247
1248 class pool_opts_encoder_t : public boost::static_visitor<> {
1249 public:
1250 explicit pool_opts_encoder_t(bufferlist& bl_, uint64_t features)
1251 : bl(bl_),
1252 features(features) {}
1253
1254 void operator()(const std::string &s) const {
1255 encode(static_cast<int32_t>(pool_opts_t::STR), bl);
1256 encode(s, bl);
1257 }
1258 void operator()(int64_t i) const {
1259 encode(static_cast<int32_t>(pool_opts_t::INT), bl);
1260 if (HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1261 encode(i, bl);
1262 } else {
1263 encode(static_cast<int32_t>(i), bl);
1264 }
1265 }
1266 void operator()(double d) const {
1267 encode(static_cast<int32_t>(pool_opts_t::DOUBLE), bl);
1268 encode(d, bl);
1269 }
1270
1271 private:
1272 bufferlist& bl;
1273 uint64_t features;
1274 };
1275
1276 void pool_opts_t::encode(bufferlist& bl, uint64_t features) const
1277 {
1278 unsigned v = 2;
1279 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1280 v = 1;
1281 }
1282 ENCODE_START(v, 1, bl);
1283 uint32_t n = static_cast<uint32_t>(opts.size());
1284 encode(n, bl);
1285 for (opts_t::const_iterator i = opts.begin(); i != opts.end(); ++i) {
1286 encode(static_cast<int32_t>(i->first), bl);
1287 boost::apply_visitor(pool_opts_encoder_t(bl, features), i->second);
1288 }
1289 ENCODE_FINISH(bl);
1290 }
1291
1292 void pool_opts_t::decode(bufferlist::const_iterator& bl)
1293 {
1294 DECODE_START(1, bl);
1295 __u32 n;
1296 decode(n, bl);
1297 opts.clear();
1298 while (n--) {
1299 int32_t k, t;
1300 decode(k, bl);
1301 decode(t, bl);
1302 if (t == STR) {
1303 std::string s;
1304 decode(s, bl);
1305 opts[static_cast<key_t>(k)] = s;
1306 } else if (t == INT) {
1307 int64_t i;
1308 if (struct_v >= 2) {
1309 decode(i, bl);
1310 } else {
1311 int ii;
1312 decode(ii, bl);
1313 i = ii;
1314 }
1315 opts[static_cast<key_t>(k)] = i;
1316 } else if (t == DOUBLE) {
1317 double d;
1318 decode(d, bl);
1319 opts[static_cast<key_t>(k)] = d;
1320 } else {
1321 ceph_assert(!"invalid type");
1322 }
1323 }
1324 DECODE_FINISH(bl);
1325 }
1326
1327 ostream& operator<<(ostream& out, const pool_opts_t& opts)
1328 {
1329 for (opt_mapping_t::iterator i = opt_mapping.begin(); i != opt_mapping.end();
1330 ++i) {
1331 const std::string& name = i->first;
1332 const pool_opts_t::opt_desc_t& desc = i->second;
1333 pool_opts_t::opts_t::const_iterator j = opts.opts.find(desc.key);
1334 if (j == opts.opts.end()) {
1335 continue;
1336 }
1337 out << " " << name << " " << j->second;
1338 }
1339 return out;
1340 }
1341
1342 // -- pg_pool_t --
1343
1344 const char *pg_pool_t::APPLICATION_NAME_CEPHFS("cephfs");
1345 const char *pg_pool_t::APPLICATION_NAME_RBD("rbd");
1346 const char *pg_pool_t::APPLICATION_NAME_RGW("rgw");
1347
1348 void pg_pool_t::dump(Formatter *f) const
1349 {
1350 f->dump_stream("create_time") << get_create_time();
1351 f->dump_unsigned("flags", get_flags());
1352 f->dump_string("flags_names", get_flags_string());
1353 f->dump_int("type", get_type());
1354 f->dump_int("size", get_size());
1355 f->dump_int("min_size", get_min_size());
1356 f->dump_int("crush_rule", get_crush_rule());
1357 f->dump_int("object_hash", get_object_hash());
1358 f->dump_string("pg_autoscale_mode",
1359 get_pg_autoscale_mode_name(pg_autoscale_mode));
1360 f->dump_unsigned("pg_num", get_pg_num());
1361 f->dump_unsigned("pg_placement_num", get_pgp_num());
1362 f->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
1363 f->dump_unsigned("pg_num_target", get_pg_num_target());
1364 f->dump_unsigned("pg_num_pending", get_pg_num_pending());
1365 f->dump_object("last_pg_merge_meta", last_pg_merge_meta);
1366 f->dump_stream("last_change") << get_last_change();
1367 f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
1368 f->dump_stream("last_force_op_resend_prenautilus")
1369 << get_last_force_op_resend_prenautilus();
1370 f->dump_stream("last_force_op_resend_preluminous")
1371 << get_last_force_op_resend_preluminous();
1372 f->dump_unsigned("auid", get_auid());
1373 f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
1374 f->dump_unsigned("snap_seq", get_snap_seq());
1375 f->dump_unsigned("snap_epoch", get_snap_epoch());
1376 f->open_array_section("pool_snaps");
1377 for (map<snapid_t, pool_snap_info_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
1378 f->open_object_section("pool_snap_info");
1379 p->second.dump(f);
1380 f->close_section();
1381 }
1382 f->close_section();
1383 f->dump_stream("removed_snaps") << removed_snaps;
1384 f->dump_unsigned("quota_max_bytes", quota_max_bytes);
1385 f->dump_unsigned("quota_max_objects", quota_max_objects);
1386 f->open_array_section("tiers");
1387 for (set<uint64_t>::const_iterator p = tiers.begin(); p != tiers.end(); ++p)
1388 f->dump_unsigned("pool_id", *p);
1389 f->close_section();
1390 f->dump_int("tier_of", tier_of);
1391 f->dump_int("read_tier", read_tier);
1392 f->dump_int("write_tier", write_tier);
1393 f->dump_string("cache_mode", get_cache_mode_name());
1394 f->dump_unsigned("target_max_bytes", target_max_bytes);
1395 f->dump_unsigned("target_max_objects", target_max_objects);
1396 f->dump_unsigned("cache_target_dirty_ratio_micro",
1397 cache_target_dirty_ratio_micro);
1398 f->dump_unsigned("cache_target_dirty_high_ratio_micro",
1399 cache_target_dirty_high_ratio_micro);
1400 f->dump_unsigned("cache_target_full_ratio_micro",
1401 cache_target_full_ratio_micro);
1402 f->dump_unsigned("cache_min_flush_age", cache_min_flush_age);
1403 f->dump_unsigned("cache_min_evict_age", cache_min_evict_age);
1404 f->dump_string("erasure_code_profile", erasure_code_profile);
1405 f->open_object_section("hit_set_params");
1406 hit_set_params.dump(f);
1407 f->close_section(); // hit_set_params
1408 f->dump_unsigned("hit_set_period", hit_set_period);
1409 f->dump_unsigned("hit_set_count", hit_set_count);
1410 f->dump_bool("use_gmt_hitset", use_gmt_hitset);
1411 f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
1412 f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote);
1413 f->dump_unsigned("hit_set_grade_decay_rate", hit_set_grade_decay_rate);
1414 f->dump_unsigned("hit_set_search_last_n", hit_set_search_last_n);
1415 f->open_array_section("grade_table");
1416 for (unsigned i = 0; i < hit_set_count; ++i)
1417 f->dump_unsigned("value", get_grade(i));
1418 f->close_section();
1419 f->dump_unsigned("stripe_width", get_stripe_width());
1420 f->dump_unsigned("expected_num_objects", expected_num_objects);
1421 f->dump_bool("fast_read", fast_read);
1422 f->open_object_section("options");
1423 opts.dump(f);
1424 f->close_section(); // options
1425 f->open_object_section("application_metadata");
1426 for (auto &app_pair : application_metadata) {
1427 f->open_object_section(app_pair.first.c_str());
1428 for (auto &kv_pair : app_pair.second) {
1429 f->dump_string(kv_pair.first.c_str(), kv_pair.second);
1430 }
1431 f->close_section(); // application
1432 }
1433 f->close_section(); // application_metadata
1434 }
1435
1436 void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
1437 for (size_t i = 0; i < from.size(); ++i) {
1438 if (from[i] != CRUSH_ITEM_NONE) {
1439 to->insert(
1440 pg_shard_t(
1441 from[i],
1442 is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
1443 }
1444 }
1445 }
1446
1447 void pg_pool_t::calc_pg_masks()
1448 {
1449 pg_num_mask = (1 << cbits(pg_num-1)) - 1;
1450 pgp_num_mask = (1 << cbits(pgp_num-1)) - 1;
1451 }
1452
1453 unsigned pg_pool_t::get_pg_num_divisor(pg_t pgid) const
1454 {
1455 if (pg_num == pg_num_mask + 1)
1456 return pg_num; // power-of-2 split
1457 unsigned mask = pg_num_mask >> 1;
1458 if ((pgid.ps() & mask) < (pg_num & mask))
1459 return pg_num_mask + 1; // smaller bin size (already split)
1460 else
1461 return (pg_num_mask + 1) >> 1; // bigger bin (not yet split)
1462 }
1463
1464 bool pg_pool_t::is_pending_merge(pg_t pgid, bool *target) const
1465 {
1466 if (pg_num_pending >= pg_num) {
1467 return false;
1468 }
1469 if (pgid.ps() >= pg_num_pending && pgid.ps() < pg_num) {
1470 if (target) {
1471 *target = false;
1472 }
1473 return true;
1474 }
1475 for (unsigned ps = pg_num_pending; ps < pg_num; ++ps) {
1476 if (pg_t(ps, pgid.pool()).get_parent() == pgid) {
1477 if (target) {
1478 *target = true;
1479 }
1480 return true;
1481 }
1482 }
1483 return false;
1484 }
1485
1486 /*
1487 * we have two snap modes:
1488 * - pool snaps
1489 * - snap existence/non-existence defined by snaps[] and snap_seq
1490 * - user managed snaps
1491 * - existence tracked by librados user
1492 */
1493 bool pg_pool_t::is_pool_snaps_mode() const
1494 {
1495 return has_flag(FLAG_POOL_SNAPS);
1496 }
1497
1498 bool pg_pool_t::is_unmanaged_snaps_mode() const
1499 {
1500 return has_flag(FLAG_SELFMANAGED_SNAPS);
1501 }
1502
1503 bool pg_pool_t::is_removed_snap(snapid_t s) const
1504 {
1505 if (is_pool_snaps_mode())
1506 return s <= get_snap_seq() && snaps.count(s) == 0;
1507 else
1508 return removed_snaps.contains(s);
1509 }
1510
1511 /*
1512 * build set of known-removed sets from either pool snaps or
1513 * explicit removed_snaps set.
1514 */
1515 void pg_pool_t::build_removed_snaps(interval_set<snapid_t>& rs) const
1516 {
1517 if (is_pool_snaps_mode()) {
1518 rs.clear();
1519 for (snapid_t s = 1; s <= get_snap_seq(); s = s + 1)
1520 if (snaps.count(s) == 0)
1521 rs.insert(s);
1522 } else {
1523 rs = removed_snaps;
1524 }
1525 }
1526
1527 bool pg_pool_t::maybe_updated_removed_snaps(const interval_set<snapid_t>& cached) const
1528 {
1529 if (is_unmanaged_snaps_mode()) { // remove_unmanaged_snap increments range_end
1530 if (removed_snaps.empty() || cached.empty()) // range_end is undefined
1531 return removed_snaps.empty() != cached.empty();
1532 return removed_snaps.range_end() != cached.range_end();
1533 }
1534 return true;
1535 }
1536
1537 snapid_t pg_pool_t::snap_exists(const char *s) const
1538 {
1539 for (map<snapid_t,pool_snap_info_t>::const_iterator p = snaps.begin();
1540 p != snaps.end();
1541 ++p)
1542 if (p->second.name == s)
1543 return p->second.snapid;
1544 return 0;
1545 }
1546
1547 void pg_pool_t::add_snap(const char *n, utime_t stamp)
1548 {
1549 ceph_assert(!is_unmanaged_snaps_mode());
1550 flags |= FLAG_POOL_SNAPS;
1551 snapid_t s = get_snap_seq() + 1;
1552 snap_seq = s;
1553 snaps[s].snapid = s;
1554 snaps[s].name = n;
1555 snaps[s].stamp = stamp;
1556 }
1557
1558 void pg_pool_t::add_unmanaged_snap(uint64_t& snapid)
1559 {
1560 ceph_assert(!is_pool_snaps_mode());
1561 if (snap_seq == 0) {
1562 // kludge for pre-mimic tracking of pool vs selfmanaged snaps. after
1563 // mimic this field is not decoded but our flag is set; pre-mimic, we
1564 // have a non-empty removed_snaps to signifiy a non-pool-snaps pool.
1565 removed_snaps.insert(snapid_t(1));
1566 snap_seq = 1;
1567 }
1568 flags |= FLAG_SELFMANAGED_SNAPS;
1569 snapid = snap_seq = snap_seq + 1;
1570 }
1571
1572 void pg_pool_t::remove_snap(snapid_t s)
1573 {
1574 ceph_assert(snaps.count(s));
1575 snaps.erase(s);
1576 snap_seq = snap_seq + 1;
1577 }
1578
1579 void pg_pool_t::remove_unmanaged_snap(snapid_t s)
1580 {
1581 ceph_assert(is_unmanaged_snaps_mode());
1582 removed_snaps.insert(s);
1583 snap_seq = snap_seq + 1;
1584 // try to add in the new seq, just to try to keep the interval_set contiguous
1585 if (!removed_snaps.contains(get_snap_seq())) {
1586 removed_snaps.insert(get_snap_seq());
1587 }
1588 }
1589
1590 SnapContext pg_pool_t::get_snap_context() const
1591 {
1592 vector<snapid_t> s(snaps.size());
1593 unsigned i = 0;
1594 for (map<snapid_t, pool_snap_info_t>::const_reverse_iterator p = snaps.rbegin();
1595 p != snaps.rend();
1596 ++p)
1597 s[i++] = p->first;
1598 return SnapContext(get_snap_seq(), s);
1599 }
1600
1601 uint32_t pg_pool_t::hash_key(const string& key, const string& ns) const
1602 {
1603 if (ns.empty())
1604 return ceph_str_hash(object_hash, key.data(), key.length());
1605 int nsl = ns.length();
1606 int len = key.length() + nsl + 1;
1607 char buf[len];
1608 memcpy(&buf[0], ns.data(), nsl);
1609 buf[nsl] = '\037';
1610 memcpy(&buf[nsl+1], key.data(), key.length());
1611 return ceph_str_hash(object_hash, &buf[0], len);
1612 }
1613
1614 uint32_t pg_pool_t::raw_hash_to_pg(uint32_t v) const
1615 {
1616 return ceph_stable_mod(v, pg_num, pg_num_mask);
1617 }
1618
1619 /*
1620 * map a raw pg (with full precision ps) into an actual pg, for storage
1621 */
1622 pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
1623 {
1624 pg.set_ps(ceph_stable_mod(pg.ps(), pg_num, pg_num_mask));
1625 return pg;
1626 }
1627
1628 /*
1629 * map raw pg (full precision ps) into a placement seed. include
1630 * pool id in that value so that different pools don't use the same
1631 * seeds.
1632 */
1633 ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
1634 {
1635 if (flags & FLAG_HASHPSPOOL) {
1636 // Hash the pool id so that pool PGs do not overlap.
1637 return
1638 crush_hash32_2(CRUSH_HASH_RJENKINS1,
1639 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
1640 pg.pool());
1641 } else {
1642 // Legacy behavior; add ps and pool together. This is not a great
1643 // idea because the PGs from each pool will essentially overlap on
1644 // top of each other: 0.5 == 1.4 == 2.3 == ...
1645 return
1646 ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
1647 pg.pool();
1648 }
1649 }
1650
1651 uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
1652 {
1653 uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
1654 if (pg_num == pg_num_mask + 1) {
1655 r &= ~pg_num_mask;
1656 } else {
1657 unsigned smaller_mask = pg_num_mask >> 1;
1658 if ((pg.ps() & smaller_mask) < (pg_num & smaller_mask)) {
1659 r &= ~pg_num_mask;
1660 } else {
1661 r &= ~smaller_mask;
1662 }
1663 }
1664 r |= pg.ps();
1665 return r;
1666 }
1667
// Serialize this pool into bl, picking the wire format from the reader's
// feature bits.  Four layouts are produced, checked in this order:
//   - no CEPH_FEATURE_PGPOOL3:        bare version byte 2
//   - no CEPH_FEATURE_OSDENC:         bare version byte 4
//   - no CEPH_FEATURE_OSD_POOLRESEND: ENCODE_START framing, version 14
//   - otherwise:                      ENCODE_START framing, version 21, 24,
//                                     26, 27 or 29 depending on which later
//                                     features are present
// Every layout starts with the same fixed prefix (type ... snap_epoch);
// the field order is the wire format and must not be rearranged.
1668 void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
1669 {
1670 using ceph::encode;
// Oldest layout: the snaps and removed_snaps element counts are written
// up front, then auid, then both containers without their own headers.
1671 if ((features & CEPH_FEATURE_PGPOOL3) == 0) {
1672 // this encoding matches the old struct ceph_pg_pool
1673 __u8 struct_v = 2;
1674 encode(struct_v, bl);
1675 encode(type, bl);
1676 encode(size, bl);
1677 encode(crush_rule, bl);
1678 encode(object_hash, bl);
1679 encode(pg_num, bl);
1680 encode(pgp_num, bl);
1681 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1682 encode(lpg_num, bl);
1683 encode(lpgp_num, bl);
1684 encode(last_change, bl);
1685 encode(snap_seq, bl);
1686 encode(snap_epoch, bl);
1687
1688 __u32 n = snaps.size();
1689 encode(n, bl);
1690 n = removed_snaps.num_intervals();
1691 encode(n, bl);
1692
1693 encode(auid, bl);
1694
1695 encode_nohead(snaps, bl, features);
1696 encode_nohead(removed_snaps, bl);
1697 return;
1698 }
1699
// Version 4 layout: same prefix, containers encoded with their headers,
// followed by flags and a zero placeholder for crash_replay_interval.
1700 if ((features & CEPH_FEATURE_OSDENC) == 0) {
1701 __u8 struct_v = 4;
1702 encode(struct_v, bl);
1703 encode(type, bl);
1704 encode(size, bl);
1705 encode(crush_rule, bl);
1706 encode(object_hash, bl);
1707 encode(pg_num, bl);
1708 encode(pgp_num, bl);
1709 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1710 encode(lpg_num, bl);
1711 encode(lpgp_num, bl);
1712 encode(last_change, bl);
1713 encode(snap_seq, bl);
1714 encode(snap_epoch, bl);
1715 encode(snaps, bl, features);
1716 encode(removed_snaps, bl);
1717 encode(auid, bl);
1718 encode(flags, bl);
1719 encode((uint32_t)0, bl); // crash_replay_interval
1720 return;
1721 }
1722
// Version 14 layout: everything up to and including erasure_code_profile,
// i.e. the newest layout minus last_force_op_resend and all later fields.
1723 if ((features & CEPH_FEATURE_OSD_POOLRESEND) == 0) {
1724 // we simply added last_force_op_resend here, which is a fully
1725 // backward compatible change. however, encoding the same map
1726 // differently between monitors triggers scrub noise (even though
1727 // they are decodable without the feature), so let's be pedantic
1728 // about it.
1729 ENCODE_START(14, 5, bl);
1730 encode(type, bl);
1731 encode(size, bl);
1732 encode(crush_rule, bl);
1733 encode(object_hash, bl);
1734 encode(pg_num, bl);
1735 encode(pgp_num, bl);
1736 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1737 encode(lpg_num, bl);
1738 encode(lpgp_num, bl);
1739 encode(last_change, bl);
1740 encode(snap_seq, bl);
1741 encode(snap_epoch, bl);
1742 encode(snaps, bl, features);
1743 encode(removed_snaps, bl);
1744 encode(auid, bl);
1745 encode(flags, bl);
1746 encode((uint32_t)0, bl); // crash_replay_interval
1747 encode(min_size, bl);
1748 encode(quota_max_bytes, bl);
1749 encode(quota_max_objects, bl);
1750 encode(tiers, bl);
1751 encode(tier_of, bl);
1752 __u8 c = cache_mode;
1753 encode(c, bl);
1754 encode(read_tier, bl);
1755 encode(write_tier, bl);
1756 encode(properties, bl);
1757 encode(hit_set_params, bl);
1758 encode(hit_set_period, bl);
1759 encode(hit_set_count, bl);
1760 encode(stripe_width, bl);
1761 encode(target_max_bytes, bl);
1762 encode(target_max_objects, bl);
1763 encode(cache_target_dirty_ratio_micro, bl);
1764 encode(cache_target_full_ratio_micro, bl);
1765 encode(cache_min_flush_age, bl);
1766 encode(cache_min_evict_age, bl);
1767 encode(erasure_code_profile, bl);
1768 ENCODE_FINISH(bl);
1769 return;
1770 }
1771
// Newest layout.  Start from version 29 and step down to the highest
// version the reader's features allow; the "if (v >= N)" guards below then
// emit only the fields that exist in that version.
1772 uint8_t v = 29;
1773 // NOTE: any new encoding dependencies must be reflected by
1774 // SIGNIFICANT_FEATURES
1775 if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
1776 // this was the first post-hammer thing we added; if it's missing, encode
1777 // like hammer.
1778 v = 21;
1779 } else if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
1780 v = 24;
1781 } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
1782 v = 26;
1783 } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
1784 v = 27;
1785 }
1786
1787 ENCODE_START(v, 5, bl);
1788 encode(type, bl);
1789 encode(size, bl);
1790 encode(crush_rule, bl);
1791 encode(object_hash, bl);
1792 encode(pg_num, bl);
1793 encode(pgp_num, bl);
1794 __u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
1795 encode(lpg_num, bl);
1796 encode(lpgp_num, bl);
1797 encode(last_change, bl);
1798 encode(snap_seq, bl);
1799 encode(snap_epoch, bl);
1800 encode(snaps, bl, features);
1801 encode(removed_snaps, bl);
1802 encode(auid, bl);
// Below version 27 the SELFMANAGED_SNAPS, POOL_SNAPS and CREATING flag
// bits are masked out of the encoded flags.
1803 if (v >= 27) {
1804 encode(flags, bl);
1805 } else {
1806 auto tmp = flags;
1807 tmp &= ~(FLAG_SELFMANAGED_SNAPS | FLAG_POOL_SNAPS | FLAG_CREATING);
1808 encode(tmp, bl);
1809 }
1810 encode((uint32_t)0, bl); // crash_replay_interval
1811 encode(min_size, bl);
1812 encode(quota_max_bytes, bl);
1813 encode(quota_max_objects, bl);
1814 encode(tiers, bl);
1815 encode(tier_of, bl);
1816 __u8 c = cache_mode;
1817 encode(c, bl);
1818 encode(read_tier, bl);
1819 encode(write_tier, bl);
1820 encode(properties, bl);
1821 encode(hit_set_params, bl);
1822 encode(hit_set_period, bl);
1823 encode(hit_set_count, bl);
1824 encode(stripe_width, bl);
1825 encode(target_max_bytes, bl);
1826 encode(target_max_objects, bl);
1827 encode(cache_target_dirty_ratio_micro, bl);
1828 encode(cache_target_full_ratio_micro, bl);
1829 encode(cache_min_flush_age, bl);
1830 encode(cache_min_evict_age, bl);
1831 encode(erasure_code_profile, bl);
1832 encode(last_force_op_resend_preluminous, bl);
1833 encode(min_read_recency_for_promote, bl);
1834 encode(expected_num_objects, bl);
// Version-gated tail: each guard matches the struct_v check that
// pg_pool_t::decode() applies to the same field.
1835 if (v >= 19) {
1836 encode(cache_target_dirty_high_ratio_micro, bl);
1837 }
1838 if (v >= 20) {
1839 encode(min_write_recency_for_promote, bl);
1840 }
1841 if (v >= 21) {
1842 encode(use_gmt_hitset, bl);
1843 }
1844 if (v >= 22) {
1845 encode(fast_read, bl);
1846 }
1847 if (v >= 23) {
1848 encode(hit_set_grade_decay_rate, bl);
1849 encode(hit_set_search_last_n, bl);
1850 }
1851 if (v >= 24) {
1852 encode(opts, bl, features);
1853 }
1854 if (v >= 25) {
1855 encode(last_force_op_resend_prenautilus, bl);
1856 }
1857 if (v >= 26) {
1858 encode(application_metadata, bl);
1859 }
1860 if (v >= 27) {
1861 encode(create_time, bl);
1862 }
1863 if (v >= 28) {
1864 encode(pg_num_target, bl);
1865 encode(pgp_num_target, bl);
1866 encode(pg_num_pending, bl);
// Two zero epochs keep the slots the 14.1.x encoding used for the
// pg_num_dec_last_epoch_* fields.
1867 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_started from 14.1.[01]
1868 encode((epoch_t)0, bl); // pg_num_dec_last_epoch_clean from 14.1.[01]
1869 encode(last_force_op_resend, bl);
1870 encode(pg_autoscale_mode, bl);
1871 }
1872 if (v >= 29) {
1873 encode(last_pg_merge_meta, bl);
1874 }
1875 ENCODE_FINISH(bl);
1876 }
1877
// Deserialize a pool from bl.
//
// Fields are read in the order pg_pool_t::encode() writes them.  Each
// "struct_v >= N" check gates a field that only exists from encoding
// version N on; where an else branch is present, older encodings get a
// fallback value for that field.  After decoding, the PG masks and the
// hit-set grade table are recomputed.
// NOTE(review): struct_v is presumably declared by
// DECODE_START_LEGACY_COMPAT_LEN -- confirm against the macro definition.
1878 void pg_pool_t::decode(bufferlist::const_iterator& bl)
1879 {
1880 DECODE_START_LEGACY_COMPAT_LEN(29, 5, 5, bl);
1881 decode(type, bl);
1882 decode(size, bl);
1883 decode(crush_rule, bl);
1884 decode(object_hash, bl);
1885 decode(pg_num, bl);
1886 decode(pgp_num, bl);
// lpg_num / lpgp_num are read only to advance the iterator; the values
// are discarded.
1887 {
1888 __u32 lpg_num, lpgp_num;
1889 decode(lpg_num, bl);
1890 decode(lpgp_num, bl);
1891 }
1892 decode(last_change, bl);
1893 decode(snap_seq, bl);
1894 decode(snap_epoch, bl);
1895
// Before version 3 the two container sizes precede auid and the
// containers themselves follow without headers.
1896 if (struct_v >= 3) {
1897 decode(snaps, bl);
1898 decode(removed_snaps, bl);
1899 decode(auid, bl);
1900 } else {
1901 __u32 n, m;
1902 decode(n, bl);
1903 decode(m, bl);
1904 decode(auid, bl);
1905 decode_nohead(n, snaps, bl);
1906 decode_nohead(m, removed_snaps, bl);
1907 }
1908
// crash_replay_interval is read and discarded.
1909 if (struct_v >= 4) {
1910 decode(flags, bl);
1911 uint32_t crash_replay_interval;
1912 decode(crash_replay_interval, bl);
1913 } else {
1914 flags = 0;
1915 }
// If snapshots have been used (snap_seq > 0) but neither snap-mode flag
// was decoded, infer the mode: a non-empty removed_snaps means
// selfmanaged snaps, otherwise pool snaps.
1916 // upgrade path for selfmanaged vs pool snaps
1917 if (snap_seq > 0 && (flags & (FLAG_SELFMANAGED_SNAPS|FLAG_POOL_SNAPS)) == 0) {
1918 if (!removed_snaps.empty()) {
1919 flags |= FLAG_SELFMANAGED_SNAPS;
1920 } else {
1921 flags |= FLAG_POOL_SNAPS;
1922 }
1923 }
1924 if (struct_v >= 7) {
1925 decode(min_size, bl);
1926 } else {
// fallback: size minus half of size (integer division)
1927 min_size = size - size/2;
1928 }
1929 if (struct_v >= 8) {
1930 decode(quota_max_bytes, bl);
1931 decode(quota_max_objects, bl);
1932 }
1933 if (struct_v >= 9) {
1934 decode(tiers, bl);
1935 decode(tier_of, bl);
// cache_mode travels as a single byte
1936 __u8 v;
1937 decode(v, bl);
1938 cache_mode = (cache_mode_t)v;
1939 decode(read_tier, bl);
1940 decode(write_tier, bl);
1941 }
1942 if (struct_v >= 10) {
1943 decode(properties, bl);
1944 }
1945 if (struct_v >= 11) {
1946 decode(hit_set_params, bl);
1947 decode(hit_set_period, bl);
1948 decode(hit_set_count, bl);
1949 } else {
// fallback: take the values of a default-constructed pool
1950 pg_pool_t def;
1951 hit_set_period = def.hit_set_period;
1952 hit_set_count = def.hit_set_count;
1953 }
1954 if (struct_v >= 12) {
1955 decode(stripe_width, bl);
1956 } else {
1957 set_stripe_width(0);
1958 }
1959 if (struct_v >= 13) {
1960 decode(target_max_bytes, bl);
1961 decode(target_max_objects, bl);
1962 decode(cache_target_dirty_ratio_micro, bl);
1963 decode(cache_target_full_ratio_micro, bl);
1964 decode(cache_min_flush_age, bl);
1965 decode(cache_min_evict_age, bl);
1966 } else {
1967 target_max_bytes = 0;
1968 target_max_objects = 0;
1969 cache_target_dirty_ratio_micro = 0;
1970 cache_target_full_ratio_micro = 0;
1971 cache_min_flush_age = 0;
1972 cache_min_evict_age = 0;
1973 }
1974 if (struct_v >= 14) {
1975 decode(erasure_code_profile, bl);
1976 }
1977 if (struct_v >= 15) {
1978 decode(last_force_op_resend_preluminous, bl);
1979 } else {
1980 last_force_op_resend_preluminous = 0;
1981 }
1982 if (struct_v >= 16) {
1983 decode(min_read_recency_for_promote, bl);
1984 } else {
1985 min_read_recency_for_promote = 1;
1986 }
1987 if (struct_v >= 17) {
1988 decode(expected_num_objects, bl);
1989 } else {
1990 expected_num_objects = 0;
1991 }
1992 if (struct_v >= 19) {
1993 decode(cache_target_dirty_high_ratio_micro, bl);
1994 } else {
// fallback: reuse the plain dirty ratio
1995 cache_target_dirty_high_ratio_micro = cache_target_dirty_ratio_micro;
1996 }
1997 if (struct_v >= 20) {
1998 decode(min_write_recency_for_promote, bl);
1999 } else {
2000 min_write_recency_for_promote = 1;
2001 }
2002 if (struct_v >= 21) {
2003 decode(use_gmt_hitset, bl);
2004 } else {
2005 use_gmt_hitset = false;
2006 }
2007 if (struct_v >= 22) {
2008 decode(fast_read, bl);
2009 } else {
2010 fast_read = false;
2011 }
2012 if (struct_v >= 23) {
2013 decode(hit_set_grade_decay_rate, bl);
2014 decode(hit_set_search_last_n, bl);
2015 } else {
2016 hit_set_grade_decay_rate = 0;
2017 hit_set_search_last_n = 1;
2018 }
2019 if (struct_v >= 24) {
2020 decode(opts, bl);
2021 }
2022 if (struct_v >= 25) {
2023 decode(last_force_op_resend_prenautilus, bl);
2024 } else {
// fallback: carry over the pre-luminous value
2025 last_force_op_resend_prenautilus = last_force_op_resend_preluminous;
2026 }
2027 if (struct_v >= 26) {
2028 decode(application_metadata, bl);
2029 }
2030 if (struct_v >= 27) {
2031 decode(create_time, bl);
2032 }
2033 if (struct_v >= 28) {
2034 decode(pg_num_target, bl);
2035 decode(pgp_num_target, bl);
2036 decode(pg_num_pending, bl);
// The two legacy merge epochs are always read.  They are only used when
// the encoding is exactly version 28, where they seed last_pg_merge_meta;
// from version 29 on last_pg_merge_meta is decoded directly.
2037 epoch_t old_merge_last_epoch_clean, old_merge_last_epoch_started;
2038 decode(old_merge_last_epoch_started, bl);
2039 decode(old_merge_last_epoch_clean, bl);
2040 decode(last_force_op_resend, bl);
2041 decode(pg_autoscale_mode, bl);
2042 if (struct_v >= 29) {
2043 decode(last_pg_merge_meta, bl);
2044 } else {
2045 last_pg_merge_meta.last_epoch_clean = old_merge_last_epoch_clean;
2046 last_pg_merge_meta.last_epoch_started = old_merge_last_epoch_started;
2047 }
2048 } else {
// Before version 28: targets and pending count mirror the current
// pg_num / pgp_num.
2049 pg_num_target = pg_num;
2050 pgp_num_target = pgp_num;
2051 pg_num_pending = pg_num;
2052 last_force_op_resend = last_force_op_resend_prenautilus;
2053 pg_autoscale_mode = PG_AUTOSCALE_MODE_WARN; // default to warn on upgrade
2054 }
2055 DECODE_FINISH(bl);
// Refresh the values derived from the fields just decoded.
2056 calc_pg_masks();
2057 calc_grade_table();
2058 }
2059
2060 void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
2061 {
2062 pg_pool_t a;
2063 o.push_back(new pg_pool_t(a));
2064
2065 a.create_time = utime_t(4,5);
2066 a.type = TYPE_REPLICATED;
2067 a.size = 2;
2068 a.crush_rule = 3;
2069 a.object_hash = 4;
2070 a.pg_num = 6;
2071 a.pgp_num = 4;
2072 a.pgp_num_target = 4;
2073 a.pg_num_target = 5;
2074 a.pg_num_pending = 5;
2075 a.last_pg_merge_meta.last_epoch_started = 2;
2076 a.last_pg_merge_meta.last_epoch_clean = 2;
2077 a.last_change = 9;
2078 a.last_force_op_resend = 123823;
2079 a.last_force_op_resend_preluminous = 123824;
2080 a.snap_seq = 10;
2081 a.snap_epoch = 11;
2082 a.flags = FLAG_POOL_SNAPS;
2083 a.auid = 12;
2084 a.quota_max_bytes = 473;
2085 a.quota_max_objects = 474;
2086 o.push_back(new pg_pool_t(a));
2087
2088 a.snaps[3].name = "asdf";
2089 a.snaps[3].snapid = 3;
2090 a.snaps[3].stamp = utime_t(123, 4);
2091 a.snaps[6].name = "qwer";
2092 a.snaps[6].snapid = 6;
2093 a.snaps[6].stamp = utime_t(23423, 4);
2094 o.push_back(new pg_pool_t(a));
2095
2096 a.flags = FLAG_SELFMANAGED_SNAPS;
2097 a.snaps.clear();
2098 a.removed_snaps.insert(2);
2099 a.quota_max_bytes = 2473;
2100 a.quota_max_objects = 4374;
2101 a.tiers.insert(0);
2102 a.tiers.insert(1);
2103 a.tier_of = 2;
2104 a.cache_mode = CACHEMODE_WRITEBACK;
2105 a.read_tier = 1;
2106 a.write_tier = 1;
2107 a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
2108 a.hit_set_period = 3600;
2109 a.hit_set_count = 8;
2110 a.min_read_recency_for_promote = 1;
2111 a.min_write_recency_for_promote = 1;
2112 a.hit_set_grade_decay_rate = 50;
2113 a.hit_set_search_last_n = 1;
2114 a.calc_grade_table();
2115 a.set_stripe_width(12345);
2116 a.target_max_bytes = 1238132132;
2117 a.target_max_objects = 1232132;
2118 a.cache_target_dirty_ratio_micro = 187232;
2119 a.cache_target_dirty_high_ratio_micro = 309856;
2120 a.cache_target_full_ratio_micro = 987222;
2121 a.cache_min_flush_age = 231;
2122 a.cache_min_evict_age = 2321;
2123 a.erasure_code_profile = "profile in osdmap";
2124 a.expected_num_objects = 123456;
2125 a.fast_read = false;
2126 a.application_metadata = {{"rbd", {{"key", "value"}}}};
2127 o.push_back(new pg_pool_t(a));
2128 }
2129
2130 ostream& operator<<(ostream& out, const pg_pool_t& p)
2131 {
2132 out << p.get_type_name()
2133 << " size " << p.get_size()
2134 << " min_size " << p.get_min_size()
2135 << " crush_rule " << p.get_crush_rule()
2136 << " object_hash " << p.get_object_hash_name()
2137 << " pg_num " << p.get_pg_num()
2138 << " pgp_num " << p.get_pgp_num();
2139 if (p.get_pg_num_target() != p.get_pg_num()) {
2140 out << " pg_num_target " << p.get_pg_num_target();
2141 }
2142 if (p.get_pgp_num_target() != p.get_pgp_num()) {
2143 out << " pgp_num_target " << p.get_pgp_num_target();
2144 }
2145 if (p.get_pg_num_pending() != p.get_pg_num()) {
2146 out << " pg_num_pending " << p.get_pg_num_pending();
2147 }
2148 if (p.pg_autoscale_mode) {
2149 out << " autoscale_mode " << p.get_pg_autoscale_mode_name(p.pg_autoscale_mode);
2150 }
2151 out << " last_change " << p.get_last_change();
2152 if (p.get_last_force_op_resend() ||
2153 p.get_last_force_op_resend_prenautilus() ||
2154 p.get_last_force_op_resend_preluminous())
2155 out << " lfor " << p.get_last_force_op_resend() << "/"
2156 << p.get_last_force_op_resend_prenautilus() << "/"
2157 << p.get_last_force_op_resend_preluminous();
2158 if (p.get_auid())
2159 out << " owner " << p.get_auid();
2160 if (p.flags)
2161 out << " flags " << p.get_flags_string();
2162 if (p.quota_max_bytes)
2163 out << " max_bytes " << p.quota_max_bytes;
2164 if (p.quota_max_objects)
2165 out << " max_objects " << p.quota_max_objects;
2166 if (!p.tiers.empty())
2167 out << " tiers " << p.tiers;
2168 if (p.is_tier())
2169 out << " tier_of " << p.tier_of;
2170 if (p.has_read_tier())
2171 out << " read_tier " << p.read_tier;
2172 if (p.has_write_tier())
2173 out << " write_tier " << p.write_tier;
2174 if (p.cache_mode)
2175 out << " cache_mode " << p.get_cache_mode_name();
2176 if (p.target_max_bytes)
2177 out << " target_bytes " << p.target_max_bytes;
2178 if (p.target_max_objects)
2179 out << " target_objects " << p.target_max_objects;
2180 if (p.hit_set_params.get_type() != HitSet::TYPE_NONE) {
2181 out << " hit_set " << p.hit_set_params
2182 << " " << p.hit_set_period << "s"
2183 << " x" << p.hit_set_count << " decay_rate "
2184 << p.hit_set_grade_decay_rate
2185 << " search_last_n " << p.hit_set_search_last_n;
2186 }
2187 if (p.min_read_recency_for_promote)
2188 out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
2189 if (p.min_write_recency_for_promote)
2190 out << " min_write_recency_for_promote " << p.min_write_recency_for_promote;
2191 out << " stripe_width " << p.get_stripe_width();
2192 if (p.expected_num_objects)
2193 out << " expected_num_objects " << p.expected_num_objects;
2194 if (p.fast_read)
2195 out << " fast_read " << p.fast_read;
2196 out << p.opts;
2197 if (!p.application_metadata.empty()) {
2198 out << " application ";
2199 for (auto it = p.application_metadata.begin();
2200 it != p.application_metadata.end(); ++it) {
2201 if (it != p.application_metadata.begin())
2202 out << ",";
2203 out << it->first;
2204 }
2205 }
2206 return out;
2207 }
2208
2209
2210 // -- object_stat_sum_t --
2211
2212 void object_stat_sum_t::dump(Formatter *f) const
2213 {
2214 f->dump_int("num_bytes", num_bytes);
2215 f->dump_int("num_objects", num_objects);
2216 f->dump_int("num_object_clones", num_object_clones);
2217 f->dump_int("num_object_copies", num_object_copies);
2218 f->dump_int("num_objects_missing_on_primary", num_objects_missing_on_primary);
2219 f->dump_int("num_objects_missing", num_objects_missing);
2220 f->dump_int("num_objects_degraded", num_objects_degraded);
2221 f->dump_int("num_objects_misplaced", num_objects_misplaced);
2222 f->dump_int("num_objects_unfound", num_objects_unfound);
2223 f->dump_int("num_objects_dirty", num_objects_dirty);
2224 f->dump_int("num_whiteouts", num_whiteouts);
2225 f->dump_int("num_read", num_rd);
2226 f->dump_int("num_read_kb", num_rd_kb);
2227 f->dump_int("num_write", num_wr);
2228 f->dump_int("num_write_kb", num_wr_kb);
2229 f->dump_int("num_scrub_errors", num_scrub_errors);
2230 f->dump_int("num_shallow_scrub_errors", num_shallow_scrub_errors);
2231 f->dump_int("num_deep_scrub_errors", num_deep_scrub_errors);
2232 f->dump_int("num_objects_recovered", num_objects_recovered);
2233 f->dump_int("num_bytes_recovered", num_bytes_recovered);
2234 f->dump_int("num_keys_recovered", num_keys_recovered);
2235 f->dump_int("num_objects_omap", num_objects_omap);
2236 f->dump_int("num_objects_hit_set_archive", num_objects_hit_set_archive);
2237 f->dump_int("num_bytes_hit_set_archive", num_bytes_hit_set_archive);
2238 f->dump_int("num_flush", num_flush);
2239 f->dump_int("num_flush_kb", num_flush_kb);
2240 f->dump_int("num_evict", num_evict);
2241 f->dump_int("num_evict_kb", num_evict_kb);
2242 f->dump_int("num_promote", num_promote);
2243 f->dump_int("num_flush_mode_high", num_flush_mode_high);
2244 f->dump_int("num_flush_mode_low", num_flush_mode_low);
2245 f->dump_int("num_evict_mode_some", num_evict_mode_some);
2246 f->dump_int("num_evict_mode_full", num_evict_mode_full);
2247 f->dump_int("num_objects_pinned", num_objects_pinned);
2248 f->dump_int("num_legacy_snapsets", num_legacy_snapsets);
2249 f->dump_int("num_large_omap_objects", num_large_omap_objects);
2250 f->dump_int("num_objects_manifest", num_objects_manifest);
2251 f->dump_int("num_omap_bytes", num_omap_bytes);
2252 f->dump_int("num_omap_keys", num_omap_keys);
2253 f->dump_int("num_objects_repaired", num_objects_repaired);
2254 }
2255
/**
 * Serialize this stat sum into a bufferlist.
 *
 * The payload is wrapped by ENCODE_START(20, 14, ...) / ENCODE_FINISH.
 * When CEPH_LITTLE_ENDIAN is defined the struct is appended as one raw
 * memory block; otherwise each counter is encoded individually in the
 * order listed below.
 */
void object_stat_sum_t::encode(bufferlist& bl) const
{
  ENCODE_START(20, 14, bl);
#if defined(CEPH_LITTLE_ENDIAN)
  // Fast path: copy sizeof(object_stat_sum_t) bytes starting at num_bytes.
  // NOTE(review): presumably relies on num_bytes being the first member and
  // on the in-memory layout matching the field order of the #else branch —
  // confirm against the struct definition before adding or moving fields.
  bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
#else
  // Portable path: field-by-field, in wire order.
  encode(num_bytes, bl);
  encode(num_objects, bl);
  encode(num_object_clones, bl);
  encode(num_object_copies, bl);
  encode(num_objects_missing_on_primary, bl);
  encode(num_objects_degraded, bl);
  encode(num_objects_unfound, bl);
  encode(num_rd, bl);
  encode(num_rd_kb, bl);
  encode(num_wr, bl);
  encode(num_wr_kb, bl);
  encode(num_scrub_errors, bl);
  encode(num_objects_recovered, bl);
  encode(num_bytes_recovered, bl);
  encode(num_keys_recovered, bl);
  encode(num_shallow_scrub_errors, bl);
  encode(num_deep_scrub_errors, bl);
  encode(num_objects_dirty, bl);
  encode(num_whiteouts, bl);
  encode(num_objects_omap, bl);
  encode(num_objects_hit_set_archive, bl);
  encode(num_objects_misplaced, bl);
  encode(num_bytes_hit_set_archive, bl);
  encode(num_flush, bl);
  encode(num_flush_kb, bl);
  encode(num_evict, bl);
  encode(num_evict_kb, bl);
  encode(num_promote, bl);
  encode(num_flush_mode_high, bl);
  encode(num_flush_mode_low, bl);
  encode(num_evict_mode_some, bl);
  encode(num_evict_mode_full, bl);
  encode(num_objects_pinned, bl);
  encode(num_objects_missing, bl);
  encode(num_legacy_snapsets, bl);
  encode(num_large_omap_objects, bl);
  encode(num_objects_manifest, bl);
  encode(num_omap_bytes, bl);
  encode(num_omap_keys, bl);
  encode(num_objects_repaired, bl);
#endif
  ENCODE_FINISH(bl);
}
2305
/**
 * Deserialize a stat sum from a bufferlist iterator.
 *
 * When CEPH_LITTLE_ENDIAN is defined and the encoded version equals
 * STAT_SUM_DECODE_VERSION, the whole struct is copied in as one raw block.
 * In every other case the counters are decoded one by one, and counters
 * introduced in later versions are only read when struct_v is new enough.
 */
void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
{
  // Set once the raw-copy fast path has consumed the payload.
  bool decode_finish = false;
  static const int STAT_SUM_DECODE_VERSION = 20;
  DECODE_START(STAT_SUM_DECODE_VERSION, bl);
#if defined(CEPH_LITTLE_ENDIAN)
  // Fast path is only taken for an exact version match; the copy starts at
  // num_bytes and spans sizeof(object_stat_sum_t) bytes.
  if (struct_v == STAT_SUM_DECODE_VERSION) {
    bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
    decode_finish = true;
  }
#endif
  if (!decode_finish) {
    // Field-by-field path, in wire order.
    decode(num_bytes, bl);
    decode(num_objects, bl);
    decode(num_object_clones, bl);
    decode(num_object_copies, bl);
    decode(num_objects_missing_on_primary, bl);
    decode(num_objects_degraded, bl);
    decode(num_objects_unfound, bl);
    decode(num_rd, bl);
    decode(num_rd_kb, bl);
    decode(num_wr, bl);
    decode(num_wr_kb, bl);
    decode(num_scrub_errors, bl);
    decode(num_objects_recovered, bl);
    decode(num_bytes_recovered, bl);
    decode(num_keys_recovered, bl);
    decode(num_shallow_scrub_errors, bl);
    decode(num_deep_scrub_errors, bl);
    decode(num_objects_dirty, bl);
    decode(num_whiteouts, bl);
    decode(num_objects_omap, bl);
    decode(num_objects_hit_set_archive, bl);
    decode(num_objects_misplaced, bl);
    decode(num_bytes_hit_set_archive, bl);
    decode(num_flush, bl);
    decode(num_flush_kb, bl);
    decode(num_evict, bl);
    decode(num_evict_kb, bl);
    decode(num_promote, bl);
    decode(num_flush_mode_high, bl);
    decode(num_flush_mode_low, bl);
    decode(num_evict_mode_some, bl);
    decode(num_evict_mode_full, bl);
    decode(num_objects_pinned, bl);
    decode(num_objects_missing, bl);
    // Version-gated counters: absent in payloads older than the given
    // struct_v.
    if (struct_v >= 16) {
      decode(num_legacy_snapsets, bl);
    } else {
      num_legacy_snapsets = num_object_clones;  // upper bound
    }
    if (struct_v >= 17) {
      decode(num_large_omap_objects, bl);
    }
    if (struct_v >= 18) {
      decode(num_objects_manifest, bl);
    }
    if (struct_v >= 19) {
      decode(num_omap_bytes, bl);
      decode(num_omap_keys, bl);
    }
    if (struct_v >= 20) {
      decode(num_objects_repaired, bl);
    }
  }
  DECODE_FINISH(bl);
}
2373
2374 void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
2375 {
2376 object_stat_sum_t a;
2377
2378 a.num_bytes = 1;
2379 a.num_objects = 3;
2380 a.num_object_clones = 4;
2381 a.num_object_copies = 5;
2382 a.num_objects_missing_on_primary = 6;
2383 a.num_objects_missing = 123;
2384 a.num_objects_degraded = 7;
2385 a.num_objects_unfound = 8;
2386 a.num_rd = 9; a.num_rd_kb = 10;
2387 a.num_wr = 11; a.num_wr_kb = 12;
2388 a.num_objects_recovered = 14;
2389 a.num_bytes_recovered = 15;
2390 a.num_keys_recovered = 16;
2391 a.num_deep_scrub_errors = 17;
2392 a.num_shallow_scrub_errors = 18;
2393 a.num_scrub_errors = a.num_deep_scrub_errors + a.num_shallow_scrub_errors;
2394 a.num_objects_dirty = 21;
2395 a.num_whiteouts = 22;
2396 a.num_objects_misplaced = 1232;
2397 a.num_objects_hit_set_archive = 2;
2398 a.num_bytes_hit_set_archive = 27;
2399 a.num_flush = 5;
2400 a.num_flush_kb = 6;
2401 a.num_evict = 7;
2402 a.num_evict_kb = 8;
2403 a.num_promote = 9;
2404 a.num_flush_mode_high = 0;
2405 a.num_flush_mode_low = 1;
2406 a.num_evict_mode_some = 1;
2407 a.num_evict_mode_full = 0;
2408 a.num_objects_pinned = 20;
2409 a.num_large_omap_objects = 5;
2410 a.num_objects_manifest = 2;
2411 a.num_omap_bytes = 20000;
2412 a.num_omap_keys = 200;
2413 a.num_objects_repaired = 300;
2414 o.push_back(new object_stat_sum_t(a));
2415 }
2416
2417 void object_stat_sum_t::add(const object_stat_sum_t& o)
2418 {
2419 num_bytes += o.num_bytes;
2420 num_objects += o.num_objects;
2421 num_object_clones += o.num_object_clones;
2422 num_object_copies += o.num_object_copies;
2423 num_objects_missing_on_primary += o.num_objects_missing_on_primary;
2424 num_objects_missing += o.num_objects_missing;
2425 num_objects_degraded += o.num_objects_degraded;
2426 num_objects_misplaced += o.num_objects_misplaced;
2427 num_rd += o.num_rd;
2428 num_rd_kb += o.num_rd_kb;
2429 num_wr += o.num_wr;
2430 num_wr_kb += o.num_wr_kb;
2431 num_objects_unfound += o.num_objects_unfound;
2432 num_scrub_errors += o.num_scrub_errors;
2433 num_shallow_scrub_errors += o.num_shallow_scrub_errors;
2434 num_deep_scrub_errors += o.num_deep_scrub_errors;
2435 num_objects_recovered += o.num_objects_recovered;
2436 num_bytes_recovered += o.num_bytes_recovered;
2437 num_keys_recovered += o.num_keys_recovered;
2438 num_objects_dirty += o.num_objects_dirty;
2439 num_whiteouts += o.num_whiteouts;
2440 num_objects_omap += o.num_objects_omap;
2441 num_objects_hit_set_archive += o.num_objects_hit_set_archive;
2442 num_bytes_hit_set_archive += o.num_bytes_hit_set_archive;
2443 num_flush += o.num_flush;
2444 num_flush_kb += o.num_flush_kb;
2445 num_evict += o.num_evict;
2446 num_evict_kb += o.num_evict_kb;
2447 num_promote += o.num_promote;
2448 num_flush_mode_high += o.num_flush_mode_high;
2449 num_flush_mode_low += o.num_flush_mode_low;
2450 num_evict_mode_some += o.num_evict_mode_some;
2451 num_evict_mode_full += o.num_evict_mode_full;
2452 num_objects_pinned += o.num_objects_pinned;
2453 num_legacy_snapsets += o.num_legacy_snapsets;
2454 num_large_omap_objects += o.num_large_omap_objects;
2455 num_objects_manifest += o.num_objects_manifest;
2456 num_omap_bytes += o.num_omap_bytes;
2457 num_omap_keys += o.num_omap_keys;
2458 num_objects_repaired += o.num_objects_repaired;
2459 }
2460
2461 void object_stat_sum_t::sub(const object_stat_sum_t& o)
2462 {
2463 num_bytes -= o.num_bytes;
2464 num_objects -= o.num_objects;
2465 num_object_clones -= o.num_object_clones;
2466 num_object_copies -= o.num_object_copies;
2467 num_objects_missing_on_primary -= o.num_objects_missing_on_primary;
2468 num_objects_missing -= o.num_objects_missing;
2469 num_objects_degraded -= o.num_objects_degraded;
2470 num_objects_misplaced -= o.num_objects_misplaced;
2471 num_rd -= o.num_rd;
2472 num_rd_kb -= o.num_rd_kb;
2473 num_wr -= o.num_wr;
2474 num_wr_kb -= o.num_wr_kb;
2475 num_objects_unfound -= o.num_objects_unfound;
2476 num_scrub_errors -= o.num_scrub_errors;
2477 num_shallow_scrub_errors -= o.num_shallow_scrub_errors;
2478 num_deep_scrub_errors -= o.num_deep_scrub_errors;
2479 num_objects_recovered -= o.num_objects_recovered;
2480 num_bytes_recovered -= o.num_bytes_recovered;
2481 num_keys_recovered -= o.num_keys_recovered;
2482 num_objects_dirty -= o.num_objects_dirty;
2483 num_whiteouts -= o.num_whiteouts;
2484 num_objects_omap -= o.num_objects_omap;
2485 num_objects_hit_set_archive -= o.num_objects_hit_set_archive;
2486 num_bytes_hit_set_archive -= o.num_bytes_hit_set_archive;
2487 num_flush -= o.num_flush;
2488 num_flush_kb -= o.num_flush_kb;
2489 num_evict -= o.num_evict;
2490 num_evict_kb -= o.num_evict_kb;
2491 num_promote -= o.num_promote;
2492 num_flush_mode_high -= o.num_flush_mode_high;
2493 num_flush_mode_low -= o.num_flush_mode_low;
2494 num_evict_mode_some -= o.num_evict_mode_some;
2495 num_evict_mode_full -= o.num_evict_mode_full;
2496 num_objects_pinned -= o.num_objects_pinned;
2497 num_legacy_snapsets -= o.num_legacy_snapsets;
2498 num_large_omap_objects -= o.num_large_omap_objects;
2499 num_objects_manifest -= o.num_objects_manifest;
2500 num_omap_bytes -= o.num_omap_bytes;
2501 num_omap_keys -= o.num_omap_keys;
2502 num_objects_repaired -= o.num_objects_repaired;
2503 }
2504
2505 bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
2506 {
2507 return
2508 l.num_bytes == r.num_bytes &&
2509 l.num_objects == r.num_objects &&
2510 l.num_object_clones == r.num_object_clones &&
2511 l.num_object_copies == r.num_object_copies &&
2512 l.num_objects_missing_on_primary == r.num_objects_missing_on_primary &&
2513 l.num_objects_missing == r.num_objects_missing &&
2514 l.num_objects_degraded == r.num_objects_degraded &&
2515 l.num_objects_misplaced == r.num_objects_misplaced &&
2516 l.num_objects_unfound == r.num_objects_unfound &&
2517 l.num_rd == r.num_rd &&
2518 l.num_rd_kb == r.num_rd_kb &&
2519 l.num_wr == r.num_wr &&
2520 l.num_wr_kb == r.num_wr_kb &&
2521 l.num_scrub_errors == r.num_scrub_errors &&
2522 l.num_shallow_scrub_errors == r.num_shallow_scrub_errors &&
2523 l.num_deep_scrub_errors == r.num_deep_scrub_errors &&
2524 l.num_objects_recovered == r.num_objects_recovered &&
2525 l.num_bytes_recovered == r.num_bytes_recovered &&
2526 l.num_keys_recovered == r.num_keys_recovered &&
2527 l.num_objects_dirty == r.num_objects_dirty &&
2528 l.num_whiteouts == r.num_whiteouts &&
2529 l.num_objects_omap == r.num_objects_omap &&
2530 l.num_objects_hit_set_archive == r.num_objects_hit_set_archive &&
2531 l.num_bytes_hit_set_archive == r.num_bytes_hit_set_archive &&
2532 l.num_flush == r.num_flush &&
2533 l.num_flush_kb == r.num_flush_kb &&
2534 l.num_evict == r.num_evict &&
2535 l.num_evict_kb == r.num_evict_kb &&
2536 l.num_promote == r.num_promote &&
2537 l.num_flush_mode_high == r.num_flush_mode_high &&
2538 l.num_flush_mode_low == r.num_flush_mode_low &&
2539 l.num_evict_mode_some == r.num_evict_mode_some &&
2540 l.num_evict_mode_full == r.num_evict_mode_full &&
2541 l.num_objects_pinned == r.num_objects_pinned &&
2542 l.num_legacy_snapsets == r.num_legacy_snapsets &&
2543 l.num_large_omap_objects == r.num_large_omap_objects &&
2544 l.num_objects_manifest == r.num_objects_manifest &&
2545 l.num_omap_bytes == r.num_omap_bytes &&
2546 l.num_omap_keys == r.num_omap_keys &&
2547 l.num_objects_repaired == r.num_objects_repaired;
2548 }
2549
2550 // -- object_stat_collection_t --
2551
/**
 * Dump the collection as a single "stat_sum" object section whose content
 * is produced by sum.dump().
 *
 * @param f formatter receiving the output.
 */
void object_stat_collection_t::dump(Formatter *f) const
{
  f->open_object_section("stat_sum");
  sum.dump(f);
  f->close_section();
}
2558
/**
 * Serialize the collection: the sum followed by a 32-bit zero.
 */
void object_stat_collection_t::encode(bufferlist& bl) const
{
  ENCODE_START(2, 2, bl);
  encode(sum, bl);
  // NOTE(review): presumably the element count of an empty per-category map;
  // decode() reads a map<string,object_stat_sum_t> at this position and
  // discards it.
  encode((__u32)0, bl);
  ENCODE_FINISH(bl);
}
2566
/**
 * Deserialize the collection: read the sum, then read and discard a
 * map<string,object_stat_sum_t> that follows it in the payload.
 */
void object_stat_collection_t::decode(bufferlist::const_iterator& bl)
{
  DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
  decode(sum, bl);
  {
    // Decoded only to advance the iterator; the content is not kept.
    map<string,object_stat_sum_t> cat_sum;
    decode(cat_sum, bl);
  }
  DECODE_FINISH(bl);
}
2577
2578 void object_stat_collection_t::generate_test_instances(list<object_stat_collection_t*>& o)
2579 {
2580 object_stat_collection_t a;
2581 o.push_back(new object_stat_collection_t(a));
2582 list<object_stat_sum_t*> l;
2583 object_stat_sum_t::generate_test_instances(l);
2584 for (list<object_stat_sum_t*>::iterator p = l.begin(); p != l.end(); ++p) {
2585 a.add(**p);
2586 o.push_back(new object_stat_collection_t(a));
2587 }
2588 }
2589
2590
2591 // -- pg_stat_t --
2592
2593 bool pg_stat_t::is_acting_osd(int32_t osd, bool primary) const
2594 {
2595 if (primary && osd == acting_primary) {
2596 return true;
2597 } else if (!primary) {
2598 for(vector<int32_t>::const_iterator it = acting.begin();
2599 it != acting.end(); ++it)
2600 {
2601 if (*it == osd)
2602 return true;
2603 }
2604 }
2605 return false;
2606 }
2607
2608 void pg_stat_t::dump(Formatter *f) const
2609 {
2610 f->dump_stream("version") << version;
2611 f->dump_stream("reported_seq") << reported_seq;
2612 f->dump_stream("reported_epoch") << reported_epoch;
2613 f->dump_string("state", pg_state_string(state));
2614 f->dump_stream("last_fresh") << last_fresh;
2615 f->dump_stream("last_change") << last_change;
2616 f->dump_stream("last_active") << last_active;
2617 f->dump_stream("last_peered") << last_peered;
2618 f->dump_stream("last_clean") << last_clean;
2619 f->dump_stream("last_became_active") << last_became_active;
2620 f->dump_stream("last_became_peered") << last_became_peered;
2621 f->dump_stream("last_unstale") << last_unstale;
2622 f->dump_stream("last_undegraded") << last_undegraded;
2623 f->dump_stream("last_fullsized") << last_fullsized;
2624 f->dump_unsigned("mapping_epoch", mapping_epoch);
2625 f->dump_stream("log_start") << log_start;
2626 f->dump_stream("ondisk_log_start") << ondisk_log_start;
2627 f->dump_unsigned("created", created);
2628 f->dump_unsigned("last_epoch_clean", last_epoch_clean);
2629 f->dump_stream("parent") << parent;
2630 f->dump_unsigned("parent_split_bits", parent_split_bits);
2631 f->dump_stream("last_scrub") << last_scrub;
2632 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
2633 f->dump_stream("last_deep_scrub") << last_deep_scrub;
2634 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
2635 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
2636 f->dump_int("log_size", log_size);
2637 f->dump_int("ondisk_log_size", ondisk_log_size);
2638 f->dump_bool("stats_invalid", stats_invalid);
2639 f->dump_bool("dirty_stats_invalid", dirty_stats_invalid);
2640 f->dump_bool("omap_stats_invalid", omap_stats_invalid);
2641 f->dump_bool("hitset_stats_invalid", hitset_stats_invalid);
2642 f->dump_bool("hitset_bytes_stats_invalid", hitset_bytes_stats_invalid);
2643 f->dump_bool("pin_stats_invalid", pin_stats_invalid);
2644 f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
2645 f->dump_unsigned("snaptrimq_len", snaptrimq_len);
2646 stats.dump(f);
2647 f->open_array_section("up");
2648 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2649 f->dump_int("osd", *p);
2650 f->close_section();
2651 f->open_array_section("acting");
2652 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2653 f->dump_int("osd", *p);
2654 f->close_section();
2655 f->open_array_section("avail_no_missing");
2656 for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p)
2657 f->dump_stream("shard") << *p;
2658 f->close_section();
2659 f->open_array_section("object_location_counts");
2660 for (auto p = object_location_counts.cbegin(); p != object_location_counts.cend(); ++p) {
2661 f->open_object_section("entry");
2662 f->dump_stream("shards") << p->first;
2663 f->dump_int("objects", p->second);
2664 f->close_section();
2665 }
2666 f->close_section();
2667 f->open_array_section("blocked_by");
2668 for (vector<int32_t>::const_iterator p = blocked_by.begin();
2669 p != blocked_by.end(); ++p)
2670 f->dump_int("osd", *p);
2671 f->close_section();
2672 f->dump_int("up_primary", up_primary);
2673 f->dump_int("acting_primary", acting_primary);
2674 f->open_array_section("purged_snaps");
2675 for (interval_set<snapid_t>::const_iterator i = purged_snaps.begin();
2676 i != purged_snaps.end();
2677 ++i) {
2678 f->open_object_section("interval");
2679 f->dump_stream("start") << i.get_start();
2680 f->dump_stream("length") << i.get_len();
2681 f->close_section();
2682 }
2683 f->close_section();
2684 }
2685
2686 void pg_stat_t::dump_brief(Formatter *f) const
2687 {
2688 f->dump_string("state", pg_state_string(state));
2689 f->open_array_section("up");
2690 for (vector<int32_t>::const_iterator p = up.begin(); p != up.end(); ++p)
2691 f->dump_int("osd", *p);
2692 f->close_section();
2693 f->open_array_section("acting");
2694 for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
2695 f->dump_int("osd", *p);
2696 f->close_section();
2697 f->dump_int("up_primary", up_primary);
2698 f->dump_int("acting_primary", acting_primary);
2699 }
2700
/**
 * Serialize the PG stat record, wrapped by ENCODE_START(26, 22, ...).
 *
 * The field order below is the wire order and must stay in step with
 * decode(). The state value is written in two 32-bit halves: the low half
 * early in the payload, the high half after snaptrimq_len.
 */
void pg_stat_t::encode(bufferlist &bl) const
{
  ENCODE_START(26, 22, bl);
  encode(version, bl);
  encode(reported_seq, bl);
  encode(reported_epoch, bl);
  encode((__u32)state, bl); // for older peers
  encode(log_start, bl);
  encode(ondisk_log_start, bl);
  encode(created, bl);
  encode(last_epoch_clean, bl);
  encode(parent, bl);
  encode(parent_split_bits, bl);
  encode(last_scrub, bl);
  encode(last_scrub_stamp, bl);
  encode(stats, bl);
  encode(log_size, bl);
  encode(ondisk_log_size, bl);
  encode(up, bl);
  encode(acting, bl);
  encode(last_fresh, bl);
  encode(last_change, bl);
  encode(last_active, bl);
  encode(last_clean, bl);
  encode(last_unstale, bl);
  encode(mapping_epoch, bl);
  encode(last_deep_scrub, bl);
  encode(last_deep_scrub_stamp, bl);
  encode(stats_invalid, bl);
  encode(last_clean_scrub_stamp, bl);
  encode(last_became_active, bl);
  encode(dirty_stats_invalid, bl);
  encode(up_primary, bl);
  encode(acting_primary, bl);
  encode(omap_stats_invalid, bl);
  encode(hitset_stats_invalid, bl);
  encode(blocked_by, bl);
  encode(last_undegraded, bl);
  encode(last_fullsized, bl);
  encode(hitset_bytes_stats_invalid, bl);
  encode(last_peered, bl);
  encode(last_became_peered, bl);
  encode(pin_stats_invalid, bl);
  encode(snaptrimq_len, bl);
  // Upper 32 bits of state; the lower 32 bits were written above.
  __u32 top_state = (state >> 32);
  encode(top_state, bl);
  encode(purged_snaps, bl);
  encode(manifest_stats_invalid, bl);
  encode(avail_no_missing, bl);
  encode(object_location_counts, bl);
  ENCODE_FINISH(bl);
}
2753
2754 void pg_stat_t::decode(bufferlist::const_iterator &bl)
2755 {
2756 bool tmp;
2757 uint32_t old_state;
2758 DECODE_START(26, bl);
2759 decode(version, bl);
2760 decode(reported_seq, bl);
2761 decode(reported_epoch, bl);
2762 decode(old_state, bl);
2763 decode(log_start, bl);
2764 decode(ondisk_log_start, bl);
2765 decode(created, bl);
2766 decode(last_epoch_clean, bl);
2767 decode(parent, bl);
2768 decode(parent_split_bits, bl);
2769 decode(last_scrub, bl);
2770 decode(last_scrub_stamp, bl);
2771 decode(stats, bl);
2772 decode(log_size, bl);
2773 decode(ondisk_log_size, bl);
2774 decode(up, bl);
2775 decode(acting, bl);
2776 decode(last_fresh, bl);
2777 decode(last_change, bl);
2778 decode(last_active, bl);
2779 decode(last_clean, bl);
2780 decode(last_unstale, bl);
2781 decode(mapping_epoch, bl);
2782 decode(last_deep_scrub, bl);
2783 decode(last_deep_scrub_stamp, bl);
2784 decode(tmp, bl);
2785 stats_invalid = tmp;
2786 decode(last_clean_scrub_stamp, bl);
2787 decode(last_became_active, bl);
2788 decode(tmp, bl);
2789 dirty_stats_invalid = tmp;
2790 decode(up_primary, bl);
2791 decode(acting_primary, bl);
2792 decode(tmp, bl);
2793 omap_stats_invalid = tmp;
2794 decode(tmp, bl);
2795 hitset_stats_invalid = tmp;
2796 decode(blocked_by, bl);
2797 decode(last_undegraded, bl);
2798 decode(last_fullsized, bl);
2799 decode(tmp, bl);
2800 hitset_bytes_stats_invalid = tmp;
2801 decode(last_peered, bl);
2802 decode(last_became_peered, bl);
2803 decode(tmp, bl);
2804 pin_stats_invalid = tmp;
2805 if (struct_v >= 23) {
2806 decode(snaptrimq_len, bl);
2807 if (struct_v >= 24) {
2808 __u32 top_state;
2809 decode(top_state, bl);
2810 state = (uint64_t)old_state | ((uint64_t)top_state << 32);
2811 decode(purged_snaps, bl);
2812 } else {
2813 state = old_state;
2814 }
2815 if (struct_v >= 25) {
2816 decode(tmp, bl);
2817 manifest_stats_invalid = tmp;
2818 } else {
2819 manifest_stats_invalid = true;
2820 }
2821 if (struct_v >= 26) {
2822 decode(avail_no_missing, bl);
2823 decode(object_location_counts, bl);
2824 }
2825 }
2826 DECODE_FINISH(bl);
2827 }
2828
2829 void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
2830 {
2831 pg_stat_t a;
2832 o.push_back(new pg_stat_t(a));
2833
2834 a.version = eversion_t(1, 3);
2835 a.reported_epoch = 1;
2836 a.reported_seq = 2;
2837 a.state = 123;
2838 a.mapping_epoch = 998;
2839 a.last_fresh = utime_t(1002, 1);
2840 a.last_change = utime_t(1002, 2);
2841 a.last_active = utime_t(1002, 3);
2842 a.last_clean = utime_t(1002, 4);
2843 a.last_unstale = utime_t(1002, 5);
2844 a.last_undegraded = utime_t(1002, 7);
2845 a.last_fullsized = utime_t(1002, 8);
2846 a.log_start = eversion_t(1, 4);
2847 a.ondisk_log_start = eversion_t(1, 5);
2848 a.created = 6;
2849 a.last_epoch_clean = 7;
2850 a.parent = pg_t(1, 2);
2851 a.parent_split_bits = 12;
2852 a.last_scrub = eversion_t(9, 10);
2853 a.last_scrub_stamp = utime_t(11, 12);
2854 a.last_deep_scrub = eversion_t(13, 14);
2855 a.last_deep_scrub_stamp = utime_t(15, 16);
2856 a.last_clean_scrub_stamp = utime_t(17, 18);
2857 a.snaptrimq_len = 1048576;
2858 list<object_stat_collection_t*> l;
2859 object_stat_collection_t::generate_test_instances(l);
2860 a.stats = *l.back();
2861 a.log_size = 99;
2862 a.ondisk_log_size = 88;
2863 a.up.push_back(123);
2864 a.up_primary = 123;
2865 a.acting.push_back(456);
2866 a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD));
2867 set<pg_shard_t> sset = { pg_shard_t(0), pg_shard_t(1) };
2868 a.object_location_counts.insert(make_pair(sset, 10));
2869 sset.insert(pg_shard_t(2));
2870 a.object_location_counts.insert(make_pair(sset, 5));
2871 a.acting_primary = 456;
2872 o.push_back(new pg_stat_t(a));
2873
2874 a.up.push_back(124);
2875 a.up_primary = 124;
2876 a.acting.push_back(124);
2877 a.acting_primary = 124;
2878 a.blocked_by.push_back(155);
2879 a.blocked_by.push_back(156);
2880 o.push_back(new pg_stat_t(a));
2881 }
2882
2883 bool operator==(const pg_stat_t& l, const pg_stat_t& r)
2884 {
2885 return
2886 l.version == r.version &&
2887 l.reported_seq == r.reported_seq &&
2888 l.reported_epoch == r.reported_epoch &&
2889 l.state == r.state &&
2890 l.last_fresh == r.last_fresh &&
2891 l.last_change == r.last_change &&
2892 l.last_active == r.last_active &&
2893 l.last_peered == r.last_peered &&
2894 l.last_clean == r.last_clean &&
2895 l.last_unstale == r.last_unstale &&
2896 l.last_undegraded == r.last_undegraded &&
2897 l.last_fullsized == r.last_fullsized &&
2898 l.log_start == r.log_start &&
2899 l.ondisk_log_start == r.ondisk_log_start &&
2900 l.created == r.created &&
2901 l.last_epoch_clean == r.last_epoch_clean &&
2902 l.parent == r.parent &&
2903 l.parent_split_bits == r.parent_split_bits &&
2904 l.last_scrub == r.last_scrub &&
2905 l.last_deep_scrub == r.last_deep_scrub &&
2906 l.last_scrub_stamp == r.last_scrub_stamp &&
2907 l.last_deep_scrub_stamp == r.last_deep_scrub_stamp &&
2908 l.last_clean_scrub_stamp == r.last_clean_scrub_stamp &&
2909 l.stats == r.stats &&
2910 l.stats_invalid == r.stats_invalid &&
2911 l.log_size == r.log_size &&
2912 l.ondisk_log_size == r.ondisk_log_size &&
2913 l.up == r.up &&
2914 l.acting == r.acting &&
2915 l.avail_no_missing == r.avail_no_missing &&
2916 l.object_location_counts == r.object_location_counts &&
2917 l.mapping_epoch == r.mapping_epoch &&
2918 l.blocked_by == r.blocked_by &&
2919 l.last_became_active == r.last_became_active &&
2920 l.last_became_peered == r.last_became_peered &&
2921 l.dirty_stats_invalid == r.dirty_stats_invalid &&
2922 l.omap_stats_invalid == r.omap_stats_invalid &&
2923 l.hitset_stats_invalid == r.hitset_stats_invalid &&
2924 l.hitset_bytes_stats_invalid == r.hitset_bytes_stats_invalid &&
2925 l.up_primary == r.up_primary &&
2926 l.acting_primary == r.acting_primary &&
2927 l.pin_stats_invalid == r.pin_stats_invalid &&
2928 l.manifest_stats_invalid == r.manifest_stats_invalid &&
2929 l.purged_snaps == r.purged_snaps &&
2930 l.snaptrimq_len == r.snaptrimq_len;
2931 }
2932
2933 // -- store_statfs_t --
2934
2935 bool store_statfs_t::operator==(const store_statfs_t& other) const
2936 {
2937 return total == other.total
2938 && available == other.available
2939 && allocated == other.allocated
2940 && internally_reserved == other.internally_reserved
2941 && data_stored == other.data_stored
2942 && data_compressed == other.data_compressed
2943 && data_compressed_allocated == other.data_compressed_allocated
2944 && data_compressed_original == other.data_compressed_original
2945 && omap_allocated == other.omap_allocated
2946 && internal_metadata == other.internal_metadata;
2947 }
2948
2949 void store_statfs_t::dump(Formatter *f) const
2950 {
2951 f->dump_int("total", total);
2952 f->dump_int("available", available);
2953 f->dump_int("internally_reserved", internally_reserved);
2954 f->dump_int("allocated", allocated);
2955 f->dump_int("data_stored", data_stored);
2956 f->dump_int("data_compressed", data_compressed);
2957 f->dump_int("data_compressed_allocated", data_compressed_allocated);
2958 f->dump_int("data_compressed_original", data_compressed_original);
2959 f->dump_int("omap_allocated", omap_allocated);
2960 f->dump_int("internal_metadata", internal_metadata);
2961 }
2962
2963 ostream& operator<<(ostream& out, const store_statfs_t &s)
2964 {
2965 out << std::hex
2966 << "store_statfs(0x" << s.available
2967 << "/0x" << s.internally_reserved
2968 << "/0x" << s.total
2969 << ", data 0x" << s.data_stored
2970 << "/0x" << s.allocated
2971 << ", compress 0x" << s.data_compressed
2972 << "/0x" << s.data_compressed_allocated
2973 << "/0x" << s.data_compressed_original
2974 << ", omap 0x" << s.omap_allocated
2975 << ", meta 0x" << s.internal_metadata
2976 << std::dec
2977 << ")";
2978 return out;
2979 }
2980
2981 void store_statfs_t::generate_test_instances(list<store_statfs_t*>& o)
2982 {
2983 store_statfs_t a;
2984 o.push_back(new store_statfs_t(a));
2985 a.total = 234;
2986 a.available = 123;
2987 a.internally_reserved = 33;
2988 a.allocated = 32;
2989 a.data_stored = 44;
2990 a.data_compressed = 21;
2991 a.data_compressed_allocated = 12;
2992 a.data_compressed_original = 13;
2993 a.omap_allocated = 14;
2994 a.internal_metadata = 15;
2995 o.push_back(new store_statfs_t(a));
2996 }
2997
2998 // -- pool_stat_t --
2999
3000 void pool_stat_t::dump(Formatter *f) const
3001 {
3002 stats.dump(f);
3003 f->open_object_section("store_stats");
3004 store_stats.dump(f);
3005 f->close_section();
3006 f->dump_int("log_size", log_size);
3007 f->dump_int("ondisk_log_size", ondisk_log_size);
3008 f->dump_int("up", up);
3009 f->dump_int("acting", acting);
3010 f->dump_int("num_store_stats", acting);
3011 }
3012
/**
 * Serialize the pool stat record.
 *
 * @param bl       destination buffer.
 * @param features feature bits of the receiver. Without
 *                 CEPH_FEATURE_OSDENC a legacy layout is written: a
 *                 one-byte version 4 followed by stats and the two log
 *                 sizes, with no ENCODE_START / ENCODE_FINISH wrapper.
 */
void pool_stat_t::encode(bufferlist &bl, uint64_t features) const
{
  using ceph::encode;
  if ((features & CEPH_FEATURE_OSDENC) == 0) {
    // Legacy layout for peers without CEPH_FEATURE_OSDENC.
    __u8 v = 4;
    encode(v, bl);
    encode(stats, bl);
    encode(log_size, bl);
    encode(ondisk_log_size, bl);
    return;
  }

  // Current layout; field order must stay in step with decode().
  ENCODE_START(7, 5, bl);
  encode(stats, bl);
  encode(log_size, bl);
  encode(ondisk_log_size, bl);
  encode(up, bl);
  encode(acting, bl);
  encode(store_stats, bl);
  encode(num_store_stats, bl);
  ENCODE_FINISH(bl);
}
3035
/**
 * Deserialize a pool stat record.
 *
 * For struct_v >= 4 the payload holds stats and the log sizes, followed by
 * up / acting (struct_v >= 6) and store_stats / num_store_stats
 * (struct_v >= 7); missing trailing fields are zeroed or reset. For
 * struct_v < 4 individual counters are read straight into stats.sum.
 */
void pool_stat_t::decode(bufferlist::const_iterator &bl)
{
  DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
  if (struct_v >= 4) {
    decode(stats, bl);
    decode(log_size, bl);
    decode(ondisk_log_size, bl);
    if (struct_v >= 6) {
      decode(up, bl);
      decode(acting, bl);
    } else {
      up = 0;
      acting = 0;
    }
    if (struct_v >= 7) {
      decode(store_stats, bl);
      decode(num_store_stats, bl);
    } else {
      store_stats.reset();
      num_store_stats = 0;
    }

  } else {
    // Pre-v4 layout: individual counters instead of an encoded stats object.
    decode(stats.sum.num_bytes, bl);
    // Read only to advance the iterator; the value is not stored.
    uint64_t num_kb;
    decode(num_kb, bl);
    decode(stats.sum.num_objects, bl);
    decode(stats.sum.num_object_clones, bl);
    decode(stats.sum.num_object_copies, bl);
    decode(stats.sum.num_objects_missing_on_primary, bl);
    decode(stats.sum.num_objects_degraded, bl);
    decode(log_size, bl);
    decode(ondisk_log_size, bl);
    if (struct_v >= 2) {
      decode(stats.sum.num_rd, bl);
      decode(stats.sum.num_rd_kb, bl);
      decode(stats.sum.num_wr, bl);
      decode(stats.sum.num_wr_kb, bl);
    }
    if (struct_v >= 3) {
      decode(stats.sum.num_objects_unfound, bl);
    }
  }
  DECODE_FINISH(bl);
}
3081
3082 void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
3083 {
3084 pool_stat_t a;
3085 o.push_back(new pool_stat_t(a));
3086
3087 list<object_stat_collection_t*> l;
3088 object_stat_collection_t::generate_test_instances(l);
3089 list<store_statfs_t*> ll;
3090 store_statfs_t::generate_test_instances(ll);
3091 a.stats = *l.back();
3092 a.store_stats = *ll.back();
3093 a.log_size = 123;
3094 a.ondisk_log_size = 456;
3095 a.acting = 3;
3096 a.up = 4;
3097 a.num_store_stats = 1;
3098 o.push_back(new pool_stat_t(a));
3099 }
3100
3101
3102 // -- pg_history_t --
3103
3104 void pg_history_t::encode(bufferlist &bl) const
3105 {
3106 ENCODE_START(9, 4, bl);
3107 encode(epoch_created, bl);
3108 encode(last_epoch_started, bl);
3109 encode(last_epoch_clean, bl);
3110 encode(last_epoch_split, bl);
3111 encode(same_interval_since, bl);
3112 encode(same_up_since, bl);
3113 encode(same_primary_since, bl);
3114 encode(last_scrub, bl);
3115 encode(last_scrub_stamp, bl);
3116 encode(last_deep_scrub, bl);
3117 encode(last_deep_scrub_stamp, bl);
3118 encode(last_clean_scrub_stamp, bl);
3119 encode(last_epoch_marked_full, bl);
3120 encode(last_interval_started, bl);
3121 encode(last_interval_clean, bl);
3122 encode(epoch_pool_created, bl);
3123 ENCODE_FINISH(bl);
3124 }
3125
3126 void pg_history_t::decode(bufferlist::const_iterator &bl)
3127 {
3128 DECODE_START_LEGACY_COMPAT_LEN(9, 4, 4, bl);
3129 decode(epoch_created, bl);
3130 decode(last_epoch_started, bl);
3131 if (struct_v >= 3)
3132 decode(last_epoch_clean, bl);
3133 else
3134 last_epoch_clean = last_epoch_started; // careful, it's a lie!
3135 decode(last_epoch_split, bl);
3136 decode(same_interval_since, bl);
3137 decode(same_up_since, bl);
3138 decode(same_primary_since, bl);
3139 if (struct_v >= 2) {
3140 decode(last_scrub, bl);
3141 decode(last_scrub_stamp, bl);
3142 }
3143 if (struct_v >= 5) {
3144 decode(last_deep_scrub, bl);
3145 decode(last_deep_scrub_stamp, bl);
3146 }
3147 if (struct_v >= 6) {
3148 decode(last_clean_scrub_stamp, bl);
3149 }
3150 if (struct_v >= 7) {
3151 decode(last_epoch_marked_full, bl);
3152 }
3153 if (struct_v >= 8) {
3154 decode(last_interval_started, bl);
3155 decode(last_interval_clean, bl);
3156 } else {
3157 if (last_epoch_started >= same_interval_since) {
3158 last_interval_started = same_interval_since;
3159 } else {
3160 last_interval_started = last_epoch_started; // best guess
3161 }
3162 if (last_epoch_clean >= same_interval_since) {
3163 last_interval_clean = same_interval_since;
3164 } else {
3165 last_interval_clean = last_epoch_clean; // best guess
3166 }
3167 }
3168 if (struct_v >= 9) {
3169 decode(epoch_pool_created, bl);
3170 } else {
3171 epoch_pool_created = epoch_created;
3172 }
3173 DECODE_FINISH(bl);
3174 }
3175
3176 void pg_history_t::dump(Formatter *f) const
3177 {
3178 f->dump_int("epoch_created", epoch_created);
3179 f->dump_int("epoch_pool_created", epoch_pool_created);
3180 f->dump_int("last_epoch_started", last_epoch_started);
3181 f->dump_int("last_interval_started", last_interval_started);
3182 f->dump_int("last_epoch_clean", last_epoch_clean);
3183 f->dump_int("last_interval_clean", last_interval_clean);
3184 f->dump_int("last_epoch_split", last_epoch_split);
3185 f->dump_int("last_epoch_marked_full", last_epoch_marked_full);
3186 f->dump_int("same_up_since", same_up_since);
3187 f->dump_int("same_interval_since", same_interval_since);
3188 f->dump_int("same_primary_since", same_primary_since);
3189 f->dump_stream("last_scrub") << last_scrub;
3190 f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
3191 f->dump_stream("last_deep_scrub") << last_deep_scrub;
3192 f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
3193 f->dump_stream("last_clean_scrub_stamp") << last_clean_scrub_stamp;
3194 }
3195
3196 void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
3197 {
3198 o.push_back(new pg_history_t);
3199 o.push_back(new pg_history_t);
3200 o.back()->epoch_created = 1;
3201 o.back()->epoch_pool_created = 1;
3202 o.back()->last_epoch_started = 2;
3203 o.back()->last_interval_started = 2;
3204 o.back()->last_epoch_clean = 3;
3205 o.back()->last_interval_clean = 2;
3206 o.back()->last_epoch_split = 4;
3207 o.back()->same_up_since = 5;
3208 o.back()->same_interval_since = 6;
3209 o.back()->same_primary_since = 7;
3210 o.back()->last_scrub = eversion_t(8, 9);
3211 o.back()->last_scrub_stamp = utime_t(10, 11);
3212 o.back()->last_deep_scrub = eversion_t(12, 13);
3213 o.back()->last_deep_scrub_stamp = utime_t(14, 15);
3214 o.back()->last_clean_scrub_stamp = utime_t(16, 17);
3215 o.back()->last_epoch_marked_full = 18;
3216 }
3217
3218
3219 // -- pg_info_t --
3220
3221 void pg_info_t::encode(bufferlist &bl) const
3222 {
3223 ENCODE_START(32, 26, bl);
3224 encode(pgid.pgid, bl);
3225 encode(last_update, bl);
3226 encode(last_complete, bl);
3227 encode(log_tail, bl);
3228 if (last_backfill_bitwise && !last_backfill.is_max()) {
3229 encode(hobject_t(), bl);
3230 } else {
3231 encode(last_backfill, bl);
3232 }
3233 encode(stats, bl);
3234 history.encode(bl);
3235 encode(purged_snaps, bl);
3236 encode(last_epoch_started, bl);
3237 encode(last_user_version, bl);
3238 encode(hit_set, bl);
3239 encode(pgid.shard, bl);
3240 encode(last_backfill, bl);
3241 encode(last_backfill_bitwise, bl);
3242 encode(last_interval_started, bl);
3243 ENCODE_FINISH(bl);
3244 }
3245
3246 void pg_info_t::decode(bufferlist::const_iterator &bl)
3247 {
3248 DECODE_START(32, bl);
3249 decode(pgid.pgid, bl);
3250 decode(last_update, bl);
3251 decode(last_complete, bl);
3252 decode(log_tail, bl);
3253 {
3254 hobject_t old_last_backfill;
3255 decode(old_last_backfill, bl);
3256 }
3257 decode(stats, bl);
3258 history.decode(bl);
3259 decode(purged_snaps, bl);
3260 decode(last_epoch_started, bl);
3261 decode(last_user_version, bl);
3262 decode(hit_set, bl);
3263 decode(pgid.shard, bl);
3264 decode(last_backfill, bl);
3265 decode(last_backfill_bitwise, bl);
3266 if (struct_v >= 32) {
3267 decode(last_interval_started, bl);
3268 } else {
3269 last_interval_started = last_epoch_started;
3270 }
3271 DECODE_FINISH(bl);
3272 }
3273
3274 // -- pg_info_t --
3275
3276 void pg_info_t::dump(Formatter *f) const
3277 {
3278 f->dump_stream("pgid") << pgid;
3279 f->dump_stream("last_update") << last_update;
3280 f->dump_stream("last_complete") << last_complete;
3281 f->dump_stream("log_tail") << log_tail;
3282 f->dump_int("last_user_version", last_user_version);
3283 f->dump_stream("last_backfill") << last_backfill;
3284 f->dump_int("last_backfill_bitwise", (int)last_backfill_bitwise);
3285 f->open_array_section("purged_snaps");
3286 for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
3287 i != purged_snaps.end();
3288 ++i) {
3289 f->open_object_section("purged_snap_interval");
3290 f->dump_stream("start") << i.get_start();
3291 f->dump_stream("length") << i.get_len();
3292 f->close_section();
3293 }
3294 f->close_section();
3295 f->open_object_section("history");
3296 history.dump(f);
3297 f->close_section();
3298 f->open_object_section("stats");
3299 stats.dump(f);
3300 f->close_section();
3301
3302 f->dump_int("empty", is_empty());
3303 f->dump_int("dne", dne());
3304 f->dump_int("incomplete", is_incomplete());
3305 f->dump_int("last_epoch_started", last_epoch_started);
3306
3307 f->open_object_section("hit_set_history");
3308 hit_set.dump(f);
3309 f->close_section();
3310 }
3311
3312 void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
3313 {
3314 o.push_back(new pg_info_t);
3315 o.push_back(new pg_info_t);
3316 list<pg_history_t*> h;
3317 pg_history_t::generate_test_instances(h);
3318 o.back()->history = *h.back();
3319 o.back()->pgid = spg_t(pg_t(1, 2), shard_id_t::NO_SHARD);
3320 o.back()->last_update = eversion_t(3, 4);
3321 o.back()->last_complete = eversion_t(5, 6);
3322 o.back()->last_user_version = 2;
3323 o.back()->log_tail = eversion_t(7, 8);
3324 o.back()->last_backfill = hobject_t(object_t("objname"), "key", 123, 456, -1, "");
3325 o.back()->last_backfill_bitwise = true;
3326 {
3327 list<pg_stat_t*> s;
3328 pg_stat_t::generate_test_instances(s);
3329 o.back()->stats = *s.back();
3330 }
3331 {
3332 list<pg_hit_set_history_t*> s;
3333 pg_hit_set_history_t::generate_test_instances(s);
3334 o.back()->hit_set = *s.back();
3335 }
3336 }
3337
3338 // -- pg_notify_t --
3339 void pg_notify_t::encode(bufferlist &bl) const
3340 {
3341 ENCODE_START(2, 2, bl);
3342 encode(query_epoch, bl);
3343 encode(epoch_sent, bl);
3344 encode(info, bl);
3345 encode(to, bl);
3346 encode(from, bl);
3347 ENCODE_FINISH(bl);
3348 }
3349
3350 void pg_notify_t::decode(bufferlist::const_iterator &bl)
3351 {
3352 DECODE_START(2, bl);
3353 decode(query_epoch, bl);
3354 decode(epoch_sent, bl);
3355 decode(info, bl);
3356 decode(to, bl);
3357 decode(from, bl);
3358 DECODE_FINISH(bl);
3359 }
3360
3361 void pg_notify_t::dump(Formatter *f) const
3362 {
3363 f->dump_int("from", from);
3364 f->dump_int("to", to);
3365 f->dump_unsigned("query_epoch", query_epoch);
3366 f->dump_unsigned("epoch_sent", epoch_sent);
3367 {
3368 f->open_object_section("info");
3369 info.dump(f);
3370 f->close_section();
3371 }
3372 }
3373
3374 void pg_notify_t::generate_test_instances(list<pg_notify_t*>& o)
3375 {
3376 o.push_back(new pg_notify_t(shard_id_t(3), shard_id_t::NO_SHARD, 1, 1, pg_info_t()));
3377 o.push_back(new pg_notify_t(shard_id_t(0), shard_id_t(0), 3, 10, pg_info_t()));
3378 }
3379
3380 ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
3381 {
3382 lhs << "(query:" << notify.query_epoch
3383 << " sent:" << notify.epoch_sent
3384 << " " << notify.info;
3385 if (notify.from != shard_id_t::NO_SHARD ||
3386 notify.to != shard_id_t::NO_SHARD)
3387 lhs << " " << (unsigned)notify.from
3388 << "->" << (unsigned)notify.to;
3389 return lhs << ")";
3390 }
3391
3392 // -- pg_interval_t --
3393
3394 void PastIntervals::pg_interval_t::encode(bufferlist& bl) const
3395 {
3396 ENCODE_START(4, 2, bl);
3397 encode(first, bl);
3398 encode(last, bl);
3399 encode(up, bl);
3400 encode(acting, bl);
3401 encode(maybe_went_rw, bl);
3402 encode(primary, bl);
3403 encode(up_primary, bl);
3404 ENCODE_FINISH(bl);
3405 }
3406
3407 void PastIntervals::pg_interval_t::decode(bufferlist::const_iterator& bl)
3408 {
3409 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
3410 decode(first, bl);
3411 decode(last, bl);
3412 decode(up, bl);
3413 decode(acting, bl);
3414 decode(maybe_went_rw, bl);
3415 if (struct_v >= 3) {
3416 decode(primary, bl);
3417 } else {
3418 if (acting.size())
3419 primary = acting[0];
3420 }
3421 if (struct_v >= 4) {
3422 decode(up_primary, bl);
3423 } else {
3424 if (up.size())
3425 up_primary = up[0];
3426 }
3427 DECODE_FINISH(bl);
3428 }
3429
3430 void PastIntervals::pg_interval_t::dump(Formatter *f) const
3431 {
3432 f->dump_unsigned("first", first);
3433 f->dump_unsigned("last", last);
3434 f->dump_int("maybe_went_rw", maybe_went_rw ? 1 : 0);
3435 f->open_array_section("up");
3436 for (vector<int>::const_iterator p = up.begin(); p != up.end(); ++p)
3437 f->dump_int("osd", *p);
3438 f->close_section();
3439 f->open_array_section("acting");
3440 for (vector<int>::const_iterator p = acting.begin(); p != acting.end(); ++p)
3441 f->dump_int("osd", *p);
3442 f->close_section();
3443 f->dump_int("primary", primary);
3444 f->dump_int("up_primary", up_primary);
3445 }
3446
3447 void PastIntervals::pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
3448 {
3449 o.push_back(new pg_interval_t);
3450 o.push_back(new pg_interval_t);
3451 o.back()->up.push_back(1);
3452 o.back()->acting.push_back(2);
3453 o.back()->acting.push_back(3);
3454 o.back()->first = 4;
3455 o.back()->last = 5;
3456 o.back()->maybe_went_rw = true;
3457 }
3458
// NOTE(review): presumably generates the free encode()/decode() overloads
// that forward to the member functions above; confirm against the macro's
// definition.
WRITE_CLASS_ENCODER(PastIntervals::pg_interval_t)
3460
3461
3462 /**
3463 * pi_compact_rep
3464 *
3465 * PastIntervals only needs to be able to answer two questions:
3466 * 1) Where should the primary look for unfound objects?
3467 * 2) List a set of subsets of the OSDs such that contacting at least
3468 * one from each subset guarantees we speak to at least one witness
3469 * of any completed write.
3470 *
3471 * Crucially, 2) does not require keeping *all* past intervals. Certainly,
3472 * we don't need to keep any where maybe_went_rw would be false. We also
3473 * needn't keep two intervals where the actingset in one is a subset
3474 * of the other (only need to keep the smaller of the two sets). In order
3475 * to accurately trim the set of intervals as last_epoch_started changes
3476 * without rebuilding the set from scratch, we'll retain the larger set
 * if it is in an older interval.
3478 */
3479 struct compact_interval_t {
3480 epoch_t first;
3481 epoch_t last;
3482 set<pg_shard_t> acting;
3483 bool supersedes(const compact_interval_t &other) {
3484 for (auto &&i: acting) {
3485 if (!other.acting.count(i))
3486 return false;
3487 }
3488 return true;
3489 }
3490 void dump(Formatter *f) const {
3491 f->open_object_section("compact_interval_t");
3492 f->dump_stream("first") << first;
3493 f->dump_stream("last") << last;
3494 f->dump_stream("acting") << acting;
3495 f->close_section();
3496 }
3497 void encode(bufferlist &bl) const {
3498 ENCODE_START(1, 1, bl);
3499 encode(first, bl);
3500 encode(last, bl);
3501 encode(acting, bl);
3502 ENCODE_FINISH(bl);
3503 }
3504 void decode(bufferlist::const_iterator &bl) {
3505 DECODE_START(1, bl);
3506 decode(first, bl);
3507 decode(last, bl);
3508 decode(acting, bl);
3509 DECODE_FINISH(bl);
3510 }
3511 static void generate_test_instances(list<compact_interval_t*> & o) {
3512 /* Not going to be used, we'll generate pi_compact_rep directly */
3513 }
3514 };
3515 ostream &operator<<(ostream &o, const compact_interval_t &rhs)
3516 {
3517 return o << "([" << rhs.first << "," << rhs.last
3518 << "] acting " << rhs.acting << ")";
3519 }
// NOTE(review): presumably generates the free encode()/decode() overloads
// that forward to compact_interval_t's member functions; confirm against
// the macro's definition.
WRITE_CLASS_ENCODER(compact_interval_t)
3521
3522 class pi_compact_rep : public PastIntervals::interval_rep {
3523 epoch_t first = 0;
3524 epoch_t last = 0; // inclusive
3525 set<pg_shard_t> all_participants;
3526 list<compact_interval_t> intervals;
3527 pi_compact_rep(
3528 bool ec_pool,
3529 std::list<PastIntervals::pg_interval_t> &&intervals) {
3530 for (auto &&i: intervals)
3531 add_interval(ec_pool, i);
3532 }
3533 public:
3534 pi_compact_rep() = default;
3535 pi_compact_rep(const pi_compact_rep &) = default;
3536 pi_compact_rep(pi_compact_rep &&) = default;
3537 pi_compact_rep &operator=(const pi_compact_rep &) = default;
3538 pi_compact_rep &operator=(pi_compact_rep &&) = default;
3539
3540 size_t size() const override { return intervals.size(); }
3541 bool empty() const override {
3542 return first > last || (first == 0 && last == 0);
3543 }
3544 void clear() override {
3545 *this = pi_compact_rep();
3546 }
3547 pair<epoch_t, epoch_t> get_bounds() const override {
3548 return make_pair(first, last + 1);
3549 }
3550 void adjust_start_backwards(epoch_t last_epoch_clean) {
3551 first = last_epoch_clean;
3552 }
3553
3554 set<pg_shard_t> get_all_participants(
3555 bool ec_pool) const override {
3556 return all_participants;
3557 }
3558 void add_interval(
3559 bool ec_pool, const PastIntervals::pg_interval_t &interval) override {
3560 if (first == 0)
3561 first = interval.first;
3562 ceph_assert(interval.last > last);
3563 last = interval.last;
3564 set<pg_shard_t> acting;
3565 for (unsigned i = 0; i < interval.acting.size(); ++i) {
3566 if (interval.acting[i] == CRUSH_ITEM_NONE)
3567 continue;
3568 acting.insert(
3569 pg_shard_t(
3570 interval.acting[i],
3571 ec_pool ? shard_id_t(i) : shard_id_t::NO_SHARD));
3572 }
3573 all_participants.insert(acting.begin(), acting.end());
3574 if (!interval.maybe_went_rw)
3575 return;
3576 intervals.push_back(
3577 compact_interval_t{interval.first, interval.last, acting});
3578 auto plast = intervals.end();
3579 --plast;
3580 for (auto cur = intervals.begin(); cur != plast; ) {
3581 if (plast->supersedes(*cur)) {
3582 intervals.erase(cur++);
3583 } else {
3584 ++cur;
3585 }
3586 }
3587 }
3588 unique_ptr<PastIntervals::interval_rep> clone() const override {
3589 return unique_ptr<PastIntervals::interval_rep>(new pi_compact_rep(*this));
3590 }
3591 ostream &print(ostream &out) const override {
3592 return out << "([" << first << "," << last
3593 << "] intervals=" << intervals << ")";
3594 }
3595 void encode(bufferlist &bl) const override {
3596 ENCODE_START(1, 1, bl);
3597 encode(first, bl);
3598 encode(last, bl);
3599 encode(all_participants, bl);
3600 encode(intervals, bl);
3601 ENCODE_FINISH(bl);
3602 }
3603 void decode(bufferlist::const_iterator &bl) override {
3604 DECODE_START(1, bl);
3605 decode(first, bl);
3606 decode(last, bl);
3607 decode(all_participants, bl);
3608 decode(intervals, bl);
3609 DECODE_FINISH(bl);
3610 }
3611 void dump(Formatter *f) const override {
3612 f->open_object_section("PastIntervals::compact_rep");
3613 f->dump_stream("first") << first;
3614 f->dump_stream("last") << last;
3615 f->open_array_section("all_participants");
3616 for (auto& i : all_participants) {
3617 f->dump_object("pg_shard", i);
3618 }
3619 f->close_section();
3620 f->open_array_section("intervals");
3621 for (auto &&i: intervals) {
3622 i.dump(f);
3623 }
3624 f->close_section();
3625 f->close_section();
3626 }
3627 static void generate_test_instances(list<pi_compact_rep*> &o) {
3628 using ival = PastIntervals::pg_interval_t;
3629 using ivallst = std::list<ival>;
3630 o.push_back(
3631 new pi_compact_rep(
3632 true, ivallst
3633 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3634 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3635 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3636 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3637 }));
3638 o.push_back(
3639 new pi_compact_rep(
3640 false, ivallst
3641 { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
3642 , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
3643 , ival{{ 2}, { 2}, 31, 35, false, 2, 2}
3644 , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
3645 }));
3646 o.push_back(
3647 new pi_compact_rep(
3648 true, ivallst
3649 { ival{{2, 1, 0}, {2, 1, 0}, 10, 20, true, 1, 1}
3650 , ival{{ 0, 2}, { 0, 2}, 21, 30, true, 0, 0}
3651 , ival{{ 0, 2}, {2, 0}, 31, 35, true, 2, 2}
3652 , ival{{ 0, 2}, { 0, 2}, 36, 50, true, 0, 0}
3653 }));
3654 }
3655 void iterate_mayberw_back_to(
3656 epoch_t les,
3657 std::function<void(epoch_t, const set<pg_shard_t> &)> &&f) const override {
3658 for (auto i = intervals.rbegin(); i != intervals.rend(); ++i) {
3659 if (i->last < les)
3660 break;
3661 f(i->first, i->acting);
3662 }
3663 }
3664 virtual ~pi_compact_rep() override {}
3665 };
// NOTE(review): presumably generates the free encode()/decode() overloads
// that forward to pi_compact_rep's member functions; confirm against the
// macro's definition.
WRITE_CLASS_ENCODER(pi_compact_rep)
3667
3668 PastIntervals::PastIntervals()
3669 {
3670 past_intervals.reset(new pi_compact_rep);
3671 }
3672
3673 PastIntervals::PastIntervals(const PastIntervals &rhs)
3674 : past_intervals(rhs.past_intervals ?
3675 rhs.past_intervals->clone() :
3676 nullptr) {}
3677
3678 PastIntervals &PastIntervals::operator=(const PastIntervals &rhs)
3679 {
3680 PastIntervals other(rhs);
3681 swap(other);
3682 return *this;
3683 }
3684
3685 ostream& operator<<(ostream& out, const PastIntervals &i)
3686 {
3687 if (i.past_intervals) {
3688 return i.past_intervals->print(out);
3689 } else {
3690 return out << "(empty)";
3691 }
3692 }
3693
3694 ostream& operator<<(ostream& out, const PastIntervals::PriorSet &i)
3695 {
3696 return out << "PriorSet("
3697 << "ec_pool: " << i.ec_pool
3698 << ", probe: " << i.probe
3699 << ", down: " << i.down
3700 << ", blocked_by: " << i.blocked_by
3701 << ", pg_down: " << i.pg_down
3702 << ")";
3703 }
3704
3705 void PastIntervals::decode(bufferlist::const_iterator &bl)
3706 {
3707 DECODE_START(1, bl);
3708 __u8 type = 0;
3709 decode(type, bl);
3710 switch (type) {
3711 case 0:
3712 break;
3713 case 1:
3714 ceph_abort_msg("pi_simple_rep support removed post-luminous");
3715 break;
3716 case 2:
3717 past_intervals.reset(new pi_compact_rep);
3718 past_intervals->decode(bl);
3719 break;
3720 }
3721 DECODE_FINISH(bl);
3722 }
3723
3724 void PastIntervals::generate_test_instances(list<PastIntervals*> &o)
3725 {
3726 {
3727 list<pi_compact_rep *> compact;
3728 pi_compact_rep::generate_test_instances(compact);
3729 for (auto &&i: compact) {
3730 // takes ownership of contents
3731 o.push_back(new PastIntervals(i));
3732 }
3733 }
3734 return;
3735 }
3736
3737 bool PastIntervals::is_new_interval(
3738 int old_acting_primary,
3739 int new_acting_primary,
3740 const vector<int> &old_acting,
3741 const vector<int> &new_acting,
3742 int old_up_primary,
3743 int new_up_primary,
3744 const vector<int> &old_up,
3745 const vector<int> &new_up,
3746 int old_size,
3747 int new_size,
3748 int old_min_size,
3749 int new_min_size,
3750 unsigned old_pg_num,
3751 unsigned new_pg_num,
3752 unsigned old_pg_num_pending,
3753 unsigned new_pg_num_pending,
3754 bool old_sort_bitwise,
3755 bool new_sort_bitwise,
3756 bool old_recovery_deletes,
3757 bool new_recovery_deletes,
3758 pg_t pgid) {
3759 return old_acting_primary != new_acting_primary ||
3760 new_acting != old_acting ||
3761 old_up_primary != new_up_primary ||
3762 new_up != old_up ||
3763 old_min_size != new_min_size ||
3764 old_size != new_size ||
3765 pgid.is_split(old_pg_num, new_pg_num, 0) ||
3766 // (is or was) pre-merge source
3767 pgid.is_merge_source(old_pg_num_pending, new_pg_num_pending, 0) ||
3768 pgid.is_merge_source(new_pg_num_pending, old_pg_num_pending, 0) ||
3769 // merge source
3770 pgid.is_merge_source(old_pg_num, new_pg_num, 0) ||
3771 // (is or was) pre-merge target
3772 pgid.is_merge_target(old_pg_num_pending, new_pg_num_pending) ||
3773 pgid.is_merge_target(new_pg_num_pending, old_pg_num_pending) ||
3774 // merge target
3775 pgid.is_merge_target(old_pg_num, new_pg_num) ||
3776 old_sort_bitwise != new_sort_bitwise ||
3777 old_recovery_deletes != new_recovery_deletes;
3778 }
3779
3780 bool PastIntervals::is_new_interval(
3781 int old_acting_primary,
3782 int new_acting_primary,
3783 const vector<int> &old_acting,
3784 const vector<int> &new_acting,
3785 int old_up_primary,
3786 int new_up_primary,
3787 const vector<int> &old_up,
3788 const vector<int> &new_up,
3789 OSDMapRef osdmap,
3790 OSDMapRef lastmap,
3791 pg_t pgid)
3792 {
3793 const pg_pool_t *plast = lastmap->get_pg_pool(pgid.pool());
3794 if (!plast) {
3795 return false; // after pool is deleted there are no more interval changes
3796 }
3797 const pg_pool_t *pi = osdmap->get_pg_pool(pgid.pool());
3798 if (!pi) {
3799 return true; // pool was deleted this epoch -> (final!) interval change
3800 }
3801 return
3802 is_new_interval(old_acting_primary,
3803 new_acting_primary,
3804 old_acting,
3805 new_acting,
3806 old_up_primary,
3807 new_up_primary,
3808 old_up,
3809 new_up,
3810 plast->size,
3811 pi->size,
3812 plast->min_size,
3813 pi->min_size,
3814 plast->get_pg_num(),
3815 pi->get_pg_num(),
3816 plast->get_pg_num_pending(),
3817 pi->get_pg_num_pending(),
3818 lastmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3819 osdmap->test_flag(CEPH_OSDMAP_SORTBITWISE),
3820 lastmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
3821 osdmap->test_flag(CEPH_OSDMAP_RECOVERY_DELETES),
3822 pgid);
3823 }
3824
3825 bool PastIntervals::check_new_interval(
3826 int old_acting_primary,
3827 int new_acting_primary,
3828 const vector<int> &old_acting,
3829 const vector<int> &new_acting,
3830 int old_up_primary,
3831 int new_up_primary,
3832 const vector<int> &old_up,
3833 const vector<int> &new_up,
3834 epoch_t same_interval_since,
3835 epoch_t last_epoch_clean,
3836 OSDMapRef osdmap,
3837 OSDMapRef lastmap,
3838 pg_t pgid,
3839 IsPGRecoverablePredicate *could_have_gone_active,
3840 PastIntervals *past_intervals,
3841 std::ostream *out)
3842 {
3843 /*
3844 * We have to be careful to gracefully deal with situations like
3845 * so. Say we have a power outage or something that takes out both
3846 * OSDs, but the monitor doesn't mark them down in the same epoch.
3847 * The history may look like
3848 *
3849 * 1: A B
3850 * 2: B
3851 * 3: let's say B dies for good, too (say, from the power spike)
3852 * 4: A
3853 *
3854 * which makes it look like B may have applied updates to the PG
3855 * that we need in order to proceed. This sucks...
3856 *
3857 * To minimize the risk of this happening, we CANNOT go active if
3858 * _any_ OSDs in the prior set are down until we send an MOSDAlive
3859 * to the monitor such that the OSDMap sets osd_up_thru to an epoch.
3860 * Then, we have something like
3861 *
3862 * 1: A B
3863 * 2: B up_thru[B]=0
3864 * 3:
3865 * 4: A
3866 *
3867 * -> we can ignore B, bc it couldn't have gone active (up_thru still 0).
3868 *
3869 * or,
3870 *
3871 * 1: A B
3872 * 2: B up_thru[B]=0
3873 * 3: B up_thru[B]=2
3874 * 4:
3875 * 5: A
3876 *
3877 * -> we must wait for B, bc it was alive through 2, and could have
3878 * written to the pg.
3879 *
3880 * If B is really dead, then an administrator will need to manually
3881 * intervene by marking the OSD as "lost."
3882 */
3883
3884 // remember past interval
3885 // NOTE: a change in the up set primary triggers an interval
3886 // change, even though the interval members in the pg_interval_t
3887 // do not change.
3888 ceph_assert(past_intervals);
3889 ceph_assert(past_intervals->past_intervals);
3890 if (is_new_interval(
3891 old_acting_primary,
3892 new_acting_primary,
3893 old_acting,
3894 new_acting,
3895 old_up_primary,
3896 new_up_primary,
3897 old_up,
3898 new_up,
3899 osdmap,
3900 lastmap,
3901 pgid)) {
3902 pg_interval_t i;
3903 i.first = same_interval_since;
3904 i.last = osdmap->get_epoch() - 1;
3905 ceph_assert(i.first <= i.last);
3906 i.acting = old_acting;
3907 i.up = old_up;
3908 i.primary = old_acting_primary;
3909 i.up_primary = old_up_primary;
3910
3911 unsigned num_acting = 0;
3912 for (vector<int>::const_iterator p = i.acting.begin(); p != i.acting.end();
3913 ++p)
3914 if (*p != CRUSH_ITEM_NONE)
3915 ++num_acting;
3916
3917 ceph_assert(lastmap->get_pools().count(pgid.pool()));
3918 const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
3919 set<pg_shard_t> old_acting_shards;
3920 old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
3921
3922 if (num_acting &&
3923 i.primary != -1 &&
3924 num_acting >= old_pg_pool.min_size &&
3925 (*could_have_gone_active)(old_acting_shards)) {
3926 if (out)
3927 *out << __func__ << " " << i
3928 << " up_thru " << lastmap->get_up_thru(i.primary)
3929 << " up_from " << lastmap->get_up_from(i.primary)
3930 << " last_epoch_clean " << last_epoch_clean;
3931 if (lastmap->get_up_thru(i.primary) >= i.first &&
3932 lastmap->get_up_from(i.primary) <= i.first) {
3933 i.maybe_went_rw = true;
3934 if (out)
3935 *out << " " << i
3936 << " : primary up " << lastmap->get_up_from(i.primary)
3937 << "-" << lastmap->get_up_thru(i.primary)
3938 << " includes interval"
3939 << std::endl;
3940 } else if (last_epoch_clean >= i.first &&
3941 last_epoch_clean <= i.last) {
3942 // If the last_epoch_clean is included in this interval, then
3943 // the pg must have been rw (for recovery to have completed).
3944 // This is important because we won't know the _real_
3945 // first_epoch because we stop at last_epoch_clean, and we
3946 // don't want the oldest interval to randomly have
3947 // maybe_went_rw false depending on the relative up_thru vs
3948 // last_epoch_clean timing.
3949 i.maybe_went_rw = true;
3950 if (out)
3951 *out << " " << i
3952 << " : includes last_epoch_clean " << last_epoch_clean
3953 << " and presumed to have been rw"
3954 << std::endl;
3955 } else {
3956 i.maybe_went_rw = false;
3957 if (out)
3958 *out << " " << i
3959 << " : primary up " << lastmap->get_up_from(i.primary)
3960 << "-" << lastmap->get_up_thru(i.primary)
3961 << " does not include interval"
3962 << std::endl;
3963 }
3964 } else {
3965 i.maybe_went_rw = false;
3966 if (out)
3967 *out << __func__ << " " << i << " : acting set is too small" << std::endl;
3968 }
3969 past_intervals->past_intervals->add_interval(old_pg_pool.is_erasure(), i);
3970 return true;
3971 } else {
3972 return false;
3973 }
3974 }
3975
3976
3977 // true if the given map affects the prior set
3978 bool PastIntervals::PriorSet::affected_by_map(
3979 const OSDMap &osdmap,
3980 const DoutPrefixProvider *dpp) const
3981 {
3982 for (set<pg_shard_t>::iterator p = probe.begin();
3983 p != probe.end();
3984 ++p) {
3985 int o = p->osd;
3986
3987 // did someone in the prior set go down?
3988 if (osdmap.is_down(o) && down.count(o) == 0) {
3989 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now down" << dendl;
3990 return true;
3991 }
3992
3993 // did a down osd in cur get (re)marked as lost?
3994 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
3995 if (r != blocked_by.end()) {
3996 if (!osdmap.exists(o)) {
3997 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
3998 return true;
3999 }
4000 if (osdmap.get_info(o).lost_at != r->second) {
4001 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
4002 return true;
4003 }
4004 }
4005 }
4006
4007 // did someone in the prior down set go up?
4008 for (set<int>::const_iterator p = down.begin();
4009 p != down.end();
4010 ++p) {
4011 int o = *p;
4012
4013 if (osdmap.is_up(o)) {
4014 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " now up" << dendl;
4015 return true;
4016 }
4017
4018 // did someone in the prior set get lost or destroyed?
4019 if (!osdmap.exists(o)) {
4020 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " no longer exists" << dendl;
4021 return true;
4022 }
4023 // did a down osd in down get (re)marked as lost?
4024 map<int, epoch_t>::const_iterator r = blocked_by.find(o);
4025 if (r != blocked_by.end()) {
4026 if (osdmap.get_info(o).lost_at != r->second) {
4027 ldpp_dout(dpp, 10) << "affected_by_map osd." << o << " (re)marked as lost" << dendl;
4028 return true;
4029 }
4030 }
4031 }
4032
4033 return false;
4034 }
4035
4036 ostream& operator<<(ostream& out, const PastIntervals::pg_interval_t& i)
4037 {
4038 out << "interval(" << i.first << "-" << i.last
4039 << " up " << i.up << "(" << i.up_primary << ")"
4040 << " acting " << i.acting << "(" << i.primary << ")";
4041 if (i.maybe_went_rw)
4042 out << " maybe_went_rw";
4043 out << ")";
4044 return out;
4045 }
4046
4047
4048
4049 // -- pg_query_t --
4050
4051 void pg_query_t::encode(bufferlist &bl, uint64_t features) const {
4052 ENCODE_START(3, 3, bl);
4053 encode(type, bl);
4054 encode(since, bl);
4055 history.encode(bl);
4056 encode(epoch_sent, bl);
4057 encode(to, bl);
4058 encode(from, bl);
4059 ENCODE_FINISH(bl);
4060 }
4061
4062 void pg_query_t::decode(bufferlist::const_iterator &bl) {
4063 DECODE_START(3, bl);
4064 decode(type, bl);
4065 decode(since, bl);
4066 history.decode(bl);
4067 decode(epoch_sent, bl);
4068 decode(to, bl);
4069 decode(from, bl);
4070 DECODE_FINISH(bl);
4071 }
4072
4073 void pg_query_t::dump(Formatter *f) const
4074 {
4075 f->dump_int("from", from);
4076 f->dump_int("to", to);
4077 f->dump_string("type", get_type_name());
4078 f->dump_stream("since") << since;
4079 f->dump_stream("epoch_sent") << epoch_sent;
4080 f->open_object_section("history");
4081 history.dump(f);
4082 f->close_section();
4083 }
4084 void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
4085 {
4086 o.push_back(new pg_query_t());
4087 list<pg_history_t*> h;
4088 pg_history_t::generate_test_instances(h);
4089 o.push_back(new pg_query_t(pg_query_t::INFO, shard_id_t(1), shard_id_t(2), *h.back(), 4));
4090 o.push_back(new pg_query_t(pg_query_t::MISSING, shard_id_t(2), shard_id_t(3), *h.back(), 4));
4091 o.push_back(new pg_query_t(pg_query_t::LOG, shard_id_t(0), shard_id_t(0),
4092 eversion_t(4, 5), *h.back(), 4));
4093 o.push_back(new pg_query_t(pg_query_t::FULLLOG,
4094 shard_id_t::NO_SHARD, shard_id_t::NO_SHARD,
4095 *h.back(), 5));
4096 }
4097
4098 // -- ObjectModDesc --
4099 void ObjectModDesc::visit(Visitor *visitor) const
4100 {
4101 auto bp = bl.cbegin();
4102 try {
4103 while (!bp.end()) {
4104 DECODE_START(max_required_version, bp);
4105 uint8_t code;
4106 decode(code, bp);
4107 switch (code) {
4108 case APPEND: {
4109 uint64_t size;
4110 decode(size, bp);
4111 visitor->append(size);
4112 break;
4113 }
4114 case SETATTRS: {
4115 map<string, boost::optional<bufferlist> > attrs;
4116 decode(attrs, bp);
4117 visitor->setattrs(attrs);
4118 break;
4119 }
4120 case DELETE: {
4121 version_t old_version;
4122 decode(old_version, bp);
4123 visitor->rmobject(old_version);
4124 break;
4125 }
4126 case CREATE: {
4127 visitor->create();
4128 break;
4129 }
4130 case UPDATE_SNAPS: {
4131 set<snapid_t> snaps;
4132 decode(snaps, bp);
4133 visitor->update_snaps(snaps);
4134 break;
4135 }
4136 case TRY_DELETE: {
4137 version_t old_version;
4138 decode(old_version, bp);
4139 visitor->try_rmobject(old_version);
4140 break;
4141 }
4142 case ROLLBACK_EXTENTS: {
4143 vector<pair<uint64_t, uint64_t> > extents;
4144 version_t gen;
4145 decode(gen, bp);
4146 decode(extents, bp);
4147 visitor->rollback_extents(gen,extents);
4148 break;
4149 }
4150 default:
4151 ceph_abort_msg("Invalid rollback code");
4152 }
4153 DECODE_FINISH(bp);
4154 }
4155 } catch (...) {
4156 ceph_abort_msg("Invalid encoding");
4157 }
4158 }
4159
4160 struct DumpVisitor : public ObjectModDesc::Visitor {
4161 Formatter *f;
4162 explicit DumpVisitor(Formatter *f) : f(f) {}
4163 void append(uint64_t old_size) override {
4164 f->open_object_section("op");
4165 f->dump_string("code", "APPEND");
4166 f->dump_unsigned("old_size", old_size);
4167 f->close_section();
4168 }
4169 void setattrs(map<string, boost::optional<bufferlist> > &attrs) override {
4170 f->open_object_section("op");
4171 f->dump_string("code", "SETATTRS");
4172 f->open_array_section("attrs");
4173 for (map<string, boost::optional<bufferlist> >::iterator i = attrs.begin();
4174 i != attrs.end();
4175 ++i) {
4176 f->dump_string("attr_name", i->first);
4177 }
4178 f->close_section();
4179 f->close_section();
4180 }
4181 void rmobject(version_t old_version) override {
4182 f->open_object_section("op");
4183 f->dump_string("code", "RMOBJECT");
4184 f->dump_unsigned("old_version", old_version);
4185 f->close_section();
4186 }
4187 void try_rmobject(version_t old_version) override {
4188 f->open_object_section("op");
4189 f->dump_string("code", "TRY_RMOBJECT");
4190 f->dump_unsigned("old_version", old_version);
4191 f->close_section();
4192 }
4193 void create() override {
4194 f->open_object_section("op");
4195 f->dump_string("code", "CREATE");
4196 f->close_section();
4197 }
4198 void update_snaps(const set<snapid_t> &snaps) override {
4199 f->open_object_section("op");
4200 f->dump_string("code", "UPDATE_SNAPS");
4201 f->dump_stream("snaps") << snaps;
4202 f->close_section();
4203 }
4204 void rollback_extents(
4205 version_t gen,
4206 const vector<pair<uint64_t, uint64_t> > &extents) override {
4207 f->open_object_section("op");
4208 f->dump_string("code", "ROLLBACK_EXTENTS");
4209 f->dump_unsigned("gen", gen);
4210 f->dump_stream("snaps") << extents;
4211 f->close_section();
4212 }
4213 };
4214
4215 void ObjectModDesc::dump(Formatter *f) const
4216 {
4217 f->open_object_section("object_mod_desc");
4218 f->dump_bool("can_local_rollback", can_local_rollback);
4219 f->dump_bool("rollback_info_completed", rollback_info_completed);
4220 {
4221 f->open_array_section("ops");
4222 DumpVisitor vis(f);
4223 visit(&vis);
4224 f->close_section();
4225 }
4226 f->close_section();
4227 }
4228
4229 void ObjectModDesc::generate_test_instances(list<ObjectModDesc*>& o)
4230 {
4231 map<string, boost::optional<bufferlist> > attrs;
4232 attrs[OI_ATTR];
4233 attrs[SS_ATTR];
4234 attrs["asdf"];
4235 o.push_back(new ObjectModDesc());
4236 o.back()->append(100);
4237 o.back()->setattrs(attrs);
4238 o.push_back(new ObjectModDesc());
4239 o.back()->rmobject(1001);
4240 o.push_back(new ObjectModDesc());
4241 o.back()->create();
4242 o.back()->setattrs(attrs);
4243 o.push_back(new ObjectModDesc());
4244 o.back()->create();
4245 o.back()->setattrs(attrs);
4246 o.back()->mark_unrollbackable();
4247 o.back()->append(1000);
4248 }
4249
4250 void ObjectModDesc::encode(bufferlist &_bl) const
4251 {
4252 ENCODE_START(max_required_version, max_required_version, _bl);
4253 encode(can_local_rollback, _bl);
4254 encode(rollback_info_completed, _bl);
4255 encode(bl, _bl);
4256 ENCODE_FINISH(_bl);
4257 }
4258 void ObjectModDesc::decode(bufferlist::const_iterator &_bl)
4259 {
4260 DECODE_START(2, _bl);
4261 max_required_version = struct_v;
4262 decode(can_local_rollback, _bl);
4263 decode(rollback_info_completed, _bl);
4264 decode(bl, _bl);
4265 // ensure bl does not pin a larger buffer in memory
4266 bl.rebuild();
4267 bl.reassign_to_mempool(mempool::mempool_osd_pglog);
4268 DECODE_FINISH(_bl);
4269 }
4270
4271 // -- pg_log_entry_t --
4272
4273 string pg_log_entry_t::get_key_name() const
4274 {
4275 return version.get_key_name();
4276 }
4277
4278 void pg_log_entry_t::encode_with_checksum(bufferlist& bl) const
4279 {
4280 using ceph::encode;
4281 bufferlist ebl(sizeof(*this)*2);
4282 this->encode(ebl);
4283 __u32 crc = ebl.crc32c(0);
4284 encode(ebl, bl);
4285 encode(crc, bl);
4286 }
4287
4288 void pg_log_entry_t::decode_with_checksum(bufferlist::const_iterator& p)
4289 {
4290 using ceph::decode;
4291 bufferlist bl;
4292 decode(bl, p);
4293 __u32 crc;
4294 decode(crc, p);
4295 if (crc != bl.crc32c(0))
4296 throw buffer::malformed_input("bad checksum on pg_log_entry_t");
4297 auto q = bl.cbegin();
4298 this->decode(q);
4299 }
4300
4301 void pg_log_entry_t::encode(bufferlist &bl) const
4302 {
4303 ENCODE_START(12, 4, bl);
4304 encode(op, bl);
4305 encode(soid, bl);
4306 encode(version, bl);
4307
4308 /**
4309 * Added with reverting_to:
4310 * Previous code used prior_version to encode
4311 * what we now call reverting_to. This will
4312 * allow older code to decode reverting_to
4313 * into prior_version as expected.
4314 */
4315 if (op == LOST_REVERT)
4316 encode(reverting_to, bl);
4317 else
4318 encode(prior_version, bl);
4319
4320 encode(reqid, bl);
4321 encode(mtime, bl);
4322 if (op == LOST_REVERT)
4323 encode(prior_version, bl);
4324 encode(snaps, bl);
4325 encode(user_version, bl);
4326 encode(mod_desc, bl);
4327 encode(extra_reqids, bl);
4328 if (op == ERROR)
4329 encode(return_code, bl);
4330 if (!extra_reqids.empty())
4331 encode(extra_reqid_return_codes, bl);
4332 ENCODE_FINISH(bl);
4333 }
4334
4335 void pg_log_entry_t::decode(bufferlist::const_iterator &bl)
4336 {
4337 DECODE_START_LEGACY_COMPAT_LEN(12, 4, 4, bl);
4338 decode(op, bl);
4339 if (struct_v < 2) {
4340 sobject_t old_soid;
4341 decode(old_soid, bl);
4342 soid.oid = old_soid.oid;
4343 soid.snap = old_soid.snap;
4344 invalid_hash = true;
4345 } else {
4346 decode(soid, bl);
4347 }
4348 if (struct_v < 3)
4349 invalid_hash = true;
4350 decode(version, bl);
4351
4352 if (struct_v >= 6 && op == LOST_REVERT)
4353 decode(reverting_to, bl);
4354 else
4355 decode(prior_version, bl);
4356
4357 decode(reqid, bl);
4358
4359 decode(mtime, bl);
4360 if (struct_v < 5)
4361 invalid_pool = true;
4362
4363 if (op == LOST_REVERT) {
4364 if (struct_v >= 6) {
4365 decode(prior_version, bl);
4366 } else {
4367 reverting_to = prior_version;
4368 }
4369 }
4370 if (struct_v >= 7 || // for v >= 7, this is for all ops.
4371 op == CLONE) { // for v < 7, it's only present for CLONE.
4372 decode(snaps, bl);
4373 // ensure snaps does not pin a larger buffer in memory
4374 snaps.rebuild();
4375 snaps.reassign_to_mempool(mempool::mempool_osd_pglog);
4376 }
4377
4378 if (struct_v >= 8)
4379 decode(user_version, bl);
4380 else
4381 user_version = version.version;
4382
4383 if (struct_v >= 9)
4384 decode(mod_desc, bl);
4385 else
4386 mod_desc.mark_unrollbackable();
4387 if (struct_v >= 10)
4388 decode(extra_reqids, bl);
4389 if (struct_v >= 11 && op == ERROR)
4390 decode(return_code, bl);
4391 if (struct_v >= 12 && !extra_reqids.empty())
4392 decode(extra_reqid_return_codes, bl);
4393 DECODE_FINISH(bl);
4394 }
4395
4396 void pg_log_entry_t::dump(Formatter *f) const
4397 {
4398 f->dump_string("op", get_op_name());
4399 f->dump_stream("object") << soid;
4400 f->dump_stream("version") << version;
4401 f->dump_stream("prior_version") << prior_version;
4402 f->dump_stream("reqid") << reqid;
4403 f->open_array_section("extra_reqids");
4404 uint32_t idx = 0;
4405 for (auto p = extra_reqids.begin();
4406 p != extra_reqids.end();
4407 ++idx, ++p) {
4408 f->open_object_section("extra_reqid");
4409 f->dump_stream("reqid") << p->first;
4410 f->dump_stream("user_version") << p->second;
4411 auto it = extra_reqid_return_codes.find(idx);
4412 if (it != extra_reqid_return_codes.end()) {
4413 f->dump_int("return_code", it->second);
4414 }
4415 f->close_section();
4416 }
4417 f->close_section();
4418 f->dump_stream("mtime") << mtime;
4419 f->dump_int("return_code", return_code);
4420 if (snaps.length() > 0) {
4421 vector<snapid_t> v;
4422 bufferlist c = snaps;
4423 auto p = c.cbegin();
4424 try {
4425 using ceph::decode;
4426 decode(v, p);
4427 } catch (...) {
4428 v.clear();
4429 }
4430 f->open_object_section("snaps");
4431 for (vector<snapid_t>::iterator p = v.begin(); p != v.end(); ++p)
4432 f->dump_unsigned("snap", *p);
4433 f->close_section();
4434 }
4435 {
4436 f->open_object_section("mod_desc");
4437 mod_desc.dump(f);
4438 f->close_section();
4439 }
4440 }
4441
4442 void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
4443 {
4444 o.push_back(new pg_log_entry_t());
4445 hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
4446 o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
4447 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4448 utime_t(8,9), 0));
4449 o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
4450 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4451 utime_t(8,9), -ENOENT));
4452 }
4453
4454 ostream& operator<<(ostream& out, const pg_log_entry_t& e)
4455 {
4456 out << e.version << " (" << e.prior_version << ") "
4457 << std::left << std::setw(8) << e.get_op_name() << ' '
4458 << e.soid << " by " << e.reqid << " " << e.mtime
4459 << " " << e.return_code;
4460 if (e.snaps.length()) {
4461 vector<snapid_t> snaps;
4462 bufferlist c = e.snaps;
4463 auto p = c.cbegin();
4464 try {
4465 decode(snaps, p);
4466 } catch (...) {
4467 snaps.clear();
4468 }
4469 out << " snaps " << snaps;
4470 }
4471 return out;
4472 }
4473
4474 // -- pg_log_dup_t --
4475
4476 std::string pg_log_dup_t::get_key_name() const
4477 {
4478 static const char prefix[] = "dup_";
4479 std::string key(36, ' ');
4480 memcpy(&key[0], prefix, 4);
4481 version.get_key_name(&key[4]);
4482 key.resize(35); // remove the null terminator
4483 return key;
4484 }
4485
4486 void pg_log_dup_t::encode(bufferlist &bl) const
4487 {
4488 ENCODE_START(1, 1, bl);
4489 encode(reqid, bl);
4490 encode(version, bl);
4491 encode(user_version, bl);
4492 encode(return_code, bl);
4493 ENCODE_FINISH(bl);
4494 }
4495
4496 void pg_log_dup_t::decode(bufferlist::const_iterator &bl)
4497 {
4498 DECODE_START(1, bl);
4499 decode(reqid, bl);
4500 decode(version, bl);
4501 decode(user_version, bl);
4502 decode(return_code, bl);
4503 DECODE_FINISH(bl);
4504 }
4505
4506 void pg_log_dup_t::dump(Formatter *f) const
4507 {
4508 f->dump_stream("reqid") << reqid;
4509 f->dump_stream("version") << version;
4510 f->dump_stream("user_version") << user_version;
4511 f->dump_stream("return_code") << return_code;
4512 }
4513
4514 void pg_log_dup_t::generate_test_instances(list<pg_log_dup_t*>& o)
4515 {
4516 o.push_back(new pg_log_dup_t());
4517 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4518 1,
4519 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4520 0));
4521 o.push_back(new pg_log_dup_t(eversion_t(1,2),
4522 2,
4523 osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
4524 -ENOENT));
4525 }
4526
4527
4528 std::ostream& operator<<(std::ostream& out, const pg_log_dup_t& e) {
4529 return out << "log_dup(reqid=" << e.reqid <<
4530 " v=" << e.version << " uv=" << e.user_version <<
4531 " rc=" << e.return_code << ")";
4532 }
4533
4534
4535 // -- pg_log_t --
4536
4537 // out: pg_log_t that only has entries that apply to import_pgid using curmap
4538 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
4539 void pg_log_t::filter_log(spg_t import_pgid, const OSDMap &curmap,
4540 const string &hit_set_namespace, const pg_log_t &in,
4541 pg_log_t &out, pg_log_t &reject)
4542 {
4543 out = in;
4544 out.log.clear();
4545 reject.log.clear();
4546
4547 for (list<pg_log_entry_t>::const_iterator i = in.log.begin();
4548 i != in.log.end(); ++i) {
4549
4550 // Reject pg log entries for temporary objects
4551 if (i->soid.is_temp()) {
4552 reject.log.push_back(*i);
4553 continue;
4554 }
4555
4556 if (i->soid.nspace != hit_set_namespace) {
4557 object_t oid = i->soid.oid;
4558 object_locator_t loc(i->soid);
4559 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
4560 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
4561
4562 if (import_pgid.pgid == pgid) {
4563 out.log.push_back(*i);
4564 } else {
4565 reject.log.push_back(*i);
4566 }
4567 } else {
4568 out.log.push_back(*i);
4569 }
4570 }
4571 }
4572
4573 void pg_log_t::encode(bufferlist& bl) const
4574 {
4575 ENCODE_START(7, 3, bl);
4576 encode(head, bl);
4577 encode(tail, bl);
4578 encode(log, bl);
4579 encode(can_rollback_to, bl);
4580 encode(rollback_info_trimmed_to, bl);
4581 encode(dups, bl);
4582 ENCODE_FINISH(bl);
4583 }
4584
4585 void pg_log_t::decode(bufferlist::const_iterator &bl, int64_t pool)
4586 {
4587 DECODE_START_LEGACY_COMPAT_LEN(7, 3, 3, bl);
4588 decode(head, bl);
4589 decode(tail, bl);
4590 if (struct_v < 2) {
4591 bool backlog;
4592 decode(backlog, bl);
4593 }
4594 decode(log, bl);
4595 if (struct_v >= 5)
4596 decode(can_rollback_to, bl);
4597
4598 if (struct_v >= 6)
4599 decode(rollback_info_trimmed_to, bl);
4600 else
4601 rollback_info_trimmed_to = tail;
4602
4603 if (struct_v >= 7)
4604 decode(dups, bl);
4605
4606 DECODE_FINISH(bl);
4607
4608 // handle hobject_t format change
4609 if (struct_v < 4) {
4610 for (list<pg_log_entry_t>::iterator i = log.begin();
4611 i != log.end();
4612 ++i) {
4613 if (!i->soid.is_max() && i->soid.pool == -1)
4614 i->soid.pool = pool;
4615 }
4616 }
4617 }
4618
4619 void pg_log_t::dump(Formatter *f) const
4620 {
4621 f->dump_stream("head") << head;
4622 f->dump_stream("tail") << tail;
4623 f->open_array_section("log");
4624 for (list<pg_log_entry_t>::const_iterator p = log.begin(); p != log.end(); ++p) {
4625 f->open_object_section("entry");
4626 p->dump(f);
4627 f->close_section();
4628 }
4629 f->close_section();
4630 f->open_array_section("dups");
4631 for (const auto& entry : dups) {
4632 f->open_object_section("entry");
4633 entry.dump(f);
4634 f->close_section();
4635 }
4636 f->close_section();
4637 }
4638
4639 void pg_log_t::generate_test_instances(list<pg_log_t*>& o)
4640 {
4641 o.push_back(new pg_log_t);
4642
4643 // this is nonsensical:
4644 o.push_back(new pg_log_t);
4645 o.back()->head = eversion_t(1,2);
4646 o.back()->tail = eversion_t(3,4);
4647 list<pg_log_entry_t*> e;
4648 pg_log_entry_t::generate_test_instances(e);
4649 for (list<pg_log_entry_t*>::iterator p = e.begin(); p != e.end(); ++p)
4650 o.back()->log.push_back(**p);
4651 }
4652
4653 static void _handle_dups(CephContext* cct, pg_log_t &target, const pg_log_t &other, unsigned maxdups)
4654 {
4655 auto earliest_dup_version =
4656 target.head.version < maxdups ? 0u : target.head.version - maxdups + 1;
4657 lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl;
4658
4659 for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) {
4660 if (d->version.version >= earliest_dup_version) {
4661 lgeneric_subdout(cct, osd, 20)
4662 << "copy_up_to/copy_after copy dup version "
4663 << d->version << dendl;
4664 target.dups.push_back(pg_log_dup_t(*d));
4665 }
4666 }
4667
4668 for (auto i = other.log.cbegin(); i != other.log.cend(); ++i) {
4669 ceph_assert(i->version > other.tail);
4670 if (i->version > target.tail)
4671 break;
4672 if (i->version.version >= earliest_dup_version) {
4673 lgeneric_subdout(cct, osd, 20)
4674 << "copy_up_to/copy_after copy dup from log version "
4675 << i->version << dendl;
4676 target.dups.push_back(pg_log_dup_t(*i));
4677 }
4678 }
4679 }
4680
4681
4682 void pg_log_t::copy_after(CephContext* cct, const pg_log_t &other, eversion_t v)
4683 {
4684 can_rollback_to = other.can_rollback_to;
4685 head = other.head;
4686 tail = other.tail;
4687 lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl;
4688 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4689 i != other.log.rend();
4690 ++i) {
4691 ceph_assert(i->version > other.tail);
4692 if (i->version <= v) {
4693 // make tail accurate.
4694 tail = i->version;
4695 break;
4696 }
4697 lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
4698 log.push_front(*i);
4699 }
4700 _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
4701 }
4702
4703 void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max)
4704 {
4705 can_rollback_to = other.can_rollback_to;
4706 int n = 0;
4707 head = other.head;
4708 tail = other.tail;
4709 lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl;
4710 for (list<pg_log_entry_t>::const_reverse_iterator i = other.log.rbegin();
4711 i != other.log.rend();
4712 ++i) {
4713 ceph_assert(i->version > other.tail);
4714 if (n++ >= max) {
4715 tail = i->version;
4716 break;
4717 }
4718 lgeneric_subdout(cct, osd, 20) << __func__ << " copy log version " << i->version << dendl;
4719 log.push_front(*i);
4720 }
4721 _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked);
4722 }
4723
4724 ostream& pg_log_t::print(ostream& out) const
4725 {
4726 out << *this << std::endl;
4727 for (list<pg_log_entry_t>::const_iterator p = log.begin();
4728 p != log.end();
4729 ++p)
4730 out << *p << std::endl;
4731 for (const auto& entry : dups) {
4732 out << " dup entry: " << entry << std::endl;
4733 }
4734 return out;
4735 }
4736
4737 // -- pg_missing_t --
4738
4739 ostream& operator<<(ostream& out, const pg_missing_item& i)
4740 {
4741 out << i.need;
4742 if (i.have != eversion_t())
4743 out << "(" << i.have << ")";
4744 out << " flags = " << i.flag_str();
4745 return out;
4746 }
4747
4748 // -- object_copy_cursor_t --
4749
4750 void object_copy_cursor_t::encode(bufferlist& bl) const
4751 {
4752 ENCODE_START(1, 1, bl);
4753 encode(attr_complete, bl);
4754 encode(data_offset, bl);
4755 encode(data_complete, bl);
4756 encode(omap_offset, bl);
4757 encode(omap_complete, bl);
4758 ENCODE_FINISH(bl);
4759 }
4760
4761 void object_copy_cursor_t::decode(bufferlist::const_iterator &bl)
4762 {
4763 DECODE_START(1, bl);
4764 decode(attr_complete, bl);
4765 decode(data_offset, bl);
4766 decode(data_complete, bl);
4767 decode(omap_offset, bl);
4768 decode(omap_complete, bl);
4769 DECODE_FINISH(bl);
4770 }
4771
4772 void object_copy_cursor_t::dump(Formatter *f) const
4773 {
4774 f->dump_unsigned("attr_complete", (int)attr_complete);
4775 f->dump_unsigned("data_offset", data_offset);
4776 f->dump_unsigned("data_complete", (int)data_complete);
4777 f->dump_string("omap_offset", omap_offset);
4778 f->dump_unsigned("omap_complete", (int)omap_complete);
4779 }
4780
4781 void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o)
4782 {
4783 o.push_back(new object_copy_cursor_t);
4784 o.push_back(new object_copy_cursor_t);
4785 o.back()->attr_complete = true;
4786 o.back()->data_offset = 123;
4787 o.push_back(new object_copy_cursor_t);
4788 o.back()->attr_complete = true;
4789 o.back()->data_complete = true;
4790 o.back()->omap_offset = "foo";
4791 o.push_back(new object_copy_cursor_t);
4792 o.back()->attr_complete = true;
4793 o.back()->data_complete = true;
4794 o.back()->omap_complete = true;
4795 }
4796
4797 // -- object_copy_data_t --
4798
4799 void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const
4800 {
4801 ENCODE_START(8, 5, bl);
4802 encode(size, bl);
4803 encode(mtime, bl);
4804 encode(attrs, bl);
4805 encode(data, bl);
4806 encode(omap_data, bl);
4807 encode(cursor, bl);
4808 encode(omap_header, bl);
4809 encode(snaps, bl);
4810 encode(snap_seq, bl);
4811 encode(flags, bl);
4812 encode(data_digest, bl);
4813 encode(omap_digest, bl);
4814 encode(reqids, bl);
4815 encode(truncate_seq, bl);
4816 encode(truncate_size, bl);
4817 encode(reqid_return_codes, bl);
4818 ENCODE_FINISH(bl);
4819 }
4820
4821 void object_copy_data_t::decode(bufferlist::const_iterator& bl)
4822 {
4823 DECODE_START(7, bl);
4824 if (struct_v < 5) {
4825 // old
4826 decode(size, bl);
4827 decode(mtime, bl);
4828 {
4829 string category;
4830 decode(category, bl); // no longer used
4831 }
4832 decode(attrs, bl);
4833 decode(data, bl);
4834 {
4835 map<string,bufferlist> omap;
4836 decode(omap, bl);
4837 omap_data.clear();
4838 if (!omap.empty()) {
4839 using ceph::encode;
4840 encode(omap, omap_data);
4841 }
4842 }
4843 decode(cursor, bl);
4844 if (struct_v >= 2)
4845 decode(omap_header, bl);
4846 if (struct_v >= 3) {
4847 decode(snaps, bl);
4848 decode(snap_seq, bl);
4849 } else {
4850 snaps.clear();
4851 snap_seq = 0;
4852 }
4853 if (struct_v >= 4) {
4854 decode(flags, bl);
4855 decode(data_digest, bl);
4856 decode(omap_digest, bl);
4857 }
4858 } else {
4859 // current
4860 decode(size, bl);
4861 decode(mtime, bl);
4862 decode(attrs, bl);
4863 decode(data, bl);
4864 decode(omap_data, bl);
4865 decode(cursor, bl);
4866 decode(omap_header, bl);
4867 decode(snaps, bl);
4868 decode(snap_seq, bl);
4869 if (struct_v >= 4) {
4870 decode(flags, bl);
4871 decode(data_digest, bl);
4872 decode(omap_digest, bl);
4873 }
4874 if (struct_v >= 6) {
4875 decode(reqids, bl);
4876 }
4877 if (struct_v >= 7) {
4878 decode(truncate_seq, bl);
4879 decode(truncate_size, bl);
4880 }
4881 if (struct_v >= 8) {
4882 decode(reqid_return_codes, bl);
4883 }
4884 }
4885 DECODE_FINISH(bl);
4886 }
4887
4888 void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o)
4889 {
4890 o.push_back(new object_copy_data_t());
4891
4892 list<object_copy_cursor_t*> cursors;
4893 object_copy_cursor_t::generate_test_instances(cursors);
4894 list<object_copy_cursor_t*>::iterator ci = cursors.begin();
4895 o.back()->cursor = **(ci++);
4896
4897 o.push_back(new object_copy_data_t());
4898 o.back()->cursor = **(ci++);
4899
4900 o.push_back(new object_copy_data_t());
4901 o.back()->size = 1234;
4902 o.back()->mtime.set_from_double(1234);
4903 bufferptr bp("there", 5);
4904 bufferlist bl;
4905 bl.push_back(bp);
4906 o.back()->attrs["hello"] = bl;
4907 bufferptr bp2("not", 3);
4908 bufferlist bl2;
4909 bl2.push_back(bp2);
4910 map<string,bufferlist> omap;
4911 omap["why"] = bl2;
4912 using ceph::encode;
4913 encode(omap, o.back()->omap_data);
4914 bufferptr databp("iamsomedatatocontain", 20);
4915 o.back()->data.push_back(databp);
4916 o.back()->omap_header.append("this is an omap header");
4917 o.back()->snaps.push_back(123);
4918 o.back()->reqids.push_back(make_pair(osd_reqid_t(), version_t()));
4919 }
4920
4921 void object_copy_data_t::dump(Formatter *f) const
4922 {
4923 f->open_object_section("cursor");
4924 cursor.dump(f);
4925 f->close_section(); // cursor
4926 f->dump_int("size", size);
4927 f->dump_stream("mtime") << mtime;
4928 /* we should really print out the attrs here, but bufferlist
4929 const-correctness prevents that */
4930 f->dump_int("attrs_size", attrs.size());
4931 f->dump_int("flags", flags);
4932 f->dump_unsigned("data_digest", data_digest);
4933 f->dump_unsigned("omap_digest", omap_digest);
4934 f->dump_int("omap_data_length", omap_data.length());
4935 f->dump_int("omap_header_length", omap_header.length());
4936 f->dump_int("data_length", data.length());
4937 f->open_array_section("snaps");
4938 for (vector<snapid_t>::const_iterator p = snaps.begin();
4939 p != snaps.end(); ++p)
4940 f->dump_unsigned("snap", *p);
4941 f->close_section();
4942 f->open_array_section("reqids");
4943 uint32_t idx = 0;
4944 for (auto p = reqids.begin();
4945 p != reqids.end();
4946 ++idx, ++p) {
4947 f->open_object_section("extra_reqid");
4948 f->dump_stream("reqid") << p->first;
4949 f->dump_stream("user_version") << p->second;
4950 auto it = reqid_return_codes.find(idx);
4951 if (it != reqid_return_codes.end()) {
4952 f->dump_int("return_code", it->second);
4953 }
4954 f->close_section();
4955 }
4956 f->close_section();
4957 }
4958
4959 // -- pg_create_t --
4960
4961 void pg_create_t::encode(bufferlist &bl) const
4962 {
4963 ENCODE_START(1, 1, bl);
4964 encode(created, bl);
4965 encode(parent, bl);
4966 encode(split_bits, bl);
4967 ENCODE_FINISH(bl);
4968 }
4969
4970 void pg_create_t::decode(bufferlist::const_iterator &bl)
4971 {
4972 DECODE_START(1, bl);
4973 decode(created, bl);
4974 decode(parent, bl);
4975 decode(split_bits, bl);
4976 DECODE_FINISH(bl);
4977 }
4978
4979 void pg_create_t::dump(Formatter *f) const
4980 {
4981 f->dump_unsigned("created", created);
4982 f->dump_stream("parent") << parent;
4983 f->dump_int("split_bits", split_bits);
4984 }
4985
4986 void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
4987 {
4988 o.push_back(new pg_create_t);
4989 o.push_back(new pg_create_t(1, pg_t(3, 4), 2));
4990 }
4991
4992
4993 // -- pg_hit_set_info_t --
4994
4995 void pg_hit_set_info_t::encode(bufferlist& bl) const
4996 {
4997 ENCODE_START(2, 1, bl);
4998 encode(begin, bl);
4999 encode(end, bl);
5000 encode(version, bl);
5001 encode(using_gmt, bl);
5002 ENCODE_FINISH(bl);
5003 }
5004
5005 void pg_hit_set_info_t::decode(bufferlist::const_iterator& p)
5006 {
5007 DECODE_START(2, p);
5008 decode(begin, p);
5009 decode(end, p);
5010 decode(version, p);
5011 if (struct_v >= 2) {
5012 decode(using_gmt, p);
5013 } else {
5014 using_gmt = false;
5015 }
5016 DECODE_FINISH(p);
5017 }
5018
5019 void pg_hit_set_info_t::dump(Formatter *f) const
5020 {
5021 f->dump_stream("begin") << begin;
5022 f->dump_stream("end") << end;
5023 f->dump_stream("version") << version;
5024 f->dump_stream("using_gmt") << using_gmt;
5025 }
5026
5027 void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
5028 {
5029 ls.push_back(new pg_hit_set_info_t);
5030 ls.push_back(new pg_hit_set_info_t);
5031 ls.back()->begin = utime_t(1, 2);
5032 ls.back()->end = utime_t(3, 4);
5033 }
5034
5035
5036 // -- pg_hit_set_history_t --
5037
5038 void pg_hit_set_history_t::encode(bufferlist& bl) const
5039 {
5040 ENCODE_START(1, 1, bl);
5041 encode(current_last_update, bl);
5042 {
5043 utime_t dummy_stamp;
5044 encode(dummy_stamp, bl);
5045 }
5046 {
5047 pg_hit_set_info_t dummy_info;
5048 encode(dummy_info, bl);
5049 }
5050 encode(history, bl);
5051 ENCODE_FINISH(bl);
5052 }
5053
5054 void pg_hit_set_history_t::decode(bufferlist::const_iterator& p)
5055 {
5056 DECODE_START(1, p);
5057 decode(current_last_update, p);
5058 {
5059 utime_t dummy_stamp;
5060 decode(dummy_stamp, p);
5061 }
5062 {
5063 pg_hit_set_info_t dummy_info;
5064 decode(dummy_info, p);
5065 }
5066 decode(history, p);
5067 DECODE_FINISH(p);
5068 }
5069
5070 void pg_hit_set_history_t::dump(Formatter *f) const
5071 {
5072 f->dump_stream("current_last_update") << current_last_update;
5073 f->open_array_section("history");
5074 for (list<pg_hit_set_info_t>::const_iterator p = history.begin();
5075 p != history.end(); ++p) {
5076 f->open_object_section("info");
5077 p->dump(f);
5078 f->close_section();
5079 }
5080 f->close_section();
5081 }
5082
5083 void pg_hit_set_history_t::generate_test_instances(list<pg_hit_set_history_t*>& ls)
5084 {
5085 ls.push_back(new pg_hit_set_history_t);
5086 ls.push_back(new pg_hit_set_history_t);
5087 ls.back()->current_last_update = eversion_t(1, 2);
5088 ls.back()->history.push_back(pg_hit_set_info_t());
5089 }
5090
5091 // -- OSDSuperblock --
5092
5093 void OSDSuperblock::encode(bufferlist &bl) const
5094 {
5095 ENCODE_START(8, 5, bl);
5096 encode(cluster_fsid, bl);
5097 encode(whoami, bl);
5098 encode(current_epoch, bl);
5099 encode(oldest_map, bl);
5100 encode(newest_map, bl);
5101 encode(weight, bl);
5102 compat_features.encode(bl);
5103 encode(clean_thru, bl);
5104 encode(mounted, bl);
5105 encode(osd_fsid, bl);
5106 encode((epoch_t)0, bl); // epoch_t last_epoch_marked_full
5107 encode((uint32_t)0, bl); // map<int64_t,epoch_t> pool_last_epoch_marked_full
5108 ENCODE_FINISH(bl);
5109 }
5110
5111 void OSDSuperblock::decode(bufferlist::const_iterator &bl)
5112 {
5113 DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl);
5114 if (struct_v < 3) {
5115 string magic;
5116 decode(magic, bl);
5117 }
5118 decode(cluster_fsid, bl);
5119 decode(whoami, bl);
5120 decode(current_epoch, bl);
5121 decode(oldest_map, bl);
5122 decode(newest_map, bl);
5123 decode(weight, bl);
5124 if (struct_v >= 2) {
5125 compat_features.decode(bl);
5126 } else { //upgrade it!
5127 compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
5128 }
5129 decode(clean_thru, bl);
5130 decode(mounted, bl);
5131 if (struct_v >= 4)
5132 decode(osd_fsid, bl);
5133 if (struct_v >= 6) {
5134 epoch_t last_map_marked_full;
5135 decode(last_map_marked_full, bl);
5136 }
5137 if (struct_v >= 7) {
5138 map<int64_t,epoch_t> pool_last_map_marked_full;
5139 decode(pool_last_map_marked_full, bl);
5140 }
5141 DECODE_FINISH(bl);
5142 }
5143
5144 void OSDSuperblock::dump(Formatter *f) const
5145 {
5146 f->dump_stream("cluster_fsid") << cluster_fsid;
5147 f->dump_stream("osd_fsid") << osd_fsid;
5148 f->dump_int("whoami", whoami);
5149 f->dump_int("current_epoch", current_epoch);
5150 f->dump_int("oldest_map", oldest_map);
5151 f->dump_int("newest_map", newest_map);
5152 f->dump_float("weight", weight);
5153 f->open_object_section("compat");
5154 compat_features.dump(f);
5155 f->close_section();
5156 f->dump_int("clean_thru", clean_thru);
5157 f->dump_int("last_epoch_mounted", mounted);
5158 }
5159
5160 void OSDSuperblock::generate_test_instances(list<OSDSuperblock*>& o)
5161 {
5162 OSDSuperblock z;
5163 o.push_back(new OSDSuperblock(z));
5164 z.cluster_fsid.parse("01010101-0101-0101-0101-010101010101");
5165 z.osd_fsid.parse("02020202-0202-0202-0202-020202020202");
5166 z.whoami = 3;
5167 z.current_epoch = 4;
5168 z.oldest_map = 5;
5169 z.newest_map = 9;
5170 z.mounted = 8;
5171 z.clean_thru = 7;
5172 o.push_back(new OSDSuperblock(z));
5173 o.push_back(new OSDSuperblock(z));
5174 }
5175
5176 // -- SnapSet --
5177
5178 void SnapSet::encode(bufferlist& bl) const
5179 {
5180 ENCODE_START(3, 2, bl);
5181 encode(seq, bl);
5182 encode(true, bl); // head_exists
5183 encode(snaps, bl);
5184 encode(clones, bl);
5185 encode(clone_overlap, bl);
5186 encode(clone_size, bl);
5187 encode(clone_snaps, bl);
5188 ENCODE_FINISH(bl);
5189 }
5190
5191 void SnapSet::decode(bufferlist::const_iterator& bl)
5192 {
5193 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
5194 decode(seq, bl);
5195 bl.advance(1u); // skip legacy head_exists (always true)
5196 decode(snaps, bl);
5197 decode(clones, bl);
5198 decode(clone_overlap, bl);
5199 decode(clone_size, bl);
5200 if (struct_v >= 3) {
5201 decode(clone_snaps, bl);
5202 } else {
5203 clone_snaps.clear();
5204 }
5205 DECODE_FINISH(bl);
5206 }
5207
5208 void SnapSet::dump(Formatter *f) const
5209 {
5210 SnapContext sc(seq, snaps);
5211 f->open_object_section("snap_context");
5212 sc.dump(f);
5213 f->close_section();
5214 f->open_array_section("clones");
5215 for (vector<snapid_t>::const_iterator p = clones.begin(); p != clones.end(); ++p) {
5216 f->open_object_section("clone");
5217 f->dump_unsigned("snap", *p);
5218 auto cs = clone_size.find(*p);
5219 if (cs != clone_size.end())
5220 f->dump_unsigned("size", cs->second);
5221 else
5222 f->dump_string("size", "????");
5223 auto co = clone_overlap.find(*p);
5224 if (co != clone_overlap.end())
5225 f->dump_stream("overlap") << co->second;
5226 else
5227 f->dump_stream("overlap") << "????";
5228 auto q = clone_snaps.find(*p);
5229 if (q != clone_snaps.end()) {
5230 f->open_array_section("snaps");
5231 for (auto s : q->second) {
5232 f->dump_unsigned("snap", s);
5233 }
5234 f->close_section();
5235 }
5236 f->close_section();
5237 }
5238 f->close_section();
5239 }
5240
5241 void SnapSet::generate_test_instances(list<SnapSet*>& o)
5242 {
5243 o.push_back(new SnapSet);
5244 o.push_back(new SnapSet);
5245 o.back()->seq = 123;
5246 o.back()->snaps.push_back(123);
5247 o.back()->snaps.push_back(12);
5248 o.push_back(new SnapSet);
5249 o.back()->seq = 123;
5250 o.back()->snaps.push_back(123);
5251 o.back()->snaps.push_back(12);
5252 o.back()->clones.push_back(12);
5253 o.back()->clone_size[12] = 12345;
5254 o.back()->clone_overlap[12];
5255 o.back()->clone_snaps[12] = {12, 10, 8};
5256 }
5257
5258 ostream& operator<<(ostream& out, const SnapSet& cs)
5259 {
5260 return out << cs.seq << "=" << cs.snaps << ":"
5261 << cs.clone_snaps;
5262 }
5263
5264 void SnapSet::from_snap_set(const librados::snap_set_t& ss, bool legacy)
5265 {
5266 // NOTE: our reconstruction of snaps (and the snapc) is not strictly
5267 // correct: it will not include snaps that still logically exist
5268 // but for which there was no clone that is defined. For all
5269 // practical purposes this doesn't matter, since we only use that
5270 // information to clone on the OSD, and we have already moved
5271 // forward past that part of the object history.
5272
5273 seq = ss.seq;
5274 set<snapid_t> _snaps;
5275 set<snapid_t> _clones;
5276 for (vector<librados::clone_info_t>::const_iterator p = ss.clones.begin();
5277 p != ss.clones.end();
5278 ++p) {
5279 if (p->cloneid != librados::SNAP_HEAD) {
5280 _clones.insert(p->cloneid);
5281 _snaps.insert(p->snaps.begin(), p->snaps.end());
5282 clone_size[p->cloneid] = p->size;
5283 clone_overlap[p->cloneid]; // the entry must exist, even if it's empty.
5284 for (vector<pair<uint64_t, uint64_t> >::const_iterator q =
5285 p->overlap.begin(); q != p->overlap.end(); ++q)
5286 clone_overlap[p->cloneid].insert(q->first, q->second);
5287 if (!legacy) {
5288 // p->snaps is ascending; clone_snaps is descending
5289 vector<snapid_t>& v = clone_snaps[p->cloneid];
5290 for (auto q = p->snaps.rbegin(); q != p->snaps.rend(); ++q) {
5291 v.push_back(*q);
5292 }
5293 }
5294 }
5295 }
5296
5297 // ascending
5298 clones.clear();
5299 clones.reserve(_clones.size());
5300 for (set<snapid_t>::iterator p = _clones.begin(); p != _clones.end(); ++p)
5301 clones.push_back(*p);
5302
5303 // descending
5304 snaps.clear();
5305 snaps.reserve(_snaps.size());
5306 for (set<snapid_t>::reverse_iterator p = _snaps.rbegin();
5307 p != _snaps.rend(); ++p)
5308 snaps.push_back(*p);
5309 }
5310
5311 uint64_t SnapSet::get_clone_bytes(snapid_t clone) const
5312 {
5313 ceph_assert(clone_size.count(clone));
5314 uint64_t size = clone_size.find(clone)->second;
5315 ceph_assert(clone_overlap.count(clone));
5316 const interval_set<uint64_t> &overlap = clone_overlap.find(clone)->second;
5317 ceph_assert(size >= (uint64_t)overlap.size());
5318 return size - overlap.size();
5319 }
5320
5321 void SnapSet::filter(const pg_pool_t &pinfo)
5322 {
5323 vector<snapid_t> oldsnaps;
5324 oldsnaps.swap(snaps);
5325 for (vector<snapid_t>::const_iterator i = oldsnaps.begin();
5326 i != oldsnaps.end();
5327 ++i) {
5328 if (!pinfo.is_removed_snap(*i))
5329 snaps.push_back(*i);
5330 }
5331 }
5332
5333 SnapSet SnapSet::get_filtered(const pg_pool_t &pinfo) const
5334 {
5335 SnapSet ss = *this;
5336 ss.filter(pinfo);
5337 return ss;
5338 }
5339
5340 // -- watch_info_t --
5341
5342 void watch_info_t::encode(bufferlist& bl, uint64_t features) const
5343 {
5344 ENCODE_START(4, 3, bl);
5345 encode(cookie, bl);
5346 encode(timeout_seconds, bl);
5347 encode(addr, bl, features);
5348 ENCODE_FINISH(bl);
5349 }
5350
5351 void watch_info_t::decode(bufferlist::const_iterator& bl)
5352 {
5353 DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
5354 decode(cookie, bl);
5355 if (struct_v < 2) {
5356 uint64_t ver;
5357 decode(ver, bl);
5358 }
5359 decode(timeout_seconds, bl);
5360 if (struct_v >= 4) {
5361 decode(addr, bl);
5362 }
5363 DECODE_FINISH(bl);
5364 }
5365
5366 void watch_info_t::dump(Formatter *f) const
5367 {
5368 f->dump_unsigned("cookie", cookie);
5369 f->dump_unsigned("timeout_seconds", timeout_seconds);
5370 f->open_object_section("addr");
5371 addr.dump(f);
5372 f->close_section();
5373 }
5374
5375 void watch_info_t::generate_test_instances(list<watch_info_t*>& o)
5376 {
5377 o.push_back(new watch_info_t);
5378 o.push_back(new watch_info_t);
5379 o.back()->cookie = 123;
5380 o.back()->timeout_seconds = 99;
5381 entity_addr_t ea;
5382 ea.set_type(entity_addr_t::TYPE_LEGACY);
5383 ea.set_nonce(1);
5384 ea.set_family(AF_INET);
5385 ea.set_in4_quad(0, 127);
5386 ea.set_in4_quad(1, 0);
5387 ea.set_in4_quad(2, 1);
5388 ea.set_in4_quad(3, 2);
5389 ea.set_port(2);
5390 o.back()->addr = ea;
5391 }
5392
5393 // -- chunk_info_t --
5394
5395 void chunk_info_t::encode(bufferlist& bl) const
5396 {
5397 ENCODE_START(1, 1, bl);
5398 encode(offset, bl);
5399 encode(length, bl);
5400 encode(oid, bl);
5401 __u32 _flags = flags;
5402 encode(_flags, bl);
5403 ENCODE_FINISH(bl);
5404 }
5405
5406 void chunk_info_t::decode(bufferlist::const_iterator& bl)
5407 {
5408 DECODE_START(1, bl);
5409 decode(offset, bl);
5410 decode(length, bl);
5411 decode(oid, bl);
5412 __u32 _flags;
5413 decode(_flags, bl);
5414 flags = (cflag_t)_flags;
5415 DECODE_FINISH(bl);
5416 }
5417
5418 void chunk_info_t::dump(Formatter *f) const
5419 {
5420 f->dump_unsigned("length", length);
5421 f->open_object_section("oid");
5422 oid.dump(f);
5423 f->close_section();
5424 f->dump_unsigned("flags", flags);
5425 }
5426
5427 ostream& operator<<(ostream& out, const chunk_info_t& ci)
5428 {
5429 return out << "(len: " << ci.length << " oid: " << ci.oid
5430 << " offset: " << ci.offset
5431 << " flags: " << ci.get_flag_string(ci.flags) << ")";
5432 }
5433
5434 // -- object_manifest_t --
5435
5436 void object_manifest_t::encode(bufferlist& bl) const
5437 {
5438 ENCODE_START(1, 1, bl);
5439 encode(type, bl);
5440 switch (type) {
5441 case TYPE_NONE: break;
5442 case TYPE_REDIRECT:
5443 encode(redirect_target, bl);
5444 break;
5445 case TYPE_CHUNKED:
5446 encode(chunk_map, bl);
5447 break;
5448 default:
5449 ceph_abort();
5450 }
5451 ENCODE_FINISH(bl);
5452 }
5453
5454 void object_manifest_t::decode(bufferlist::const_iterator& bl)
5455 {
5456 DECODE_START(1, bl);
5457 decode(type, bl);
5458 switch (type) {
5459 case TYPE_NONE: break;
5460 case TYPE_REDIRECT:
5461 decode(redirect_target, bl);
5462 break;
5463 case TYPE_CHUNKED:
5464 decode(chunk_map, bl);
5465 break;
5466 default:
5467 ceph_abort();
5468 }
5469 DECODE_FINISH(bl);
5470 }
5471
5472 void object_manifest_t::dump(Formatter *f) const
5473 {
5474 f->dump_unsigned("type", type);
5475 if (type == TYPE_REDIRECT) {
5476 f->open_object_section("redirect_target");
5477 redirect_target.dump(f);
5478 f->close_section();
5479 } else if (type == TYPE_CHUNKED) {
5480 f->open_array_section("chunk_map");
5481 for (auto& p : chunk_map) {
5482 f->open_object_section("chunk");
5483 f->dump_unsigned("offset", p.first);
5484 p.second.dump(f);
5485 f->close_section();
5486 }
5487 f->close_section();
5488 }
5489 }
5490
5491 void object_manifest_t::generate_test_instances(list<object_manifest_t*>& o)
5492 {
5493 o.push_back(new object_manifest_t());
5494 o.back()->type = TYPE_REDIRECT;
5495 }
5496
5497 ostream& operator<<(ostream& out, const object_manifest_t& om)
5498 {
5499 out << "manifest(" << om.get_type_name();
5500 if (om.is_redirect()) {
5501 out << " " << om.redirect_target;
5502 } else if (om.is_chunked()) {
5503 out << " " << om.chunk_map;
5504 }
5505 out << ")";
5506 return out;
5507 }
5508
5509 // -- object_info_t --
5510
5511 void object_info_t::copy_user_bits(const object_info_t& other)
5512 {
5513 // these bits are copied from head->clone.
5514 size = other.size;
5515 mtime = other.mtime;
5516 local_mtime = other.local_mtime;
5517 last_reqid = other.last_reqid;
5518 truncate_seq = other.truncate_seq;
5519 truncate_size = other.truncate_size;
5520 flags = other.flags;
5521 user_version = other.user_version;
5522 data_digest = other.data_digest;
5523 omap_digest = other.omap_digest;
5524 }
5525
5526 void object_info_t::encode(bufferlist& bl, uint64_t features) const
5527 {
5528 object_locator_t myoloc(soid);
5529 map<entity_name_t, watch_info_t> old_watchers;
5530 for (map<pair<uint64_t, entity_name_t>, watch_info_t>::const_iterator i =
5531 watchers.begin();
5532 i != watchers.end();
5533 ++i) {
5534 old_watchers.insert(make_pair(i->first.second, i->second));
5535 }
5536 ENCODE_START(17, 8, bl);
5537 encode(soid, bl);
5538 encode(myoloc, bl); //Retained for compatibility
5539 encode((__u32)0, bl); // was category, no longer used
5540 encode(version, bl);
5541 encode(prior_version, bl);
5542 encode(last_reqid, bl);
5543 encode(size, bl);
5544 encode(mtime, bl);
5545 if (soid.snap == CEPH_NOSNAP)
5546 encode(osd_reqid_t(), bl); // used to be wrlock_by
5547 else
5548 encode((uint32_t)0, bl); // was legacy_snaps
5549 encode(truncate_seq, bl);
5550 encode(truncate_size, bl);
5551 encode(is_lost(), bl);
5552 encode(old_watchers, bl, features);
5553 /* shenanigans to avoid breaking backwards compatibility in the disk format.
5554 * When we can, switch this out for simply putting the version_t on disk. */
5555 eversion_t user_eversion(0, user_version);
5556 encode(user_eversion, bl);
5557 encode(test_flag(FLAG_USES_TMAP), bl);
5558 encode(watchers, bl, features);
5559 __u32 _flags = flags;
5560 encode(_flags, bl);
5561 encode(local_mtime, bl);
5562 encode(data_digest, bl);
5563 encode(omap_digest, bl);
5564 encode(expected_object_size, bl);
5565 encode(expected_write_size, bl);
5566 encode(alloc_hint_flags, bl);
5567 if (has_manifest()) {
5568 encode(manifest, bl);
5569 }
5570 ENCODE_FINISH(bl);
5571 }
5572
5573 void object_info_t::decode(bufferlist::const_iterator& bl)
5574 {
5575 object_locator_t myoloc;
5576 DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl);
5577 map<entity_name_t, watch_info_t> old_watchers;
5578 decode(soid, bl);
5579 decode(myoloc, bl);
5580 {
5581 string category;
5582 decode(category, bl); // no longer used
5583 }
5584 decode(version, bl);
5585 decode(prior_version, bl);
5586 decode(last_reqid, bl);
5587 decode(size, bl);
5588 decode(mtime, bl);
5589 if (soid.snap == CEPH_NOSNAP) {
5590 osd_reqid_t wrlock_by;
5591 decode(wrlock_by, bl);
5592 } else {
5593 vector<snapid_t> legacy_snaps;
5594 decode(legacy_snaps, bl);
5595 }
5596 decode(truncate_seq, bl);
5597 decode(truncate_size, bl);
5598
5599 // if this is struct_v >= 13, we will overwrite this
5600 // below since this field is just here for backwards
5601 // compatibility
5602 __u8 lo;
5603 decode(lo, bl);
5604 flags = (flag_t)lo;
5605
5606 decode(old_watchers, bl);
5607 eversion_t user_eversion;
5608 decode(user_eversion, bl);
5609 user_version = user_eversion.version;
5610
5611 if (struct_v >= 9) {
5612 bool uses_tmap = false;
5613 decode(uses_tmap, bl);
5614 if (uses_tmap)
5615 set_flag(FLAG_USES_TMAP);
5616 } else {
5617 set_flag(FLAG_USES_TMAP);
5618 }
5619 if (struct_v < 10)
5620 soid.pool = myoloc.pool;
5621 if (struct_v >= 11) {
5622 decode(watchers, bl);
5623 } else {
5624 for (map<entity_name_t, watch_info_t>::iterator i = old_watchers.begin();
5625 i != old_watchers.end();
5626 ++i) {
5627 watchers.insert(
5628 make_pair(
5629 make_pair(i->second.cookie, i->first), i->second));
5630 }
5631 }
5632 if (struct_v >= 13) {
5633 __u32 _flags;
5634 decode(_flags, bl);
5635 flags = (flag_t)_flags;
5636 }
5637 if (struct_v >= 14) {
5638 decode(local_mtime, bl);
5639 } else {
5640 local_mtime = utime_t();
5641 }
5642 if (struct_v >= 15) {
5643 decode(data_digest, bl);
5644 decode(omap_digest, bl);
5645 } else {
5646 data_digest = omap_digest = -1;
5647 clear_flag(FLAG_DATA_DIGEST);
5648 clear_flag(FLAG_OMAP_DIGEST);
5649 }
5650 if (struct_v >= 16) {
5651 decode(expected_object_size, bl);
5652 decode(expected_write_size, bl);
5653 decode(alloc_hint_flags, bl);
5654 } else {
5655 expected_object_size = 0;
5656 expected_write_size = 0;
5657 alloc_hint_flags = 0;
5658 }
5659 if (struct_v >= 17) {
5660 if (has_manifest()) {
5661 decode(manifest, bl);
5662 }
5663 }
5664 DECODE_FINISH(bl);
5665 }
5666
5667 void object_info_t::dump(Formatter *f) const
5668 {
5669 f->open_object_section("oid");
5670 soid.dump(f);
5671 f->close_section();
5672 f->dump_stream("version") << version;
5673 f->dump_stream("prior_version") << prior_version;
5674 f->dump_stream("last_reqid") << last_reqid;
5675 f->dump_unsigned("user_version", user_version);
5676 f->dump_unsigned("size", size);
5677 f->dump_stream("mtime") << mtime;
5678 f->dump_stream("local_mtime") << local_mtime;
5679 f->dump_unsigned("lost", (int)is_lost());
5680 vector<string> sv = get_flag_vector(flags);
5681 f->open_array_section("flags");
5682 for (auto str: sv)
5683 f->dump_string("flags", str);
5684 f->close_section();
5685 f->dump_unsigned("truncate_seq", truncate_seq);
5686 f->dump_unsigned("truncate_size", truncate_size);
5687 f->dump_format("data_digest", "0x%08x", data_digest);
5688 f->dump_format("omap_digest", "0x%08x", omap_digest);
5689 f->dump_unsigned("expected_object_size", expected_object_size);
5690 f->dump_unsigned("expected_write_size", expected_write_size);
5691 f->dump_unsigned("alloc_hint_flags", alloc_hint_flags);
5692 f->dump_object("manifest", manifest);
5693 f->open_object_section("watchers");
5694 for (map<pair<uint64_t, entity_name_t>,watch_info_t>::const_iterator p =
5695 watchers.begin(); p != watchers.end(); ++p) {
5696 stringstream ss;
5697 ss << p->first.second;
5698 f->open_object_section(ss.str().c_str());
5699 p->second.dump(f);
5700 f->close_section();
5701 }
5702 f->close_section();
5703 }
5704
5705 void object_info_t::generate_test_instances(list<object_info_t*>& o)
5706 {
5707 o.push_back(new object_info_t());
5708
5709 // fixme
5710 }
5711
5712
5713 ostream& operator<<(ostream& out, const object_info_t& oi)
5714 {
5715 out << oi.soid << "(" << oi.version
5716 << " " << oi.last_reqid;
5717 if (oi.flags)
5718 out << " " << oi.get_flag_string();
5719 out << " s " << oi.size;
5720 out << " uv " << oi.user_version;
5721 if (oi.is_data_digest())
5722 out << " dd " << std::hex << oi.data_digest << std::dec;
5723 if (oi.is_omap_digest())
5724 out << " od " << std::hex << oi.omap_digest << std::dec;
5725 out << " alloc_hint [" << oi.expected_object_size
5726 << " " << oi.expected_write_size
5727 << " " << oi.alloc_hint_flags << "]";
5728 if (oi.has_manifest())
5729 out << " " << oi.manifest;
5730 out << ")";
5731 return out;
5732 }
5733
5734 // -- ObjectRecovery --
5735 void ObjectRecoveryProgress::encode(bufferlist &bl) const
5736 {
5737 ENCODE_START(1, 1, bl);
5738 encode(first, bl);
5739 encode(data_complete, bl);
5740 encode(data_recovered_to, bl);
5741 encode(omap_recovered_to, bl);
5742 encode(omap_complete, bl);
5743 ENCODE_FINISH(bl);
5744 }
5745
5746 void ObjectRecoveryProgress::decode(bufferlist::const_iterator &bl)
5747 {
5748 DECODE_START(1, bl);
5749 decode(first, bl);
5750 decode(data_complete, bl);
5751 decode(data_recovered_to, bl);
5752 decode(omap_recovered_to, bl);
5753 decode(omap_complete, bl);
5754 DECODE_FINISH(bl);
5755 }
5756
5757 ostream &operator<<(ostream &out, const ObjectRecoveryProgress &prog)
5758 {
5759 return prog.print(out);
5760 }
5761
5762 void ObjectRecoveryProgress::generate_test_instances(
5763 list<ObjectRecoveryProgress*>& o)
5764 {
5765 o.push_back(new ObjectRecoveryProgress);
5766 o.back()->first = false;
5767 o.back()->data_complete = true;
5768 o.back()->omap_complete = true;
5769 o.back()->data_recovered_to = 100;
5770
5771 o.push_back(new ObjectRecoveryProgress);
5772 o.back()->first = true;
5773 o.back()->data_complete = false;
5774 o.back()->omap_complete = false;
5775 o.back()->data_recovered_to = 0;
5776 }
5777
5778 ostream &ObjectRecoveryProgress::print(ostream &out) const
5779 {
5780 return out << "ObjectRecoveryProgress("
5781 << ( first ? "" : "!" ) << "first, "
5782 << "data_recovered_to:" << data_recovered_to
5783 << ", data_complete:" << ( data_complete ? "true" : "false" )
5784 << ", omap_recovered_to:" << omap_recovered_to
5785 << ", omap_complete:" << ( omap_complete ? "true" : "false" )
5786 << ", error:" << ( error ? "true" : "false" )
5787 << ")";
5788 }
5789
5790 void ObjectRecoveryProgress::dump(Formatter *f) const
5791 {
5792 f->dump_int("first?", first);
5793 f->dump_int("data_complete?", data_complete);
5794 f->dump_unsigned("data_recovered_to", data_recovered_to);
5795 f->dump_int("omap_complete?", omap_complete);
5796 f->dump_string("omap_recovered_to", omap_recovered_to);
5797 }
5798
5799 void ObjectRecoveryInfo::encode(bufferlist &bl, uint64_t features) const
5800 {
5801 ENCODE_START(2, 1, bl);
5802 encode(soid, bl);
5803 encode(version, bl);
5804 encode(size, bl);
5805 encode(oi, bl, features);
5806 encode(ss, bl);
5807 encode(copy_subset, bl);
5808 encode(clone_subset, bl);
5809 ENCODE_FINISH(bl);
5810 }
5811
5812 void ObjectRecoveryInfo::decode(bufferlist::const_iterator &bl,
5813 int64_t pool)
5814 {
5815 DECODE_START(2, bl);
5816 decode(soid, bl);
5817 decode(version, bl);
5818 decode(size, bl);
5819 decode(oi, bl);
5820 decode(ss, bl);
5821 decode(copy_subset, bl);
5822 decode(clone_subset, bl);
5823 DECODE_FINISH(bl);
5824
5825 if (struct_v < 2) {
5826 if (!soid.is_max() && soid.pool == -1)
5827 soid.pool = pool;
5828 map<hobject_t, interval_set<uint64_t>> tmp;
5829 tmp.swap(clone_subset);
5830 for (map<hobject_t, interval_set<uint64_t>>::iterator i = tmp.begin();
5831 i != tmp.end();
5832 ++i) {
5833 hobject_t first(i->first);
5834 if (!first.is_max() && first.pool == -1)
5835 first.pool = pool;
5836 clone_subset[first].swap(i->second);
5837 }
5838 }
5839 }
5840
5841 void ObjectRecoveryInfo::generate_test_instances(
5842 list<ObjectRecoveryInfo*>& o)
5843 {
5844 o.push_back(new ObjectRecoveryInfo);
5845 o.back()->soid = hobject_t(sobject_t("key", CEPH_NOSNAP));
5846 o.back()->version = eversion_t(0,0);
5847 o.back()->size = 100;
5848 }
5849
5850
5851 void ObjectRecoveryInfo::dump(Formatter *f) const
5852 {
5853 f->dump_stream("object") << soid;
5854 f->dump_stream("at_version") << version;
5855 f->dump_stream("size") << size;
5856 {
5857 f->open_object_section("object_info");
5858 oi.dump(f);
5859 f->close_section();
5860 }
5861 {
5862 f->open_object_section("snapset");
5863 ss.dump(f);
5864 f->close_section();
5865 }
5866 f->dump_stream("copy_subset") << copy_subset;
5867 f->dump_stream("clone_subset") << clone_subset;
5868 }
5869
5870 ostream& operator<<(ostream& out, const ObjectRecoveryInfo &inf)
5871 {
5872 return inf.print(out);
5873 }
5874
5875 ostream &ObjectRecoveryInfo::print(ostream &out) const
5876 {
5877 return out << "ObjectRecoveryInfo("
5878 << soid << "@" << version
5879 << ", size: " << size
5880 << ", copy_subset: " << copy_subset
5881 << ", clone_subset: " << clone_subset
5882 << ", snapset: " << ss
5883 << ")";
5884 }
5885
5886 // -- PushReplyOp --
5887 void PushReplyOp::generate_test_instances(list<PushReplyOp*> &o)
5888 {
5889 o.push_back(new PushReplyOp);
5890 o.push_back(new PushReplyOp);
5891 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5892 o.push_back(new PushReplyOp);
5893 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5894 }
5895
5896 void PushReplyOp::encode(bufferlist &bl) const
5897 {
5898 ENCODE_START(1, 1, bl);
5899 encode(soid, bl);
5900 ENCODE_FINISH(bl);
5901 }
5902
5903 void PushReplyOp::decode(bufferlist::const_iterator &bl)
5904 {
5905 DECODE_START(1, bl);
5906 decode(soid, bl);
5907 DECODE_FINISH(bl);
5908 }
5909
5910 void PushReplyOp::dump(Formatter *f) const
5911 {
5912 f->dump_stream("soid") << soid;
5913 }
5914
5915 ostream &PushReplyOp::print(ostream &out) const
5916 {
5917 return out
5918 << "PushReplyOp(" << soid
5919 << ")";
5920 }
5921
5922 ostream& operator<<(ostream& out, const PushReplyOp &op)
5923 {
5924 return op.print(out);
5925 }
5926
5927 uint64_t PushReplyOp::cost(CephContext *cct) const
5928 {
5929
5930 return cct->_conf->osd_push_per_object_cost +
5931 cct->_conf->osd_recovery_max_chunk;
5932 }
5933
5934 // -- PullOp --
5935 void PullOp::generate_test_instances(list<PullOp*> &o)
5936 {
5937 o.push_back(new PullOp);
5938 o.push_back(new PullOp);
5939 o.back()->soid = hobject_t(sobject_t("asdf", 2));
5940 o.back()->recovery_info.version = eversion_t(3, 10);
5941 o.push_back(new PullOp);
5942 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
5943 o.back()->recovery_info.version = eversion_t(0, 0);
5944 }
5945
5946 void PullOp::encode(bufferlist &bl, uint64_t features) const
5947 {
5948 ENCODE_START(1, 1, bl);
5949 encode(soid, bl);
5950 encode(recovery_info, bl, features);
5951 encode(recovery_progress, bl);
5952 ENCODE_FINISH(bl);
5953 }
5954
5955 void PullOp::decode(bufferlist::const_iterator &bl)
5956 {
5957 DECODE_START(1, bl);
5958 decode(soid, bl);
5959 decode(recovery_info, bl);
5960 decode(recovery_progress, bl);
5961 DECODE_FINISH(bl);
5962 }
5963
5964 void PullOp::dump(Formatter *f) const
5965 {
5966 f->dump_stream("soid") << soid;
5967 {
5968 f->open_object_section("recovery_info");
5969 recovery_info.dump(f);
5970 f->close_section();
5971 }
5972 {
5973 f->open_object_section("recovery_progress");
5974 recovery_progress.dump(f);
5975 f->close_section();
5976 }
5977 }
5978
5979 ostream &PullOp::print(ostream &out) const
5980 {
5981 return out
5982 << "PullOp(" << soid
5983 << ", recovery_info: " << recovery_info
5984 << ", recovery_progress: " << recovery_progress
5985 << ")";
5986 }
5987
5988 ostream& operator<<(ostream& out, const PullOp &op)
5989 {
5990 return op.print(out);
5991 }
5992
5993 uint64_t PullOp::cost(CephContext *cct) const
5994 {
5995 return cct->_conf->osd_push_per_object_cost +
5996 cct->_conf->osd_recovery_max_chunk;
5997 }
5998
5999 // -- PushOp --
6000 void PushOp::generate_test_instances(list<PushOp*> &o)
6001 {
6002 o.push_back(new PushOp);
6003 o.push_back(new PushOp);
6004 o.back()->soid = hobject_t(sobject_t("asdf", 2));
6005 o.back()->version = eversion_t(3, 10);
6006 o.push_back(new PushOp);
6007 o.back()->soid = hobject_t(sobject_t("asdf", CEPH_NOSNAP));
6008 o.back()->version = eversion_t(0, 0);
6009 }
6010
6011 void PushOp::encode(bufferlist &bl, uint64_t features) const
6012 {
6013 ENCODE_START(1, 1, bl);
6014 encode(soid, bl);
6015 encode(version, bl);
6016 encode(data, bl);
6017 encode(data_included, bl);
6018 encode(omap_header, bl);
6019 encode(omap_entries, bl);
6020 encode(attrset, bl);
6021 encode(recovery_info, bl, features);
6022 encode(after_progress, bl);
6023 encode(before_progress, bl);
6024 ENCODE_FINISH(bl);
6025 }
6026
6027 void PushOp::decode(bufferlist::const_iterator &bl)
6028 {
6029 DECODE_START(1, bl);
6030 decode(soid, bl);
6031 decode(version, bl);
6032 decode(data, bl);
6033 decode(data_included, bl);
6034 decode(omap_header, bl);
6035 decode(omap_entries, bl);
6036 decode(attrset, bl);
6037 decode(recovery_info, bl);
6038 decode(after_progress, bl);
6039 decode(before_progress, bl);
6040 DECODE_FINISH(bl);
6041 }
6042
6043 void PushOp::dump(Formatter *f) const
6044 {
6045 f->dump_stream("soid") << soid;
6046 f->dump_stream("version") << version;
6047 f->dump_int("data_len", data.length());
6048 f->dump_stream("data_included") << data_included;
6049 f->dump_int("omap_header_len", omap_header.length());
6050 f->dump_int("omap_entries_len", omap_entries.size());
6051 f->dump_int("attrset_len", attrset.size());
6052 {
6053 f->open_object_section("recovery_info");
6054 recovery_info.dump(f);
6055 f->close_section();
6056 }
6057 {
6058 f->open_object_section("after_progress");
6059 after_progress.dump(f);
6060 f->close_section();
6061 }
6062 {
6063 f->open_object_section("before_progress");
6064 before_progress.dump(f);
6065 f->close_section();
6066 }
6067 }
6068
6069 ostream &PushOp::print(ostream &out) const
6070 {
6071 return out
6072 << "PushOp(" << soid
6073 << ", version: " << version
6074 << ", data_included: " << data_included
6075 << ", data_size: " << data.length()
6076 << ", omap_header_size: " << omap_header.length()
6077 << ", omap_entries_size: " << omap_entries.size()
6078 << ", attrset_size: " << attrset.size()
6079 << ", recovery_info: " << recovery_info
6080 << ", after_progress: " << after_progress
6081 << ", before_progress: " << before_progress
6082 << ")";
6083 }
6084
6085 ostream& operator<<(ostream& out, const PushOp &op)
6086 {
6087 return op.print(out);
6088 }
6089
6090 uint64_t PushOp::cost(CephContext *cct) const
6091 {
6092 uint64_t cost = data_included.size();
6093 for (map<string, bufferlist>::const_iterator i =
6094 omap_entries.begin();
6095 i != omap_entries.end();
6096 ++i) {
6097 cost += i->second.length();
6098 }
6099 cost += cct->_conf->osd_push_per_object_cost;
6100 return cost;
6101 }
6102
6103 // -- ScrubMap --
6104
6105 void ScrubMap::merge_incr(const ScrubMap &l)
6106 {
6107 ceph_assert(valid_through == l.incr_since);
6108 valid_through = l.valid_through;
6109
6110 for (map<hobject_t,object>::const_iterator p = l.objects.begin();
6111 p != l.objects.end();
6112 ++p){
6113 if (p->second.negative) {
6114 map<hobject_t,object>::iterator q = objects.find(p->first);
6115 if (q != objects.end()) {
6116 objects.erase(q);
6117 }
6118 } else {
6119 objects[p->first] = p->second;
6120 }
6121 }
6122 }
6123
6124 void ScrubMap::encode(bufferlist& bl) const
6125 {
6126 ENCODE_START(3, 2, bl);
6127 encode(objects, bl);
6128 encode((__u32)0, bl); // used to be attrs; now deprecated
6129 bufferlist old_logbl; // not used
6130 encode(old_logbl, bl);
6131 encode(valid_through, bl);
6132 encode(incr_since, bl);
6133 ENCODE_FINISH(bl);
6134 }
6135
6136 void ScrubMap::decode(bufferlist::const_iterator& bl, int64_t pool)
6137 {
6138 DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
6139 decode(objects, bl);
6140 {
6141 map<string,string> attrs; // deprecated
6142 decode(attrs, bl);
6143 }
6144 bufferlist old_logbl; // not used
6145 decode(old_logbl, bl);
6146 decode(valid_through, bl);
6147 decode(incr_since, bl);
6148 DECODE_FINISH(bl);
6149
6150 // handle hobject_t upgrade
6151 if (struct_v < 3) {
6152 map<hobject_t, object> tmp;
6153 tmp.swap(objects);
6154 for (map<hobject_t, object>::iterator i = tmp.begin();
6155 i != tmp.end();
6156 ++i) {
6157 hobject_t first(i->first);
6158 if (!first.is_max() && first.pool == -1)
6159 first.pool = pool;
6160 objects[first] = i->second;
6161 }
6162 }
6163 }
6164
6165 void ScrubMap::dump(Formatter *f) const
6166 {
6167 f->dump_stream("valid_through") << valid_through;
6168 f->dump_stream("incremental_since") << incr_since;
6169 f->open_array_section("objects");
6170 for (map<hobject_t,object>::const_iterator p = objects.begin(); p != objects.end(); ++p) {
6171 f->open_object_section("object");
6172 f->dump_string("name", p->first.oid.name);
6173 f->dump_unsigned("hash", p->first.get_hash());
6174 f->dump_string("key", p->first.get_key());
6175 f->dump_int("snapid", p->first.snap);
6176 p->second.dump(f);
6177 f->close_section();
6178 }
6179 f->close_section();
6180 }
6181
6182 void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
6183 {
6184 o.push_back(new ScrubMap);
6185 o.push_back(new ScrubMap);
6186 o.back()->valid_through = eversion_t(1, 2);
6187 o.back()->incr_since = eversion_t(3, 4);
6188 list<object*> obj;
6189 object::generate_test_instances(obj);
6190 o.back()->objects[hobject_t(object_t("foo"), "fookey", 123, 456, 0, "")] = *obj.back();
6191 obj.pop_back();
6192 o.back()->objects[hobject_t(object_t("bar"), string(), 123, 456, 0, "")] = *obj.back();
6193 }
6194
6195 // -- ScrubMap::object --
6196
6197 void ScrubMap::object::encode(bufferlist& bl) const
6198 {
6199 bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
6200 ENCODE_START(10, 7, bl);
6201 encode(size, bl);
6202 encode(negative, bl);
6203 encode(attrs, bl);
6204 encode(digest, bl);
6205 encode(digest_present, bl);
6206 encode((uint32_t)0, bl); // obsolete nlinks
6207 encode((uint32_t)0, bl); // snapcolls
6208 encode(omap_digest, bl);
6209 encode(omap_digest_present, bl);
6210 encode(compat_read_error, bl);
6211 encode(stat_error, bl);
6212 encode(read_error, bl);
6213 encode(ec_hash_mismatch, bl);
6214 encode(ec_size_mismatch, bl);
6215 encode(large_omap_object_found, bl);
6216 encode(large_omap_object_key_count, bl);
6217 encode(large_omap_object_value_size, bl);
6218 encode(object_omap_bytes, bl);
6219 encode(object_omap_keys, bl);
6220 ENCODE_FINISH(bl);
6221 }
6222
6223 void ScrubMap::object::decode(bufferlist::const_iterator& bl)
6224 {
6225 DECODE_START(10, bl);
6226 decode(size, bl);
6227 bool tmp, compat_read_error = false;
6228 decode(tmp, bl);
6229 negative = tmp;
6230 decode(attrs, bl);
6231 decode(digest, bl);
6232 decode(tmp, bl);
6233 digest_present = tmp;
6234 {
6235 uint32_t nlinks;
6236 decode(nlinks, bl);
6237 set<snapid_t> snapcolls;
6238 decode(snapcolls, bl);
6239 }
6240 decode(omap_digest, bl);
6241 decode(tmp, bl);
6242 omap_digest_present = tmp;
6243 decode(compat_read_error, bl);
6244 decode(tmp, bl);
6245 stat_error = tmp;
6246 if (struct_v >= 8) {
6247 decode(tmp, bl);
6248 read_error = tmp;
6249 decode(tmp, bl);
6250 ec_hash_mismatch = tmp;
6251 decode(tmp, bl);
6252 ec_size_mismatch = tmp;
6253 }
6254 // If older encoder found a read_error, set read_error
6255 if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
6256 read_error = true;
6257 if (struct_v >= 9) {
6258 decode(tmp, bl);
6259 large_omap_object_found = tmp;
6260 decode(large_omap_object_key_count, bl);
6261 decode(large_omap_object_value_size, bl);
6262 }
6263 if (struct_v >= 10) {
6264 decode(object_omap_bytes, bl);
6265 decode(object_omap_keys, bl);
6266 }
6267 DECODE_FINISH(bl);
6268 }
6269
6270 void ScrubMap::object::dump(Formatter *f) const
6271 {
6272 f->dump_int("size", size);
6273 f->dump_int("negative", negative);
6274 f->open_array_section("attrs");
6275 for (map<string,bufferptr>::const_iterator p = attrs.begin(); p != attrs.end(); ++p) {
6276 f->open_object_section("attr");
6277 f->dump_string("name", p->first);
6278 f->dump_int("length", p->second.length());
6279 f->close_section();
6280 }
6281 f->close_section();
6282 }
6283
6284 void ScrubMap::object::generate_test_instances(list<object*>& o)
6285 {
6286 o.push_back(new object);
6287 o.push_back(new object);
6288 o.back()->negative = true;
6289 o.push_back(new object);
6290 o.back()->size = 123;
6291 o.back()->attrs["foo"] = buffer::copy("foo", 3);
6292 o.back()->attrs["bar"] = buffer::copy("barval", 6);
6293 }
6294
6295 // -- OSDOp --
6296
6297 ostream& operator<<(ostream& out, const OSDOp& op)
6298 {
6299 out << ceph_osd_op_name(op.op.op);
6300 if (ceph_osd_op_type_data(op.op.op)) {
6301 // data extent
6302 switch (op.op.op) {
6303 case CEPH_OSD_OP_ASSERT_VER:
6304 out << " v" << op.op.assert_ver.ver;
6305 break;
6306 case CEPH_OSD_OP_TRUNCATE:
6307 out << " " << op.op.extent.offset;
6308 break;
6309 case CEPH_OSD_OP_MASKTRUNC:
6310 case CEPH_OSD_OP_TRIMTRUNC:
6311 out << " " << op.op.extent.truncate_seq << "@"
6312 << (int64_t)op.op.extent.truncate_size;
6313 break;
6314 case CEPH_OSD_OP_ROLLBACK:
6315 out << " " << snapid_t(op.op.snap.snapid);
6316 break;
6317 case CEPH_OSD_OP_WATCH:
6318 out << " " << ceph_osd_watch_op_name(op.op.watch.op)
6319 << " cookie " << op.op.watch.cookie;
6320 if (op.op.watch.gen)
6321 out << " gen " << op.op.watch.gen;
6322 break;
6323 case CEPH_OSD_OP_NOTIFY:
6324 out << " cookie " << op.op.notify.cookie;
6325 break;
6326 case CEPH_OSD_OP_COPY_GET:
6327 out << " max " << op.op.copy_get.max;
6328 break;
6329 case CEPH_OSD_OP_COPY_FROM:
6330 out << " ver " << op.op.copy_from.src_version;
6331 break;
6332 case CEPH_OSD_OP_SETALLOCHINT:
6333 out << " object_size " << op.op.alloc_hint.expected_object_size
6334 << " write_size " << op.op.alloc_hint.expected_write_size;
6335 break;
6336 case CEPH_OSD_OP_READ:
6337 case CEPH_OSD_OP_SPARSE_READ:
6338 case CEPH_OSD_OP_SYNC_READ:
6339 case CEPH_OSD_OP_WRITE:
6340 case CEPH_OSD_OP_WRITEFULL:
6341 case CEPH_OSD_OP_ZERO:
6342 case CEPH_OSD_OP_APPEND:
6343 case CEPH_OSD_OP_MAPEXT:
6344 case CEPH_OSD_OP_CMPEXT:
6345 out << " " << op.op.extent.offset << "~" << op.op.extent.length;
6346 if (op.op.extent.truncate_seq)
6347 out << " [" << op.op.extent.truncate_seq << "@"
6348 << (int64_t)op.op.extent.truncate_size << "]";
6349 if (op.op.flags)
6350 out << " [" << ceph_osd_op_flag_string(op.op.flags) << "]";
6351 default:
6352 // don't show any arg info
6353 break;
6354 }
6355 } else if (ceph_osd_op_type_attr(op.op.op)) {
6356 // xattr name
6357 if (op.op.xattr.name_len && op.indata.length()) {
6358 out << " ";
6359 op.indata.write(0, op.op.xattr.name_len, out);
6360 }
6361 if (op.op.xattr.value_len)
6362 out << " (" << op.op.xattr.value_len << ")";
6363 if (op.op.op == CEPH_OSD_OP_CMPXATTR)
6364 out << " op " << (int)op.op.xattr.cmp_op
6365 << " mode " << (int)op.op.xattr.cmp_mode;
6366 } else if (ceph_osd_op_type_exec(op.op.op)) {
6367 // class.method
6368 if (op.op.cls.class_len && op.indata.length()) {
6369 out << " ";
6370 op.indata.write(0, op.op.cls.class_len, out);
6371 out << ".";
6372 op.indata.write(op.op.cls.class_len, op.op.cls.method_len, out);
6373 }
6374 } else if (ceph_osd_op_type_pg(op.op.op)) {
6375 switch (op.op.op) {
6376 case CEPH_OSD_OP_PGLS:
6377 case CEPH_OSD_OP_PGLS_FILTER:
6378 case CEPH_OSD_OP_PGNLS:
6379 case CEPH_OSD_OP_PGNLS_FILTER:
6380 out << " start_epoch " << op.op.pgls.start_epoch;
6381 break;
6382 case CEPH_OSD_OP_PG_HITSET_LS:
6383 break;
6384 case CEPH_OSD_OP_PG_HITSET_GET:
6385 out << " " << utime_t(op.op.hit_set_get.stamp);
6386 break;
6387 case CEPH_OSD_OP_SCRUBLS:
6388 break;
6389 }
6390 }
6391 return out;
6392 }
6393
6394
6395 void OSDOp::split_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& in)
6396 {
6397 bufferlist::iterator datap = in.begin();
6398 for (unsigned i = 0; i < ops.size(); i++) {
6399 if (ops[i].op.payload_len) {
6400 datap.copy(ops[i].op.payload_len, ops[i].indata);
6401 }
6402 }
6403 }
6404
6405 void OSDOp::merge_osd_op_vector_in_data(vector<OSDOp>& ops, bufferlist& out)
6406 {
6407 for (unsigned i = 0; i < ops.size(); i++) {
6408 if (ops[i].indata.length()) {
6409 ops[i].op.payload_len = ops[i].indata.length();
6410 out.append(ops[i].indata);
6411 }
6412 }
6413 }
6414
6415 void OSDOp::split_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& in)
6416 {
6417 bufferlist::iterator datap = in.begin();
6418 for (unsigned i = 0; i < ops.size(); i++) {
6419 if (ops[i].op.payload_len) {
6420 datap.copy(ops[i].op.payload_len, ops[i].outdata);
6421 }
6422 }
6423 }
6424
6425 void OSDOp::merge_osd_op_vector_out_data(vector<OSDOp>& ops, bufferlist& out)
6426 {
6427 for (unsigned i = 0; i < ops.size(); i++) {
6428 if (ops[i].outdata.length()) {
6429 ops[i].op.payload_len = ops[i].outdata.length();
6430 out.append(ops[i].outdata);
6431 }
6432 }
6433 }
6434
6435 void OSDOp::clear_data(vector<OSDOp>& ops)
6436 {
6437 for (unsigned i = 0; i < ops.size(); i++) {
6438 OSDOp& op = ops[i];
6439 op.outdata.clear();
6440 if (ceph_osd_op_type_attr(op.op.op) &&
6441 op.op.xattr.name_len &&
6442 op.indata.length() >= op.op.xattr.name_len) {
6443 bufferptr bp(op.op.xattr.name_len);
6444 bufferlist bl;
6445 bl.append(bp);
6446 bl.copy_in(0, op.op.xattr.name_len, op.indata);
6447 op.indata.claim(bl);
6448 } else if (ceph_osd_op_type_exec(op.op.op) &&
6449 op.op.cls.class_len &&
6450 op.indata.length() >
6451 (op.op.cls.class_len + op.op.cls.method_len)) {
6452 __u8 len = op.op.cls.class_len + op.op.cls.method_len;
6453 bufferptr bp(len);
6454 bufferlist bl;
6455 bl.append(bp);
6456 bl.copy_in(0, len, op.indata);
6457 op.indata.claim(bl);
6458 } else {
6459 op.indata.clear();
6460 }
6461 }
6462 }