]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/PGMap.cc
import ceph nautilus 14.2.2
[ceph.git] / ceph / src / mon / PGMap.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
224ce89b
WB
4#include <boost/algorithm/string.hpp>
5
7c673cae
FG
6#include "PGMap.h"
7
8#define dout_subsys ceph_subsys_mon
9#include "common/debug.h"
11fdf7f2 10#include "common/Clock.h"
7c673cae 11#include "common/Formatter.h"
11fdf7f2 12#include "global/global_context.h"
7c673cae
FG
13#include "include/ceph_features.h"
14#include "include/stringify.h"
15
16#include "osd/osd_types.h"
17#include "osd/OSDMap.h"
18
19#define dout_context g_ceph_context
20
31f18b77
FG
21MEMPOOL_DEFINE_OBJECT_FACTORY(PGMapDigest, pgmap_digest, pgmap);
22MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap, pgmap, pgmap);
23MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap::Incremental, pgmap_inc, pgmap);
24
25
26// ---------------------
27// PGMapDigest
28
29void PGMapDigest::encode(bufferlist& bl, uint64_t features) const
30{
31 // NOTE: see PGMap::encode_digest
11fdf7f2
TL
32 uint8_t v = 4;
33 if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
34 v = 1;
35 } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
36 v = 3;
37 }
38 ENCODE_START(v, 1, bl);
39 encode(num_pg, bl);
40 encode(num_pg_active, bl);
41 encode(num_pg_unknown, bl);
42 encode(num_osd, bl);
43 encode(pg_pool_sum, bl, features);
44 encode(pg_sum, bl, features);
45 encode(osd_sum, bl, features);
46 if (v >= 2) {
47 encode(num_pg_by_state, bl);
48 } else {
49 uint32_t n = num_pg_by_state.size();
50 encode(n, bl);
51 for (auto p : num_pg_by_state) {
52 encode((uint32_t)p.first, bl);
53 encode(p.second, bl);
54 }
55 }
56 encode(num_pg_by_osd, bl);
57 encode(num_pg_by_pool, bl);
58 encode(osd_last_seq, bl);
59 encode(per_pool_sum_delta, bl, features);
60 encode(per_pool_sum_deltas_stamps, bl);
61 encode(pg_sum_delta, bl, features);
62 encode(stamp_delta, bl);
63 encode(avail_space_by_rule, bl);
64 if (struct_v >= 3) {
65 encode(purged_snaps, bl);
66 }
67 if (struct_v >= 4) {
68 encode(osd_sum_by_class, bl, features);
69 }
7c673cae
FG
70 ENCODE_FINISH(bl);
71}
72
11fdf7f2 73void PGMapDigest::decode(bufferlist::const_iterator& p)
31f18b77 74{
11fdf7f2
TL
75 DECODE_START(4, p);
76 decode(num_pg, p);
77 decode(num_pg_active, p);
78 decode(num_pg_unknown, p);
79 decode(num_osd, p);
80 decode(pg_pool_sum, p);
81 decode(pg_sum, p);
82 decode(osd_sum, p);
83 if (struct_v >= 2) {
84 decode(num_pg_by_state, p);
85 } else {
86 map<int32_t, int32_t> nps;
87 decode(nps, p);
88 num_pg_by_state.clear();
89 for (auto i : nps) {
90 num_pg_by_state[i.first] = i.second;
91 }
92 }
93 decode(num_pg_by_osd, p);
94 decode(num_pg_by_pool, p);
95 decode(osd_last_seq, p);
96 decode(per_pool_sum_delta, p);
97 decode(per_pool_sum_deltas_stamps, p);
98 decode(pg_sum_delta, p);
99 decode(stamp_delta, p);
100 decode(avail_space_by_rule, p);
101 if (struct_v >= 3) {
102 decode(purged_snaps, p);
103 }
104 if (struct_v >= 4) {
105 decode(osd_sum_by_class, p);
106 }
31f18b77
FG
107 DECODE_FINISH(p);
108}
109
110void PGMapDigest::dump(Formatter *f) const
111{
112 f->dump_unsigned("num_pg", num_pg);
113 f->dump_unsigned("num_pg_active", num_pg_active);
114 f->dump_unsigned("num_pg_unknown", num_pg_unknown);
115 f->dump_unsigned("num_osd", num_osd);
116 f->dump_object("pool_sum", pg_sum);
117 f->dump_object("osd_sum", osd_sum);
11fdf7f2
TL
118
119 f->open_object_section("osd_sum_by_class");
120 for (auto& i : osd_sum_by_class) {
121 f->dump_object(i.first.c_str(), i.second);
122 }
123 f->close_section();
124
31f18b77
FG
125 f->open_array_section("pool_stats");
126 for (auto& p : pg_pool_sum) {
127 f->open_object_section("pool_stat");
128 f->dump_int("poolid", p.first);
129 auto q = num_pg_by_pool.find(p.first);
130 if (q != num_pg_by_pool.end())
131 f->dump_unsigned("num_pg", q->second);
132 p.second.dump(f);
7c673cae
FG
133 f->close_section();
134 }
135 f->close_section();
31f18b77
FG
136 f->open_array_section("osd_stats");
137 int i = 0;
138 // TODO: this isn't really correct since we can dump non-existent OSDs
139 // I dunno what osd_last_seq is set to in that case...
140 for (auto& p : osd_last_seq) {
7c673cae 141 f->open_object_section("osd_stat");
31f18b77
FG
142 f->dump_int("osd", i);
143 f->dump_unsigned("seq", p);
7c673cae 144 f->close_section();
31f18b77 145 ++i;
7c673cae
FG
146 }
147 f->close_section();
31f18b77
FG
148 f->open_array_section("num_pg_by_state");
149 for (auto& p : num_pg_by_state) {
150 f->open_object_section("count");
151 f->dump_string("state", pg_state_string(p.first));
152 f->dump_unsigned("num", p.second);
153 f->close_section();
154 }
7c673cae 155 f->close_section();
31f18b77
FG
156 f->open_array_section("num_pg_by_osd");
157 for (auto& p : num_pg_by_osd) {
158 f->open_object_section("count");
159 f->dump_unsigned("osd", p.first);
160 f->dump_unsigned("num_primary_pg", p.second.primary);
161 f->dump_unsigned("num_acting_pg", p.second.acting);
81eedcae 162 f->dump_unsigned("num_up_not_acting_pg", p.second.up_not_acting);
31f18b77
FG
163 f->close_section();
164 }
7c673cae 165 f->close_section();
11fdf7f2
TL
166 f->open_array_section("purged_snaps");
167 for (auto& j : purged_snaps) {
168 f->open_object_section("pool");
169 f->dump_int("pool", j.first);
170 f->open_object_section("purged_snaps");
171 for (auto i = j.second.begin(); i != j.second.end(); ++i) {
172 f->open_object_section("interval");
173 f->dump_stream("start") << i.get_start();
174 f->dump_stream("length") << i.get_len();
175 f->close_section();
176 }
177 f->close_section();
178 f->close_section();
179 }
180 f->close_section();
7c673cae
FG
181}
182
31f18b77 183void PGMapDigest::generate_test_instances(list<PGMapDigest*>& ls)
7c673cae 184{
31f18b77 185 ls.push_back(new PGMapDigest);
7c673cae
FG
186}
187
31f18b77
FG
// Render a as fixed-point text with two decimals; anything that compares
// below 0.01 (including negative values) is rendered as "0".
inline std::string percentify(const float& a) {
  if (a < 0.01) {
    return "0";
  }
  std::ostringstream text;
  text << std::fixed << std::setprecision(2) << a;
  return text.str();
}
7c673cae 196
31f18b77 197void PGMapDigest::print_summary(Formatter *f, ostream *out) const
7c673cae 198{
31f18b77
FG
199 if (f)
200 f->open_array_section("pgs_by_state");
7c673cae 201
31f18b77
FG
202 // list is descending numeric order (by count)
203 multimap<int,int> state_by_count; // count -> state
204 for (auto p = num_pg_by_state.begin();
205 p != num_pg_by_state.end();
206 ++p) {
207 state_by_count.insert(make_pair(p->second, p->first));
7c673cae 208 }
31f18b77
FG
209 if (f) {
210 for (auto p = state_by_count.rbegin();
211 p != state_by_count.rend();
212 ++p)
213 {
214 f->open_object_section("pgs_by_state_element");
215 f->dump_string("state_name", pg_state_string(p->second));
216 f->dump_unsigned("count", p->first);
217 f->close_section();
218 }
7c673cae 219 }
31f18b77
FG
220 if (f)
221 f->close_section();
7c673cae 222
31f18b77
FG
223 if (f) {
224 f->dump_unsigned("num_pgs", num_pg);
225 f->dump_unsigned("num_pools", pg_pool_sum.size());
226 f->dump_unsigned("num_objects", pg_sum.stats.sum.num_objects);
227 f->dump_unsigned("data_bytes", pg_sum.stats.sum.num_bytes);
11fdf7f2
TL
228 f->dump_unsigned("bytes_used", osd_sum.statfs.get_used_raw());
229 f->dump_unsigned("bytes_avail", osd_sum.statfs.available);
230 f->dump_unsigned("bytes_total", osd_sum.statfs.total);
31f18b77
FG
231 } else {
232 *out << " pools: " << pg_pool_sum.size() << " pools, "
233 << num_pg << " pgs\n";
1adf2230
AA
234 *out << " objects: " << si_u_t(pg_sum.stats.sum.num_objects) << " objects, "
235 << byte_u_t(pg_sum.stats.sum.num_bytes) << "\n";
31f18b77 236 *out << " usage: "
11fdf7f2
TL
237 << byte_u_t(osd_sum.statfs.get_used_raw()) << " used, "
238 << byte_u_t(osd_sum.statfs.available) << " / "
239 << byte_u_t(osd_sum.statfs.total) << " avail\n";
31f18b77
FG
240 *out << " pgs: ";
241 }
7c673cae 242
31f18b77 243 bool pad = false;
7c673cae 244
31f18b77
FG
245 if (num_pg_unknown > 0) {
246 float p = (float)num_pg_unknown / (float)num_pg;
247 if (f) {
248 f->dump_float("unknown_pgs_ratio", p);
7c673cae 249 } else {
31f18b77
FG
250 char b[20];
251 snprintf(b, sizeof(b), "%.3lf", p * 100.0);
252 *out << b << "% pgs unknown\n";
253 pad = true;
7c673cae 254 }
7c673cae 255 }
7c673cae 256
31f18b77
FG
257 int num_pg_inactive = num_pg - num_pg_active - num_pg_unknown;
258 if (num_pg_inactive > 0) {
259 float p = (float)num_pg_inactive / (float)num_pg;
260 if (f) {
261 f->dump_float("inactive_pgs_ratio", p);
7c673cae 262 } else {
31f18b77
FG
263 if (pad) {
264 *out << " ";
265 }
266 char b[20];
267 snprintf(b, sizeof(b), "%.3f", p * 100.0);
268 *out << b << "% pgs not active\n";
269 pad = true;
7c673cae 270 }
7c673cae 271 }
31f18b77
FG
272
273 list<string> sl;
274 overall_recovery_summary(f, &sl);
275 if (!f && !sl.empty()) {
276 for (auto p = sl.begin(); p != sl.end(); ++p) {
277 if (pad) {
278 *out << " ";
279 }
280 *out << *p << "\n";
281 pad = true;
7c673cae 282 }
7c673cae 283 }
31f18b77 284 sl.clear();
7c673cae 285
31f18b77
FG
286 if (!f) {
287 unsigned max_width = 1;
288 for (multimap<int,int>::reverse_iterator p = state_by_count.rbegin();
289 p != state_by_count.rend();
290 ++p)
291 {
292 std::stringstream ss;
293 ss << p->first;
11fdf7f2 294 max_width = std::max<size_t>(ss.str().size(), max_width);
7c673cae
FG
295 }
296
31f18b77
FG
297 for (multimap<int,int>::reverse_iterator p = state_by_count.rbegin();
298 p != state_by_count.rend();
299 ++p)
300 {
301 if (pad) {
302 *out << " ";
303 }
304 pad = true;
305 out->setf(std::ios::left);
306 *out << std::setw(max_width) << p->first
307 << " " << pg_state_string(p->second) << "\n";
308 out->unsetf(std::ios::left);
309 }
7c673cae
FG
310 }
311
31f18b77
FG
312 ostringstream ss_rec_io;
313 overall_recovery_rate_summary(f, &ss_rec_io);
314 ostringstream ss_client_io;
315 overall_client_io_rate_summary(f, &ss_client_io);
316 ostringstream ss_cache_io;
317 overall_cache_io_rate_summary(f, &ss_cache_io);
7c673cae 318
31f18b77
FG
319 if (!f && (ss_client_io.str().length() || ss_rec_io.str().length()
320 || ss_cache_io.str().length())) {
321 *out << "\n \n";
322 *out << " io:\n";
7c673cae
FG
323 }
324
31f18b77
FG
325 if (!f && ss_client_io.str().length())
326 *out << " client: " << ss_client_io.str() << "\n";
327 if (!f && ss_rec_io.str().length())
328 *out << " recovery: " << ss_rec_io.str() << "\n";
329 if (!f && ss_cache_io.str().length())
330 *out << " cache: " << ss_cache_io.str() << "\n";
7c673cae
FG
331}
332
31f18b77 333void PGMapDigest::print_oneline_summary(Formatter *f, ostream *out) const
7c673cae 334{
31f18b77
FG
335 std::stringstream ss;
336
337 if (f)
338 f->open_array_section("num_pg_by_state");
339 for (auto p = num_pg_by_state.begin();
340 p != num_pg_by_state.end();
341 ++p) {
342 if (f) {
343 f->open_object_section("state");
344 f->dump_string("name", pg_state_string(p->first));
345 f->dump_unsigned("num", p->second);
346 f->close_section();
347 }
348 if (p != num_pg_by_state.begin())
349 ss << ", ";
350 ss << p->second << " " << pg_state_string(p->first);
7c673cae 351 }
31f18b77
FG
352 if (f)
353 f->close_section();
7c673cae 354
31f18b77
FG
355 string states = ss.str();
356 if (out)
357 *out << num_pg << " pgs: "
358 << states << "; "
1adf2230 359 << byte_u_t(pg_sum.stats.sum.num_bytes) << " data, "
11fdf7f2
TL
360 << byte_u_t(osd_sum.statfs.get_used()) << " used, "
361 << byte_u_t(osd_sum.statfs.available) << " / "
362 << byte_u_t(osd_sum.statfs.total) << " avail";
31f18b77
FG
363 if (f) {
364 f->dump_unsigned("num_pgs", num_pg);
365 f->dump_unsigned("num_bytes", pg_sum.stats.sum.num_bytes);
11fdf7f2
TL
366 f->dump_int("total_bytes", osd_sum.statfs.total);
367 f->dump_int("total_avail_bytes", osd_sum.statfs.available);
368 f->dump_int("total_used_bytes", osd_sum.statfs.get_used());
369 f->dump_int("total_used_raw_bytes", osd_sum.statfs.get_used_raw());
31f18b77 370 }
7c673cae 371
31f18b77
FG
372 // make non-negative; we can get negative values if osds send
373 // uncommitted stats and then "go backward" or if they are just
374 // buggy/wrong.
375 pool_stat_t pos_delta = pg_sum_delta;
376 pos_delta.floor(0);
377 if (pos_delta.stats.sum.num_rd ||
378 pos_delta.stats.sum.num_wr) {
379 if (out)
380 *out << "; ";
381 if (pos_delta.stats.sum.num_rd) {
382 int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta;
383 if (out)
1adf2230 384 *out << byte_u_t(rd) << "/s rd, ";
31f18b77
FG
385 if (f)
386 f->dump_unsigned("read_bytes_sec", rd);
387 }
388 if (pos_delta.stats.sum.num_wr) {
389 int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta;
390 if (out)
1adf2230 391 *out << byte_u_t(wr) << "/s wr, ";
31f18b77
FG
392 if (f)
393 f->dump_unsigned("write_bytes_sec", wr);
394 }
395 int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta;
396 if (out)
11fdf7f2 397 *out << si_u_t(iops) << " op/s";
31f18b77
FG
398 if (f)
399 f->dump_unsigned("io_sec", iops);
7c673cae 400 }
31f18b77
FG
401
402 list<string> sl;
403 overall_recovery_summary(f, &sl);
404 if (out)
405 for (auto p = sl.begin(); p != sl.end(); ++p)
406 *out << "; " << *p;
407 std::stringstream ssr;
408 overall_recovery_rate_summary(f, &ssr);
409 if (out && ssr.str().length())
410 *out << "; " << ssr.str() << " recovering";
7c673cae
FG
411}
412
11fdf7f2
TL
413void PGMapDigest::get_recovery_stats(
414 double *misplaced_ratio,
415 double *degraded_ratio,
416 double *inactive_pgs_ratio,
417 double *unknown_pgs_ratio) const
418{
419 if (pg_sum.stats.sum.num_objects_degraded &&
420 pg_sum.stats.sum.num_object_copies > 0) {
421 *degraded_ratio = (double)pg_sum.stats.sum.num_objects_degraded /
422 (double)pg_sum.stats.sum.num_object_copies;
423 } else {
424 *degraded_ratio = 0;
425 }
426 if (pg_sum.stats.sum.num_objects_misplaced &&
427 pg_sum.stats.sum.num_object_copies > 0) {
428 *misplaced_ratio = (double)pg_sum.stats.sum.num_objects_misplaced /
429 (double)pg_sum.stats.sum.num_object_copies;
430 } else {
431 *misplaced_ratio = 0;
432 }
433 if (num_pg > 0) {
434 int num_pg_inactive = num_pg - num_pg_active - num_pg_unknown;
435 *inactive_pgs_ratio = (double)num_pg_inactive / (double)num_pg;
436 *unknown_pgs_ratio = (double)num_pg_unknown / (double)num_pg;
437 } else {
438 *inactive_pgs_ratio = 0;
439 *unknown_pgs_ratio = 0;
440 }
441}
442
31f18b77 443void PGMapDigest::recovery_summary(Formatter *f, list<string> *psl,
b32b8144 444 const pool_stat_t& pool_sum) const
7c673cae 445{
b32b8144
FG
446 if (pool_sum.stats.sum.num_objects_degraded && pool_sum.stats.sum.num_object_copies > 0) {
447 double pc = (double)pool_sum.stats.sum.num_objects_degraded /
448 (double)pool_sum.stats.sum.num_object_copies * (double)100.0;
31f18b77
FG
449 char b[20];
450 snprintf(b, sizeof(b), "%.3lf", pc);
451 if (f) {
b32b8144
FG
452 f->dump_unsigned("degraded_objects", pool_sum.stats.sum.num_objects_degraded);
453 f->dump_unsigned("degraded_total", pool_sum.stats.sum.num_object_copies);
31f18b77
FG
454 f->dump_float("degraded_ratio", pc / 100.0);
455 } else {
456 ostringstream ss;
b32b8144
FG
457 ss << pool_sum.stats.sum.num_objects_degraded
458 << "/" << pool_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)";
31f18b77
FG
459 psl->push_back(ss.str());
460 }
461 }
b32b8144
FG
462 if (pool_sum.stats.sum.num_objects_misplaced && pool_sum.stats.sum.num_object_copies > 0) {
463 double pc = (double)pool_sum.stats.sum.num_objects_misplaced /
464 (double)pool_sum.stats.sum.num_object_copies * (double)100.0;
31f18b77
FG
465 char b[20];
466 snprintf(b, sizeof(b), "%.3lf", pc);
467 if (f) {
b32b8144
FG
468 f->dump_unsigned("misplaced_objects", pool_sum.stats.sum.num_objects_misplaced);
469 f->dump_unsigned("misplaced_total", pool_sum.stats.sum.num_object_copies);
31f18b77
FG
470 f->dump_float("misplaced_ratio", pc / 100.0);
471 } else {
472 ostringstream ss;
b32b8144
FG
473 ss << pool_sum.stats.sum.num_objects_misplaced
474 << "/" << pool_sum.stats.sum.num_object_copies << " objects misplaced (" << b << "%)";
31f18b77
FG
475 psl->push_back(ss.str());
476 }
477 }
b32b8144
FG
478 if (pool_sum.stats.sum.num_objects_unfound && pool_sum.stats.sum.num_objects) {
479 double pc = (double)pool_sum.stats.sum.num_objects_unfound /
480 (double)pool_sum.stats.sum.num_objects * (double)100.0;
31f18b77
FG
481 char b[20];
482 snprintf(b, sizeof(b), "%.3lf", pc);
483 if (f) {
b32b8144
FG
484 f->dump_unsigned("unfound_objects", pool_sum.stats.sum.num_objects_unfound);
485 f->dump_unsigned("unfound_total", pool_sum.stats.sum.num_objects);
31f18b77
FG
486 f->dump_float("unfound_ratio", pc / 100.0);
487 } else {
488 ostringstream ss;
b32b8144
FG
489 ss << pool_sum.stats.sum.num_objects_unfound
490 << "/" << pool_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
31f18b77
FG
491 psl->push_back(ss.str());
492 }
7c673cae 493 }
7c673cae
FG
494}
495
31f18b77
FG
496void PGMapDigest::recovery_rate_summary(Formatter *f, ostream *out,
497 const pool_stat_t& delta_sum,
498 utime_t delta_stamp) const
7c673cae 499{
31f18b77
FG
500 // make non-negative; we can get negative values if osds send
501 // uncommitted stats and then "go backward" or if they are just
502 // buggy/wrong.
503 pool_stat_t pos_delta = delta_sum;
504 pos_delta.floor(0);
505 if (pos_delta.stats.sum.num_objects_recovered ||
506 pos_delta.stats.sum.num_bytes_recovered ||
507 pos_delta.stats.sum.num_keys_recovered) {
508 int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp;
509 int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp;
510 int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp;
511 if (f) {
512 f->dump_int("recovering_objects_per_sec", objps);
513 f->dump_int("recovering_bytes_per_sec", bps);
514 f->dump_int("recovering_keys_per_sec", kps);
515 f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered);
516 f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered);
517 f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered);
518 } else {
1adf2230 519 *out << byte_u_t(bps) << "/s";
31f18b77 520 if (pos_delta.stats.sum.num_keys_recovered)
11fdf7f2
TL
521 *out << ", " << si_u_t(kps) << " keys/s";
522 *out << ", " << si_u_t(objps) << " objects/s";
31f18b77 523 }
7c673cae 524 }
31f18b77 525}
7c673cae 526
31f18b77
FG
527void PGMapDigest::overall_recovery_rate_summary(Formatter *f, ostream *out) const
528{
529 recovery_rate_summary(f, out, pg_sum_delta, stamp_delta);
7c673cae
FG
530}
531
31f18b77 532void PGMapDigest::overall_recovery_summary(Formatter *f, list<string> *psl) const
7c673cae 533{
31f18b77 534 recovery_summary(f, psl, pg_sum);
7c673cae
FG
535}
536
31f18b77
FG
537void PGMapDigest::pool_recovery_rate_summary(Formatter *f, ostream *out,
538 uint64_t poolid) const
7c673cae 539{
31f18b77
FG
540 auto p = per_pool_sum_delta.find(poolid);
541 if (p == per_pool_sum_delta.end())
542 return;
7c673cae 543
31f18b77 544 auto ts = per_pool_sum_deltas_stamps.find(p->first);
11fdf7f2 545 ceph_assert(ts != per_pool_sum_deltas_stamps.end());
31f18b77
FG
546 recovery_rate_summary(f, out, p->second.first, ts->second);
547}
7c673cae 548
31f18b77
FG
549void PGMapDigest::pool_recovery_summary(Formatter *f, list<string> *psl,
550 uint64_t poolid) const
551{
b32b8144
FG
552 auto p = pg_pool_sum.find(poolid);
553 if (p == pg_pool_sum.end())
31f18b77 554 return;
7c673cae 555
b32b8144 556 recovery_summary(f, psl, p->second);
7c673cae
FG
557}
558
31f18b77
FG
559void PGMapDigest::client_io_rate_summary(Formatter *f, ostream *out,
560 const pool_stat_t& delta_sum,
561 utime_t delta_stamp) const
7c673cae 562{
31f18b77
FG
563 pool_stat_t pos_delta = delta_sum;
564 pos_delta.floor(0);
565 if (pos_delta.stats.sum.num_rd ||
566 pos_delta.stats.sum.num_wr) {
567 if (pos_delta.stats.sum.num_rd) {
568 int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp;
569 if (f) {
570 f->dump_int("read_bytes_sec", rd);
571 } else {
1adf2230 572 *out << byte_u_t(rd) << "/s rd, ";
31f18b77
FG
573 }
574 }
575 if (pos_delta.stats.sum.num_wr) {
576 int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp;
577 if (f) {
578 f->dump_int("write_bytes_sec", wr);
579 } else {
1adf2230 580 *out << byte_u_t(wr) << "/s wr, ";
31f18b77
FG
581 }
582 }
583 int64_t iops_rd = pos_delta.stats.sum.num_rd / (double)delta_stamp;
584 int64_t iops_wr = pos_delta.stats.sum.num_wr / (double)delta_stamp;
585 if (f) {
586 f->dump_int("read_op_per_sec", iops_rd);
587 f->dump_int("write_op_per_sec", iops_wr);
588 } else {
11fdf7f2 589 *out << si_u_t(iops_rd) << " op/s rd, " << si_u_t(iops_wr) << " op/s wr";
31f18b77 590 }
7c673cae
FG
591 }
592}
593
31f18b77 594void PGMapDigest::overall_client_io_rate_summary(Formatter *f, ostream *out) const
7c673cae 595{
31f18b77
FG
596 client_io_rate_summary(f, out, pg_sum_delta, stamp_delta);
597}
7c673cae 598
31f18b77
FG
599void PGMapDigest::pool_client_io_rate_summary(Formatter *f, ostream *out,
600 uint64_t poolid) const
601{
602 auto p = per_pool_sum_delta.find(poolid);
603 if (p == per_pool_sum_delta.end())
7c673cae
FG
604 return;
605
31f18b77 606 auto ts = per_pool_sum_deltas_stamps.find(p->first);
11fdf7f2 607 ceph_assert(ts != per_pool_sum_deltas_stamps.end());
31f18b77 608 client_io_rate_summary(f, out, p->second.first, ts->second);
7c673cae
FG
609}
610
31f18b77
FG
611void PGMapDigest::cache_io_rate_summary(Formatter *f, ostream *out,
612 const pool_stat_t& delta_sum,
613 utime_t delta_stamp) const
7c673cae 614{
31f18b77
FG
615 pool_stat_t pos_delta = delta_sum;
616 pos_delta.floor(0);
617 bool have_output = false;
7c673cae 618
31f18b77
FG
619 if (pos_delta.stats.sum.num_flush) {
620 int64_t flush = (pos_delta.stats.sum.num_flush_kb << 10) / (double)delta_stamp;
621 if (f) {
622 f->dump_int("flush_bytes_sec", flush);
623 } else {
1adf2230 624 *out << byte_u_t(flush) << "/s flush";
31f18b77 625 have_output = true;
7c673cae
FG
626 }
627 }
31f18b77
FG
628 if (pos_delta.stats.sum.num_evict) {
629 int64_t evict = (pos_delta.stats.sum.num_evict_kb << 10) / (double)delta_stamp;
630 if (f) {
631 f->dump_int("evict_bytes_sec", evict);
632 } else {
633 if (have_output)
634 *out << ", ";
1adf2230 635 *out << byte_u_t(evict) << "/s evict";
31f18b77
FG
636 have_output = true;
637 }
7c673cae 638 }
31f18b77
FG
639 if (pos_delta.stats.sum.num_promote) {
640 int64_t promote = pos_delta.stats.sum.num_promote / (double)delta_stamp;
641 if (f) {
642 f->dump_int("promote_op_per_sec", promote);
643 } else {
644 if (have_output)
645 *out << ", ";
11fdf7f2 646 *out << si_u_t(promote) << " op/s promote";
31f18b77
FG
647 have_output = true;
648 }
7c673cae 649 }
31f18b77
FG
650 if (pos_delta.stats.sum.num_flush_mode_low) {
651 if (f) {
652 f->dump_int("num_flush_mode_low", pos_delta.stats.sum.num_flush_mode_low);
653 } else {
654 if (have_output)
655 *out << ", ";
11fdf7f2 656 *out << si_u_t(pos_delta.stats.sum.num_flush_mode_low) << " PGs flushing";
31f18b77
FG
657 have_output = true;
658 }
7c673cae 659 }
31f18b77
FG
660 if (pos_delta.stats.sum.num_flush_mode_high) {
661 if (f) {
662 f->dump_int("num_flush_mode_high", pos_delta.stats.sum.num_flush_mode_high);
663 } else {
664 if (have_output)
665 *out << ", ";
11fdf7f2 666 *out << si_u_t(pos_delta.stats.sum.num_flush_mode_high) << " PGs flushing (high)";
31f18b77
FG
667 have_output = true;
668 }
7c673cae 669 }
31f18b77
FG
670 if (pos_delta.stats.sum.num_evict_mode_some) {
671 if (f) {
672 f->dump_int("num_evict_mode_some", pos_delta.stats.sum.num_evict_mode_some);
673 } else {
674 if (have_output)
675 *out << ", ";
11fdf7f2 676 *out << si_u_t(pos_delta.stats.sum.num_evict_mode_some) << " PGs evicting";
31f18b77
FG
677 have_output = true;
678 }
679 }
680 if (pos_delta.stats.sum.num_evict_mode_full) {
681 if (f) {
682 f->dump_int("num_evict_mode_full", pos_delta.stats.sum.num_evict_mode_full);
683 } else {
684 if (have_output)
685 *out << ", ";
11fdf7f2 686 *out << si_u_t(pos_delta.stats.sum.num_evict_mode_full) << " PGs evicting (full)";
31f18b77 687 }
7c673cae
FG
688 }
689}
690
31f18b77 691void PGMapDigest::overall_cache_io_rate_summary(Formatter *f, ostream *out) const
7c673cae 692{
31f18b77 693 cache_io_rate_summary(f, out, pg_sum_delta, stamp_delta);
7c673cae
FG
694}
695
31f18b77
FG
696void PGMapDigest::pool_cache_io_rate_summary(Formatter *f, ostream *out,
697 uint64_t poolid) const
7c673cae 698{
31f18b77
FG
699 auto p = per_pool_sum_delta.find(poolid);
700 if (p == per_pool_sum_delta.end())
701 return;
7c673cae 702
31f18b77 703 auto ts = per_pool_sum_deltas_stamps.find(p->first);
11fdf7f2 704 ceph_assert(ts != per_pool_sum_deltas_stamps.end());
31f18b77 705 cache_io_rate_summary(f, out, p->second.first, ts->second);
7c673cae
FG
706}
707
d2e6a577
FG
708ceph_statfs PGMapDigest::get_statfs(OSDMap &osdmap,
709 boost::optional<int64_t> data_pool) const
710{
711 ceph_statfs statfs;
712 bool filter = false;
713 object_stat_sum_t sum;
714
715 if (data_pool) {
716 auto i = pg_pool_sum.find(*data_pool);
717 if (i != pg_pool_sum.end()) {
718 sum = i->second.stats.sum;
719 filter = true;
720 }
721 }
722
723 if (filter) {
724 statfs.kb_used = (sum.num_bytes >> 10);
725 statfs.kb_avail = get_pool_free_space(osdmap, *data_pool) >> 10;
726 statfs.num_objects = sum.num_objects;
727 statfs.kb = statfs.kb_used + statfs.kb_avail;
728 } else {
729 // these are in KB.
11fdf7f2
TL
730 statfs.kb = osd_sum.statfs.kb();
731 statfs.kb_used = osd_sum.statfs.kb_used_raw();
732 statfs.kb_avail = osd_sum.statfs.kb_avail();
d2e6a577
FG
733 statfs.num_objects = pg_sum.stats.sum.num_objects;
734 }
735
736 return statfs;
737}
738
31f18b77
FG
739void PGMapDigest::dump_pool_stats_full(
740 const OSDMap &osd_map,
741 stringstream *ss,
742 Formatter *f,
743 bool verbose) const
7c673cae 744{
31f18b77 745 TextTable tbl;
7c673cae 746
31f18b77
FG
747 if (f) {
748 f->open_array_section("pools");
749 } else {
11fdf7f2
TL
750 tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
751 tbl.define_column("ID", TextTable::LEFT, TextTable::RIGHT);
752 tbl.define_column("STORED", TextTable::LEFT, TextTable::RIGHT);
753 tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
754 tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
755 tbl.define_column("%USED", TextTable::LEFT, TextTable::RIGHT);
756 tbl.define_column("MAX AVAIL", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2 757
31f18b77 758 if (verbose) {
11fdf7f2
TL
759 tbl.define_column("QUOTA OBJECTS", TextTable::LEFT, TextTable::LEFT);
760 tbl.define_column("QUOTA BYTES", TextTable::LEFT, TextTable::LEFT);
31f18b77 761 tbl.define_column("DIRTY", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
762 tbl.define_column("USED COMPR", TextTable::LEFT, TextTable::RIGHT);
763 tbl.define_column("UNDER COMPR", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
764 }
765 }
766
767 map<int,uint64_t> avail_by_rule;
768 for (auto p = osd_map.get_pools().begin();
769 p != osd_map.get_pools().end(); ++p) {
770 int64_t pool_id = p->first;
771 if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0))
772 continue;
11fdf7f2 773
31f18b77
FG
774 const string& pool_name = osd_map.get_pool_name(pool_id);
775 const pool_stat_t &stat = pg_pool_sum.at(pool_id);
776
777 const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
778 int ruleno = osd_map.crush->find_rule(pool->get_crush_rule(),
779 pool->get_type(),
780 pool->get_size());
781 int64_t avail;
31f18b77
FG
782 if (avail_by_rule.count(ruleno) == 0) {
783 // FIXME: we don't guarantee avail_space_by_rule is up-to-date before this function is invoked
784 avail = get_rule_avail(ruleno);
785 if (avail < 0)
786 avail = 0;
787 avail_by_rule[ruleno] = avail;
788 } else {
789 avail = avail_by_rule[ruleno];
790 }
31f18b77
FG
791 if (f) {
792 f->open_object_section("pool");
793 f->dump_string("name", pool_name);
794 f->dump_int("id", pool_id);
795 f->open_object_section("stats");
796 } else {
797 tbl << pool_name
798 << pool_id;
31f18b77 799 }
11fdf7f2 800 float raw_used_rate = osd_map.pool_raw_used_rate(pool_id);
81eedcae
TL
801 bool per_pool = use_per_pool_stats();
802 dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, per_pool,
803 pool);
11fdf7f2 804 if (f) {
31f18b77 805 f->close_section(); // stats
31f18b77 806 f->close_section(); // pool
11fdf7f2
TL
807 } else {
808 tbl << TextTable::endrow;
809 }
31f18b77
FG
810 }
811 if (f)
812 f->close_section();
813 else {
11fdf7f2 814 ceph_assert(ss != nullptr);
31f18b77
FG
815 *ss << "POOLS:\n";
816 tbl.set_indent(4);
817 *ss << tbl;
818 }
819}
820
11fdf7f2
TL
821void PGMapDigest::dump_cluster_stats(stringstream *ss,
822 Formatter *f,
823 bool verbose) const
31f18b77
FG
824{
825 if (f) {
826 f->open_object_section("stats");
11fdf7f2
TL
827 f->dump_int("total_bytes", osd_sum.statfs.total);
828 f->dump_int("total_avail_bytes", osd_sum.statfs.available);
829 f->dump_int("total_used_bytes", osd_sum.statfs.get_used());
830 f->dump_int("total_used_raw_bytes", osd_sum.statfs.get_used_raw());
831 f->dump_float("total_used_raw_ratio", osd_sum.statfs.get_used_raw_ratio());
81eedcae
TL
832 f->dump_unsigned("num_osds", osd_sum.num_osds);
833 f->dump_unsigned("num_per_pool_osds", osd_sum.num_per_pool_osds);
11fdf7f2
TL
834 f->close_section();
835 f->open_object_section("stats_by_class");
836 for (auto& i : osd_sum_by_class) {
837 f->open_object_section(i.first.c_str());
838 f->dump_int("total_bytes", i.second.statfs.total);
839 f->dump_int("total_avail_bytes", i.second.statfs.available);
840 f->dump_int("total_used_bytes", i.second.statfs.get_used());
841 f->dump_int("total_used_raw_bytes", i.second.statfs.get_used_raw());
842 f->dump_float("total_used_raw_ratio",
843 i.second.statfs.get_used_raw_ratio());
844 f->close_section();
31f18b77
FG
845 }
846 f->close_section();
847 } else {
11fdf7f2 848 ceph_assert(ss != nullptr);
31f18b77 849 TextTable tbl;
11fdf7f2 850 tbl.define_column("CLASS", TextTable::LEFT, TextTable::LEFT);
31f18b77
FG
851 tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT);
852 tbl.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2 853 tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
854 tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT);
855 tbl.define_column("%RAW USED", TextTable::LEFT, TextTable::RIGHT);
31f18b77 856
11fdf7f2
TL
857
858 for (auto& i : osd_sum_by_class) {
859 tbl << i.first;
860 tbl << stringify(byte_u_t(i.second.statfs.total))
861 << stringify(byte_u_t(i.second.statfs.available))
862 << stringify(byte_u_t(i.second.statfs.get_used()))
863 << stringify(byte_u_t(i.second.statfs.get_used_raw()))
864 << percentify(i.second.statfs.get_used_raw_ratio()*100.0)
865 << TextTable::endrow;
866 }
867 tbl << "TOTAL";
868 tbl << stringify(byte_u_t(osd_sum.statfs.total))
869 << stringify(byte_u_t(osd_sum.statfs.available))
870 << stringify(byte_u_t(osd_sum.statfs.get_used()))
871 << stringify(byte_u_t(osd_sum.statfs.get_used_raw()))
872 << percentify(osd_sum.statfs.get_used_raw_ratio()*100.0)
873 << TextTable::endrow;
874
875 *ss << "RAW STORAGE:\n";
31f18b77
FG
876 tbl.set_indent(4);
877 *ss << tbl;
878 }
879}
880
881void PGMapDigest::dump_object_stat_sum(
882 TextTable &tbl, Formatter *f,
11fdf7f2 883 const pool_stat_t &pool_stat, uint64_t avail,
81eedcae 884 float raw_used_rate, bool verbose, bool per_pool,
31f18b77
FG
885 const pg_pool_t *pool)
886{
11fdf7f2
TL
887 const object_stat_sum_t &sum = pool_stat.stats.sum;
888 const store_statfs_t statfs = pool_stat.store_stats;
889
890 if (sum.num_object_copies > 0) {
891 raw_used_rate *= (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies;
892 }
81eedcae
TL
893
894 uint64_t used_bytes = pool_stat.get_allocated_bytes(per_pool);
31f18b77
FG
895
896 float used = 0.0;
3efd9988 897 // note avail passed in is raw_avail, calc raw_used here.
31f18b77 898 if (avail) {
11fdf7f2 899 used = used_bytes;
31f18b77 900 used /= used + avail;
11fdf7f2 901 } else if (used_bytes) {
31f18b77
FG
902 used = 1.0;
903 }
11fdf7f2
TL
904 auto avail_res = raw_used_rate ? avail / raw_used_rate : 0;
905 // an approximation for actually stored user data
81eedcae 906 auto stored_normalized = pool_stat.get_user_bytes(raw_used_rate, per_pool);
31f18b77 907 if (f) {
11fdf7f2 908 f->dump_int("stored", stored_normalized);
31f18b77 909 f->dump_int("objects", sum.num_objects);
11fdf7f2
TL
910 f->dump_int("kb_used", shift_round_up(used_bytes, 10));
911 f->dump_int("bytes_used", used_bytes);
912 f->dump_float("percent_used", used);
913 f->dump_unsigned("max_avail", avail_res);
31f18b77
FG
914 if (verbose) {
915 f->dump_int("quota_objects", pool->quota_max_objects);
916 f->dump_int("quota_bytes", pool->quota_max_bytes);
917 f->dump_int("dirty", sum.num_objects_dirty);
918 f->dump_int("rd", sum.num_rd);
919 f->dump_int("rd_bytes", sum.num_rd_kb * 1024ull);
920 f->dump_int("wr", sum.num_wr);
921 f->dump_int("wr_bytes", sum.num_wr_kb * 1024ull);
11fdf7f2
TL
922 f->dump_int("compress_bytes_used", statfs.data_compressed_allocated);
923 f->dump_int("compress_under_bytes", statfs.data_compressed_original);
924 // Stored by user amplified by replication
81eedcae 925 f->dump_int("stored_raw", pool_stat.get_user_bytes(1.0, per_pool));
31f18b77
FG
926 }
927 } else {
11fdf7f2
TL
928 tbl << stringify(byte_u_t(stored_normalized));
929 tbl << stringify(si_u_t(sum.num_objects));
930 tbl << stringify(byte_u_t(used_bytes));
31f18b77 931 tbl << percentify(used*100);
11fdf7f2 932 tbl << stringify(byte_u_t(avail_res));
31f18b77 933 if (verbose) {
11fdf7f2
TL
934 if (pool->quota_max_objects == 0)
935 tbl << "N/A";
936 else
937 tbl << stringify(si_u_t(pool->quota_max_objects));
938
939 if (pool->quota_max_bytes == 0)
940 tbl << "N/A";
941 else
942 tbl << stringify(byte_u_t(pool->quota_max_bytes));
943
1adf2230 944 tbl << stringify(si_u_t(sum.num_objects_dirty))
11fdf7f2
TL
945 << stringify(byte_u_t(statfs.data_compressed_allocated))
946 << stringify(byte_u_t(statfs.data_compressed_original))
947 ;
31f18b77
FG
948 }
949 }
950}
951
d2e6a577
FG
952int64_t PGMapDigest::get_pool_free_space(const OSDMap &osd_map,
953 int64_t poolid) const
954{
955 const pg_pool_t *pool = osd_map.get_pg_pool(poolid);
956 int ruleno = osd_map.crush->find_rule(pool->get_crush_rule(),
957 pool->get_type(),
958 pool->get_size());
959 int64_t avail;
960 avail = get_rule_avail(ruleno);
961 if (avail < 0)
962 avail = 0;
963
11fdf7f2 964 return avail / osd_map.pool_raw_used_rate(poolid);
d2e6a577
FG
965}
966
31f18b77
FG
967int64_t PGMap::get_rule_avail(const OSDMap& osdmap, int ruleno) const
968{
969 map<int,float> wm;
970 int r = osdmap.crush->get_rule_weight_osd_map(ruleno, &wm);
971 if (r < 0) {
972 return r;
973 }
974 if (wm.empty()) {
975 return 0;
976 }
977
11fdf7f2 978 float fratio = osdmap.get_full_ratio();
31f18b77
FG
979
980 int64_t min = -1;
981 for (auto p = wm.begin(); p != wm.end(); ++p) {
982 auto osd_info = osd_stat.find(p->first);
983 if (osd_info != osd_stat.end()) {
11fdf7f2 984 if (osd_info->second.statfs.total == 0 || p->second == 0) {
31f18b77
FG
985 // osd must be out, hence its stats have been zeroed
986 // (unless we somehow managed to have a disk with size 0...)
987 //
988 // (p->second == 0), if osd weight is 0, no need to
989 // calculate proj below.
990 continue;
991 }
11fdf7f2 992 double unusable = (double)osd_info->second.statfs.kb() *
31f18b77 993 (1.0 - fratio);
11fdf7f2 994 double avail = std::max(0.0, (double)osd_info->second.statfs.kb_avail() - unusable);
31f18b77
FG
995 avail *= 1024.0;
996 int64_t proj = (int64_t)(avail / (double)p->second);
997 if (min < 0 || proj < min) {
998 min = proj;
999 }
1000 } else {
94b18763
FG
1001 if (osdmap.is_up(p->first)) {
1002 // This is a level 4 rather than an error, because we might have
1003 // only just started, and not received the first stats message yet.
1004 dout(4) << "OSD " << p->first << " is up, but has no stats" << dendl;
1005 }
31f18b77
FG
1006 }
1007 }
1008 return min;
1009}
1010
1011void PGMap::get_rules_avail(const OSDMap& osdmap,
1012 std::map<int,int64_t> *avail_map) const
1013{
1014 avail_map->clear();
1015 for (auto p : osdmap.get_pools()) {
1016 int64_t pool_id = p.first;
1017 if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0))
1018 continue;
1019 const pg_pool_t *pool = osdmap.get_pg_pool(pool_id);
1020 int ruleno = osdmap.crush->find_rule(pool->get_crush_rule(),
1021 pool->get_type(),
1022 pool->get_size());
1023 if (avail_map->count(ruleno) == 0)
1024 (*avail_map)[ruleno] = get_rule_avail(osdmap, ruleno);
1025 }
1026}
1027
1028// ---------------------
1029// PGMap
1030
31f18b77 1031void PGMap::Incremental::dump(Formatter *f) const
7c673cae
FG
1032{
1033 f->dump_unsigned("version", version);
1034 f->dump_stream("stamp") << stamp;
31f18b77
FG
1035 f->dump_unsigned("osdmap_epoch", osdmap_epoch);
1036 f->dump_unsigned("pg_scan_epoch", pg_scan);
7c673cae 1037
31f18b77
FG
1038 f->open_array_section("pg_stat_updates");
1039 for (auto p = pg_stat_updates.begin(); p != pg_stat_updates.end(); ++p) {
1040 f->open_object_section("pg_stat");
1041 f->dump_stream("pgid") << p->first;
1042 p->second.dump(f);
1043 f->close_section();
1044 }
7c673cae
FG
1045 f->close_section();
1046
31f18b77
FG
1047 f->open_array_section("osd_stat_updates");
1048 for (auto p = osd_stat_updates.begin(); p != osd_stat_updates.end(); ++p) {
1049 f->open_object_section("osd_stat");
1050 f->dump_int("osd", p->first);
1051 p->second.dump(f);
7c673cae
FG
1052 f->close_section();
1053 }
1054 f->close_section();
11fdf7f2
TL
1055 f->open_array_section("pool_statfs_updates");
1056 for (auto p = pool_statfs_updates.begin(); p != pool_statfs_updates.end(); ++p) {
1057 f->open_object_section("pool_statfs");
1058 f->dump_stream("poolid/osd") << p->first;
1059 p->second.dump(f);
1060 f->close_section();
1061 }
1062 f->close_section();
7c673cae 1063
31f18b77
FG
1064 f->open_array_section("osd_stat_removals");
1065 for (auto p = osd_stat_rm.begin(); p != osd_stat_rm.end(); ++p)
1066 f->dump_int("osd", *p);
7c673cae 1067 f->close_section();
7c673cae 1068
31f18b77
FG
1069 f->open_array_section("pg_removals");
1070 for (auto p = pg_remove.begin(); p != pg_remove.end(); ++p)
1071 f->dump_stream("pgid") << *p;
7c673cae
FG
1072 f->close_section();
1073}
1074
31f18b77 1075void PGMap::Incremental::generate_test_instances(list<PGMap::Incremental*>& o)
7c673cae 1076{
31f18b77
FG
1077 o.push_back(new Incremental);
1078 o.push_back(new Incremental);
1079 o.back()->version = 1;
1080 o.back()->stamp = utime_t(123,345);
1081 o.push_back(new Incremental);
1082 o.back()->version = 2;
11fdf7f2 1083 o.back()->pg_stat_updates[pg_t(1,2)] = pg_stat_t();
31f18b77 1084 o.back()->osd_stat_updates[5] = osd_stat_t();
31f18b77
FG
1085 o.push_back(new Incremental);
1086 o.back()->version = 3;
1087 o.back()->osdmap_epoch = 1;
1088 o.back()->pg_scan = 2;
11fdf7f2 1089 o.back()->pg_stat_updates[pg_t(4,5)] = pg_stat_t();
31f18b77 1090 o.back()->osd_stat_updates[6] = osd_stat_t();
11fdf7f2 1091 o.back()->pg_remove.insert(pg_t(1,2));
31f18b77 1092 o.back()->osd_stat_rm.insert(5);
11fdf7f2 1093 o.back()->pool_statfs_updates[std::make_pair(1234,4)] = store_statfs_t();
7c673cae
FG
1094}
1095
31f18b77
FG
1096// --
1097
1098void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
7c673cae 1099{
11fdf7f2 1100 ceph_assert(inc.version == version+1);
31f18b77 1101 version++;
7c673cae 1102
31f18b77 1103 pool_stat_t pg_sum_old = pg_sum;
11fdf7f2
TL
1104 mempool::pgmap::unordered_map<int32_t, pool_stat_t> pg_pool_sum_old;
1105 pg_pool_sum_old = pg_pool_sum;
7c673cae 1106
31f18b77
FG
1107 for (auto p = inc.pg_stat_updates.begin();
1108 p != inc.pg_stat_updates.end();
1109 ++p) {
1110 const pg_t &update_pg(p->first);
11fdf7f2 1111 auto update_pool = update_pg.pool();
31f18b77 1112 const pg_stat_t &update_stat(p->second);
7c673cae 1113
11fdf7f2
TL
1114 auto pg_stat_iter = pg_stat.find(update_pg);
1115 pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool];
1116 if (pg_stat_iter == pg_stat.end()) {
31f18b77
FG
1117 pg_stat.insert(make_pair(update_pg, update_stat));
1118 } else {
11fdf7f2
TL
1119 stat_pg_sub(update_pg, pg_stat_iter->second);
1120 pool_sum_ref.sub(pg_stat_iter->second);
1121 pg_stat_iter->second = update_stat;
7c673cae 1122 }
31f18b77 1123 stat_pg_add(update_pg, update_stat);
11fdf7f2 1124 pool_sum_ref.add(update_stat);
7c673cae 1125 }
11fdf7f2
TL
1126
1127 for (auto p = inc.pool_statfs_updates.begin();
1128 p != inc.pool_statfs_updates.end();
1129 ++p) {
1130 auto update_pool = p->first.first;
1131 auto update_osd = p->first.second;
1132 auto& statfs_inc = p->second;
1133
1134 auto pool_statfs_iter =
1135 pool_statfs.find(std::make_pair(update_pool, update_osd));
1136 pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool];
1137 if (pool_statfs_iter == pool_statfs.end()) {
1138 pool_statfs.emplace(std::make_pair(update_pool, update_osd), statfs_inc);
1139 } else {
1140 pool_sum_ref.sub(pool_statfs_iter->second);
1141 pool_statfs_iter->second = statfs_inc;
1142 }
1143 pool_sum_ref.add(statfs_inc);
1144 }
1145
31f18b77
FG
1146 for (auto p = inc.get_osd_stat_updates().begin();
1147 p != inc.get_osd_stat_updates().end();
1148 ++p) {
1149 int osd = p->first;
1150 const osd_stat_t &new_stats(p->second);
7c673cae 1151
31f18b77
FG
1152 auto t = osd_stat.find(osd);
1153 if (t == osd_stat.end()) {
1154 osd_stat.insert(make_pair(osd, new_stats));
1155 } else {
1156 stat_osd_sub(t->first, t->second);
1157 t->second = new_stats;
1158 }
31f18b77 1159 stat_osd_add(osd, new_stats);
31f18b77
FG
1160 }
1161 set<int64_t> deleted_pools;
1162 for (auto p = inc.pg_remove.begin();
1163 p != inc.pg_remove.end();
1164 ++p) {
1165 const pg_t &removed_pg(*p);
1166 auto s = pg_stat.find(removed_pg);
11fdf7f2 1167 bool pool_erased = false;
31f18b77 1168 if (s != pg_stat.end()) {
11fdf7f2 1169 pool_erased = stat_pg_sub(removed_pg, s->second);
31f18b77 1170 pg_stat.erase(s);
11fdf7f2
TL
1171 if (pool_erased) {
1172 deleted_pools.insert(removed_pg.pool());
1173 }
31f18b77 1174 }
7c673cae
FG
1175 }
1176
31f18b77
FG
1177 for (auto p = inc.get_osd_stat_rm().begin();
1178 p != inc.get_osd_stat_rm().end();
7c673cae 1179 ++p) {
31f18b77
FG
1180 auto t = osd_stat.find(*p);
1181 if (t != osd_stat.end()) {
1182 stat_osd_sub(t->first, t->second);
1183 osd_stat.erase(t);
31f18b77 1184 }
11fdf7f2
TL
1185 for (auto i = pool_statfs.begin(); i != pool_statfs.end(); ++i) {
1186 if (i->first.second == *p) {
1187 pg_pool_sum[i->first.first].sub(i->second);
1188 pool_statfs.erase(i);
1189 }
1190 }
7c673cae
FG
1191 }
1192
b32b8144
FG
1193 // skip calculating delta while sum was not synchronized
1194 if (!stamp.is_zero() && !pg_sum_old.stats.sum.is_zero()) {
1195 utime_t delta_t;
1196 delta_t = inc.stamp;
1197 delta_t -= stamp;
1198 // calculate a delta, and average over the last 2 deltas.
1199 pool_stat_t d = pg_sum;
1200 d.stats.sub(pg_sum_old.stats);
1201 pg_sum_deltas.push_back(make_pair(d, delta_t));
1202 stamp_delta += delta_t;
1203 pg_sum_delta.stats.add(d.stats);
1204 auto smooth_intervals =
11fdf7f2
TL
1205 cct ? cct->_conf.get_val<uint64_t>("mon_stat_smooth_intervals") : 1;
1206 while (pg_sum_deltas.size() > smooth_intervals) {
b32b8144
FG
1207 pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats);
1208 stamp_delta -= pg_sum_deltas.front().second;
1209 pg_sum_deltas.pop_front();
1210 }
31f18b77 1211 }
b32b8144 1212 stamp = inc.stamp;
7c673cae 1213
31f18b77 1214 update_pool_deltas(cct, inc.stamp, pg_pool_sum_old);
7c673cae 1215
31f18b77
FG
1216 for (auto p : deleted_pools) {
1217 if (cct)
1218 dout(20) << " deleted pool " << p << dendl;
1219 deleted_pool(p);
1220 }
7c673cae 1221
31f18b77
FG
1222 if (inc.osdmap_epoch)
1223 last_osdmap_epoch = inc.osdmap_epoch;
1224 if (inc.pg_scan)
1225 last_pg_scan = inc.pg_scan;
7c673cae
FG
1226}
1227
31f18b77 1228void PGMap::calc_stats()
7c673cae 1229{
31f18b77
FG
1230 num_pg = 0;
1231 num_pg_active = 0;
1232 num_pg_unknown = 0;
1233 num_osd = 0;
1234 pg_pool_sum.clear();
1235 num_pg_by_pool.clear();
1236 pg_by_osd.clear();
1237 pg_sum = pool_stat_t();
1238 osd_sum = osd_stat_t();
11fdf7f2 1239 osd_sum_by_class.clear();
31f18b77 1240 num_pg_by_state.clear();
11fdf7f2 1241 num_pg_by_pool_state.clear();
31f18b77 1242 num_pg_by_osd.clear();
7c673cae 1243
31f18b77
FG
1244 for (auto p = pg_stat.begin();
1245 p != pg_stat.end();
1246 ++p) {
11fdf7f2
TL
1247 auto pg = p->first;
1248 stat_pg_add(pg, p->second);
1249 pg_pool_sum[pg.pool()].add(p->second);
1250 }
1251 for (auto p = pool_statfs.begin();
1252 p != pool_statfs.end();
1253 ++p) {
1254 auto pool = p->first.first;
1255 pg_pool_sum[pool].add(p->second);
31f18b77
FG
1256 }
1257 for (auto p = osd_stat.begin();
1258 p != osd_stat.end();
1259 ++p)
1260 stat_osd_add(p->first, p->second);
7c673cae
FG
1261}
1262
31f18b77
FG
1263void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
1264 bool sameosds)
7c673cae 1265{
11fdf7f2 1266 auto pool = pgid.pool();
31f18b77 1267 pg_sum.add(s);
7c673cae 1268
31f18b77
FG
1269 num_pg++;
1270 num_pg_by_state[s.state]++;
11fdf7f2
TL
1271 num_pg_by_pool_state[pgid.pool()][s.state]++;
1272 num_pg_by_pool[pool]++;
7c673cae 1273
31f18b77
FG
1274 if ((s.state & PG_STATE_CREATING) &&
1275 s.parent_split_bits == 0) {
1276 creating_pgs.insert(pgid);
1277 if (s.acting_primary >= 0) {
1278 creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid);
7c673cae
FG
1279 }
1280 }
1281
31f18b77
FG
1282 if (s.state & PG_STATE_ACTIVE) {
1283 ++num_pg_active;
1284 }
1285 if (s.state == 0) {
1286 ++num_pg_unknown;
7c673cae
FG
1287 }
1288
31f18b77
FG
1289 if (sameosds)
1290 return;
7c673cae 1291
31f18b77
FG
1292 for (auto p = s.blocked_by.begin();
1293 p != s.blocked_by.end();
1294 ++p) {
1295 ++blocked_by_sum[*p];
7c673cae 1296 }
31f18b77
FG
1297
1298 for (auto p = s.acting.begin(); p != s.acting.end(); ++p) {
1299 pg_by_osd[*p].insert(pgid);
1300 num_pg_by_osd[*p].acting++;
1301 }
1302 for (auto p = s.up.begin(); p != s.up.end(); ++p) {
81eedcae
TL
1303 auto& t = pg_by_osd[*p];
1304 if (t.find(pgid) == t.end()) {
1305 t.insert(pgid);
1306 num_pg_by_osd[*p].up_not_acting++;
1307 }
7c673cae 1308 }
7c673cae 1309
31f18b77
FG
1310 if (s.up_primary >= 0) {
1311 num_pg_by_osd[s.up_primary].primary++;
7c673cae 1312 }
7c673cae 1313}
31f18b77 1314
11fdf7f2 1315bool PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
31f18b77 1316 bool sameosds)
7c673cae 1317{
11fdf7f2 1318 bool pool_erased = false;
31f18b77
FG
1319 pg_sum.sub(s);
1320
1321 num_pg--;
1322 int end = --num_pg_by_state[s.state];
11fdf7f2 1323 ceph_assert(end >= 0);
31f18b77
FG
1324 if (end == 0)
1325 num_pg_by_state.erase(s.state);
11fdf7f2
TL
1326 if (--num_pg_by_pool_state[pgid.pool()][s.state] == 0) {
1327 num_pg_by_pool_state[pgid.pool()].erase(s.state);
1328 }
31f18b77
FG
1329 end = --num_pg_by_pool[pgid.pool()];
1330 if (end == 0) {
11fdf7f2 1331 pool_erased = true;
7c673cae 1332 }
7c673cae 1333
31f18b77
FG
1334 if ((s.state & PG_STATE_CREATING) &&
1335 s.parent_split_bits == 0) {
1336 creating_pgs.erase(pgid);
1337 if (s.acting_primary >= 0) {
1338 map<epoch_t,set<pg_t> >& r = creating_pgs_by_osd_epoch[s.acting_primary];
1339 r[s.mapping_epoch].erase(pgid);
1340 if (r[s.mapping_epoch].empty())
1341 r.erase(s.mapping_epoch);
1342 if (r.empty())
1343 creating_pgs_by_osd_epoch.erase(s.acting_primary);
7c673cae
FG
1344 }
1345 }
31f18b77
FG
1346
1347 if (s.state & PG_STATE_ACTIVE) {
1348 --num_pg_active;
1349 }
1350 if (s.state == 0) {
1351 --num_pg_unknown;
1352 }
1353
1354 if (sameosds)
11fdf7f2 1355 return pool_erased;
31f18b77
FG
1356
1357 for (auto p = s.blocked_by.begin();
1358 p != s.blocked_by.end();
1359 ++p) {
1360 auto q = blocked_by_sum.find(*p);
11fdf7f2 1361 ceph_assert(q != blocked_by_sum.end());
31f18b77
FG
1362 --q->second;
1363 if (q->second == 0)
1364 blocked_by_sum.erase(q);
1365 }
1366
81eedcae 1367 set<int32_t> actingset;
31f18b77 1368 for (auto p = s.acting.begin(); p != s.acting.end(); ++p) {
81eedcae 1369 actingset.insert(*p);
31f18b77
FG
1370 auto& oset = pg_by_osd[*p];
1371 oset.erase(pgid);
1372 if (oset.empty())
1373 pg_by_osd.erase(*p);
1374 auto it = num_pg_by_osd.find(*p);
1375 if (it != num_pg_by_osd.end() && it->second.acting > 0)
1376 it->second.acting--;
1377 }
1378 for (auto p = s.up.begin(); p != s.up.end(); ++p) {
1379 auto& oset = pg_by_osd[*p];
1380 oset.erase(pgid);
1381 if (oset.empty())
1382 pg_by_osd.erase(*p);
81eedcae
TL
1383 if (actingset.count(*p))
1384 continue;
31f18b77 1385 auto it = num_pg_by_osd.find(*p);
81eedcae
TL
1386 if (it != num_pg_by_osd.end() && it->second.up_not_acting > 0)
1387 it->second.up_not_acting--;
31f18b77
FG
1388 }
1389
1390 if (s.up_primary >= 0) {
1391 auto it = num_pg_by_osd.find(s.up_primary);
1392 if (it != num_pg_by_osd.end() && it->second.primary > 0)
1393 it->second.primary--;
1394 }
11fdf7f2
TL
1395 return pool_erased;
1396}
1397
1398void PGMap::calc_purged_snaps()
1399{
1400 purged_snaps.clear();
1401 set<int64_t> unknown;
1402 for (auto& i : pg_stat) {
1403 if (i.second.state == 0) {
1404 unknown.insert(i.first.pool());
1405 purged_snaps.erase(i.first.pool());
1406 continue;
1407 } else if (unknown.count(i.first.pool())) {
1408 continue;
1409 }
1410 auto j = purged_snaps.find(i.first.pool());
1411 if (j == purged_snaps.end()) {
1412 // base case
1413 purged_snaps[i.first.pool()] = i.second.purged_snaps;
1414 } else {
1415 j->second.intersection_of(i.second.purged_snaps);
1416 }
1417 }
31f18b77
FG
1418}
1419
11fdf7f2 1420void PGMap::calc_osd_sum_by_class(const OSDMap& osdmap)
31f18b77 1421{
11fdf7f2
TL
1422 osd_sum_by_class.clear();
1423 for (auto& i : osd_stat) {
1424 const char *class_name = osdmap.crush->get_item_class(i.first);
1425 if (class_name) {
1426 osd_sum_by_class[class_name].add(i.second);
1427 }
1428 }
31f18b77
FG
1429}
1430
1431void PGMap::stat_osd_add(int osd, const osd_stat_t &s)
1432{
1433 num_osd++;
1434 osd_sum.add(s);
1435 if (osd >= (int)osd_last_seq.size()) {
1436 osd_last_seq.resize(osd + 1);
1437 }
1438 osd_last_seq[osd] = s.seq;
1439}
1440
1441void PGMap::stat_osd_sub(int osd, const osd_stat_t &s)
1442{
1443 num_osd--;
1444 osd_sum.sub(s);
11fdf7f2 1445 ceph_assert(osd < (int)osd_last_seq.size());
31f18b77
FG
1446 osd_last_seq[osd] = 0;
1447}
1448
31f18b77 1449void PGMap::encode_digest(const OSDMap& osdmap,
11fdf7f2 1450 bufferlist& bl, uint64_t features)
31f18b77
FG
1451{
1452 get_rules_avail(osdmap, &avail_space_by_rule);
11fdf7f2
TL
1453 calc_osd_sum_by_class(osdmap);
1454 calc_purged_snaps();
31f18b77
FG
1455 PGMapDigest::encode(bl, features);
1456}
1457
1458void PGMap::encode(bufferlist &bl, uint64_t features) const
1459{
11fdf7f2
TL
1460 ENCODE_START(8, 8, bl);
1461 encode(version, bl);
1462 encode(pg_stat, bl);
1463 encode(osd_stat, bl, features);
1464 encode(last_osdmap_epoch, bl);
1465 encode(last_pg_scan, bl);
1466 encode(stamp, bl);
1467 encode(pool_statfs, bl, features);
31f18b77
FG
1468 ENCODE_FINISH(bl);
1469}
1470
11fdf7f2 1471void PGMap::decode(bufferlist::const_iterator &bl)
31f18b77 1472{
11fdf7f2
TL
1473 DECODE_START(8, bl);
1474 decode(version, bl);
1475 decode(pg_stat, bl);
1476 decode(osd_stat, bl);
1477 decode(last_osdmap_epoch, bl);
1478 decode(last_pg_scan, bl);
1479 decode(stamp, bl);
1480 decode(pool_statfs, bl);
31f18b77
FG
1481 DECODE_FINISH(bl);
1482
1483 calc_stats();
7c673cae
FG
1484}
1485
31f18b77
FG
1486void PGMap::dump(Formatter *f) const
1487{
1488 dump_basic(f);
1489 dump_pg_stats(f, false);
1490 dump_pool_stats(f);
1491 dump_osd_stats(f);
1492}
1493
1494void PGMap::dump_basic(Formatter *f) const
1495{
1496 f->dump_unsigned("version", version);
1497 f->dump_stream("stamp") << stamp;
1498 f->dump_unsigned("last_osdmap_epoch", last_osdmap_epoch);
1499 f->dump_unsigned("last_pg_scan", last_pg_scan);
31f18b77
FG
1500
1501 f->open_object_section("pg_stats_sum");
1502 pg_sum.dump(f);
1503 f->close_section();
1504
1505 f->open_object_section("osd_stats_sum");
1506 osd_sum.dump(f);
1507 f->close_section();
1508
31f18b77
FG
1509 dump_delta(f);
1510}
1511
1512void PGMap::dump_delta(Formatter *f) const
1513{
1514 f->open_object_section("pg_stats_delta");
1515 pg_sum_delta.dump(f);
11fdf7f2 1516 f->dump_stream("stamp_delta") << stamp_delta;
31f18b77
FG
1517 f->close_section();
1518}
1519
1520void PGMap::dump_pg_stats(Formatter *f, bool brief) const
1521{
1522 f->open_array_section("pg_stats");
1523 for (auto i = pg_stat.begin();
1524 i != pg_stat.end();
1525 ++i) {
1526 f->open_object_section("pg_stat");
1527 f->dump_stream("pgid") << i->first;
1528 if (brief)
1529 i->second.dump_brief(f);
1530 else
1531 i->second.dump(f);
1532 f->close_section();
1533 }
1534 f->close_section();
1535}
1536
1537void PGMap::dump_pool_stats(Formatter *f) const
1538{
1539 f->open_array_section("pool_stats");
1540 for (auto p = pg_pool_sum.begin();
1541 p != pg_pool_sum.end();
1542 ++p) {
1543 f->open_object_section("pool_stat");
1544 f->dump_int("poolid", p->first);
1545 auto q = num_pg_by_pool.find(p->first);
1546 if (q != num_pg_by_pool.end())
1547 f->dump_unsigned("num_pg", q->second);
1548 p->second.dump(f);
1549 f->close_section();
1550 }
1551 f->close_section();
1552}
1553
1554void PGMap::dump_osd_stats(Formatter *f) const
1555{
1556 f->open_array_section("osd_stats");
1557 for (auto q = osd_stat.begin();
1558 q != osd_stat.end();
1559 ++q) {
1560 f->open_object_section("osd_stat");
1561 f->dump_int("osd", q->first);
1562 q->second.dump(f);
1563 f->close_section();
1564 }
1565 f->close_section();
1566}
1567
1568void PGMap::dump_pg_stats_plain(
1569 ostream& ss,
1570 const mempool::pgmap::unordered_map<pg_t, pg_stat_t>& pg_stats,
1571 bool brief) const
1572{
1573 TextTable tab;
1574
1575 if (brief){
1576 tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT);
1577 tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT);
1578 tab.define_column("UP", TextTable::LEFT, TextTable::RIGHT);
1579 tab.define_column("UP_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1580 tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT);
1581 tab.define_column("ACTING_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1582 }
1583 else {
1584 tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT);
1585 tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
1586 tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1587 tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT);
1588 tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
1589 tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
1590 tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
1591 tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
1592 tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1593 tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
1594 tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT);
1595 tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT);
1596 tab.define_column("STATE_STAMP", TextTable::LEFT, TextTable::RIGHT);
1597 tab.define_column("VERSION", TextTable::LEFT, TextTable::RIGHT);
1598 tab.define_column("REPORTED", TextTable::LEFT, TextTable::RIGHT);
1599 tab.define_column("UP", TextTable::LEFT, TextTable::RIGHT);
1600 tab.define_column("UP_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1601 tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT);
1602 tab.define_column("ACTING_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1603 tab.define_column("LAST_SCRUB", TextTable::LEFT, TextTable::RIGHT);
1604 tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
1605 tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT);
1606 tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
b32b8144 1607 tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1608 }
1609
1610 for (auto i = pg_stats.begin();
1611 i != pg_stats.end(); ++i) {
1612 const pg_stat_t &st(i->second);
1613 if (brief) {
1614 tab << i->first
1615 << pg_state_string(st.state)
1616 << st.up
1617 << st.up_primary
1618 << st.acting
1619 << st.acting_primary
1620 << TextTable::endrow;
7c673cae 1621 } else {
31f18b77
FG
1622 ostringstream reported;
1623 reported << st.reported_epoch << ":" << st.reported_seq;
1624
1625 tab << i->first
1626 << st.stats.sum.num_objects
1627 << st.stats.sum.num_objects_missing_on_primary
1628 << st.stats.sum.num_objects_degraded
1629 << st.stats.sum.num_objects_misplaced
1630 << st.stats.sum.num_objects_unfound
1631 << st.stats.sum.num_bytes
11fdf7f2
TL
1632 << st.stats.sum.num_omap_bytes
1633 << st.stats.sum.num_omap_keys
31f18b77
FG
1634 << st.log_size
1635 << st.ondisk_log_size
1636 << pg_state_string(st.state)
1637 << st.last_change
1638 << st.version
1639 << reported.str()
1640 << pg_vector_string(st.up)
1641 << st.up_primary
1642 << pg_vector_string(st.acting)
1643 << st.acting_primary
1644 << st.last_scrub
1645 << st.last_scrub_stamp
1646 << st.last_deep_scrub
1647 << st.last_deep_scrub_stamp
b32b8144 1648 << st.snaptrimq_len
31f18b77 1649 << TextTable::endrow;
7c673cae
FG
1650 }
1651 }
7c673cae 1652
31f18b77
FG
1653 ss << tab;
1654}
1655
1656void PGMap::dump(ostream& ss) const
1657{
1658 dump_basic(ss);
1659 dump_pg_stats(ss, false);
1660 dump_pool_stats(ss, false);
1661 dump_pg_sum_stats(ss, false);
1662 dump_osd_stats(ss);
1663}
1664
1665void PGMap::dump_basic(ostream& ss) const
1666{
1667 ss << "version " << version << std::endl;
1668 ss << "stamp " << stamp << std::endl;
1669 ss << "last_osdmap_epoch " << last_osdmap_epoch << std::endl;
1670 ss << "last_pg_scan " << last_pg_scan << std::endl;
31f18b77
FG
1671}
1672
1673void PGMap::dump_pg_stats(ostream& ss, bool brief) const
1674{
1675 dump_pg_stats_plain(ss, pg_stat, brief);
1676}
1677
1678void PGMap::dump_pool_stats(ostream& ss, bool header) const
1679{
1680 TextTable tab;
1681
1682 if (header) {
1683 tab.define_column("POOLID", TextTable::LEFT, TextTable::LEFT);
1684 tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
1685 tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1686 tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT);
1687 tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
1688 tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
1689 tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
1690 tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
1691 tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1692 tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
1693 tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT);
1694 } else {
1695 tab.define_column("", TextTable::LEFT, TextTable::LEFT);
1696 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1697 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1698 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1699 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1700 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1701 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1702 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1703 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
1704 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1705 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1706 }
1707
1708 for (auto p = pg_pool_sum.begin();
1709 p != pg_pool_sum.end();
1710 ++p) {
1711 tab << p->first
1712 << p->second.stats.sum.num_objects
1713 << p->second.stats.sum.num_objects_missing_on_primary
1714 << p->second.stats.sum.num_objects_degraded
1715 << p->second.stats.sum.num_objects_misplaced
1716 << p->second.stats.sum.num_objects_unfound
1717 << p->second.stats.sum.num_bytes
11fdf7f2
TL
1718 << p->second.stats.sum.num_omap_bytes
1719 << p->second.stats.sum.num_omap_keys
31f18b77
FG
1720 << p->second.log_size
1721 << p->second.ondisk_log_size
1722 << TextTable::endrow;
1723 }
1724
1725 ss << tab;
1726}
1727
1728void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const
1729{
1730 TextTable tab;
1731
1732 if (header) {
1733 tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT);
1734 tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
1735 tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT);
1736 tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT);
1737 tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
1738 tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
1739 tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
1740 tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
1741 tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1742 tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
1743 tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT);
1744 } else {
1745 tab.define_column("", TextTable::LEFT, TextTable::LEFT);
1746 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1747 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1748 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1749 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1750 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1751 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1752 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1753 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
1754 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
1755 tab.define_column("", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1756 };
1757
1758 tab << "sum"
1759 << pg_sum.stats.sum.num_objects
1760 << pg_sum.stats.sum.num_objects_missing_on_primary
1761 << pg_sum.stats.sum.num_objects_degraded
1762 << pg_sum.stats.sum.num_objects_misplaced
1763 << pg_sum.stats.sum.num_objects_unfound
1764 << pg_sum.stats.sum.num_bytes
11fdf7f2
TL
1765 << pg_sum.stats.sum.num_omap_bytes
1766 << pg_sum.stats.sum.num_omap_keys
31f18b77
FG
1767 << pg_sum.log_size
1768 << pg_sum.ondisk_log_size
1769 << TextTable::endrow;
1770
1771 ss << tab;
1772}
1773
1774void PGMap::dump_osd_stats(ostream& ss) const
1775{
1776 TextTable tab;
1777
1778 tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT);
1779 tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
1780 tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2 1781 tab.define_column("USED_RAW", TextTable::LEFT, TextTable::RIGHT);
31f18b77
FG
1782 tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT);
1783 tab.define_column("HB_PEERS", TextTable::LEFT, TextTable::RIGHT);
1784 tab.define_column("PG_SUM", TextTable::LEFT, TextTable::RIGHT);
1785 tab.define_column("PRIMARY_PG_SUM", TextTable::LEFT, TextTable::RIGHT);
1786
1787 for (auto p = osd_stat.begin();
1788 p != osd_stat.end();
1789 ++p) {
1790 tab << p->first
11fdf7f2
TL
1791 << byte_u_t(p->second.statfs.get_used())
1792 << byte_u_t(p->second.statfs.available)
1793 << byte_u_t(p->second.statfs.get_used_raw())
1794 << byte_u_t(p->second.statfs.total)
31f18b77
FG
1795 << p->second.hb_peers
1796 << get_num_pg_by_osd(p->first)
1797 << get_num_primary_pg_by_osd(p->first)
1798 << TextTable::endrow;
1799 }
1800
1801 tab << "sum"
11fdf7f2
TL
1802 << byte_u_t(osd_sum.statfs.get_used())
1803 << byte_u_t(osd_sum.statfs.available)
1804 << byte_u_t(osd_sum.statfs.get_used_raw())
1805 << byte_u_t(osd_sum.statfs.total)
31f18b77 1806 << TextTable::endrow;
7c673cae 1807
31f18b77 1808 ss << tab;
7c673cae
FG
1809}
1810
31f18b77 1811void PGMap::dump_osd_sum_stats(ostream& ss) const
7c673cae 1812{
31f18b77 1813 TextTable tab;
7c673cae 1814
31f18b77
FG
1815 tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT);
1816 tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
1817 tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2 1818 tab.define_column("USED_RAW", TextTable::LEFT, TextTable::RIGHT);
31f18b77 1819 tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT);
7c673cae 1820
31f18b77 1821 tab << "sum"
11fdf7f2
TL
1822 << byte_u_t(osd_sum.statfs.get_used())
1823 << byte_u_t(osd_sum.statfs.available)
1824 << byte_u_t(osd_sum.statfs.get_used_raw())
1825 << byte_u_t(osd_sum.statfs.total)
31f18b77 1826 << TextTable::endrow;
7c673cae 1827
31f18b77 1828 ss << tab;
7c673cae
FG
1829}
1830
31f18b77
FG
1831void PGMap::get_stuck_stats(
1832 int types, const utime_t cutoff,
1833 mempool::pgmap::unordered_map<pg_t, pg_stat_t>& stuck_pgs) const
7c673cae 1834{
11fdf7f2 1835 ceph_assert(types != 0);
31f18b77
FG
1836 for (auto i = pg_stat.begin();
1837 i != pg_stat.end();
1838 ++i) {
1839 utime_t val = cutoff; // don't care about >= cutoff so that is infinity
1840
1841 if ((types & STUCK_INACTIVE) && !(i->second.state & PG_STATE_ACTIVE)) {
1842 if (i->second.last_active < val)
1843 val = i->second.last_active;
7c673cae 1844 }
31f18b77
FG
1845
1846 if ((types & STUCK_UNCLEAN) && !(i->second.state & PG_STATE_CLEAN)) {
1847 if (i->second.last_clean < val)
1848 val = i->second.last_clean;
7c673cae 1849 }
31f18b77
FG
1850
1851 if ((types & STUCK_DEGRADED) && (i->second.state & PG_STATE_DEGRADED)) {
1852 if (i->second.last_undegraded < val)
1853 val = i->second.last_undegraded;
7c673cae 1854 }
7c673cae 1855
31f18b77
FG
1856 if ((types & STUCK_UNDERSIZED) && (i->second.state & PG_STATE_UNDERSIZED)) {
1857 if (i->second.last_fullsized < val)
1858 val = i->second.last_fullsized;
1859 }
7c673cae 1860
31f18b77
FG
1861 if ((types & STUCK_STALE) && (i->second.state & PG_STATE_STALE)) {
1862 if (i->second.last_unstale < val)
1863 val = i->second.last_unstale;
1864 }
7c673cae 1865
31f18b77
FG
1866 // val is now the earliest any of the requested stuck states began
1867 if (val < cutoff) {
1868 stuck_pgs[i->first] = i->second;
1869 }
1870 }
7c673cae
FG
1871}
1872
31f18b77 1873bool PGMap::get_stuck_counts(const utime_t cutoff, map<string, int>& note) const
7c673cae 1874{
31f18b77
FG
1875 int inactive = 0;
1876 int unclean = 0;
1877 int degraded = 0;
1878 int undersized = 0;
1879 int stale = 0;
7c673cae 1880
31f18b77
FG
1881 for (auto i = pg_stat.begin();
1882 i != pg_stat.end();
1883 ++i) {
1884 if (! (i->second.state & PG_STATE_ACTIVE)) {
1885 if (i->second.last_active < cutoff)
1886 ++inactive;
7c673cae 1887 }
31f18b77
FG
1888 if (! (i->second.state & PG_STATE_CLEAN)) {
1889 if (i->second.last_clean < cutoff)
1890 ++unclean;
7c673cae 1891 }
31f18b77
FG
1892 if (i->second.state & PG_STATE_DEGRADED) {
1893 if (i->second.last_undegraded < cutoff)
1894 ++degraded;
7c673cae 1895 }
31f18b77
FG
1896 if (i->second.state & PG_STATE_UNDERSIZED) {
1897 if (i->second.last_fullsized < cutoff)
1898 ++undersized;
7c673cae 1899 }
31f18b77
FG
1900 if (i->second.state & PG_STATE_STALE) {
1901 if (i->second.last_unstale < cutoff)
1902 ++stale;
7c673cae
FG
1903 }
1904 }
31f18b77
FG
1905
1906 if (inactive)
1907 note["stuck inactive"] = inactive;
1908
1909 if (unclean)
1910 note["stuck unclean"] = unclean;
1911
1912 if (undersized)
1913 note["stuck undersized"] = undersized;
1914
1915 if (degraded)
1916 note["stuck degraded"] = degraded;
1917
1918 if (stale)
1919 note["stuck stale"] = stale;
1920
1921 return inactive || unclean || undersized || degraded || stale;
1922}
1923
1924void PGMap::dump_stuck(Formatter *f, int types, utime_t cutoff) const
1925{
1926 mempool::pgmap::unordered_map<pg_t, pg_stat_t> stuck_pg_stats;
1927 get_stuck_stats(types, cutoff, stuck_pg_stats);
1928 f->open_array_section("stuck_pg_stats");
1929 for (auto i = stuck_pg_stats.begin();
1930 i != stuck_pg_stats.end();
1931 ++i) {
1932 f->open_object_section("pg_stat");
1933 f->dump_stream("pgid") << i->first;
1934 i->second.dump(f);
1935 f->close_section();
1936 }
1937 f->close_section();
1938}
1939
1940void PGMap::dump_stuck_plain(ostream& ss, int types, utime_t cutoff) const
1941{
1942 mempool::pgmap::unordered_map<pg_t, pg_stat_t> stuck_pg_stats;
1943 get_stuck_stats(types, cutoff, stuck_pg_stats);
1944 if (!stuck_pg_stats.empty())
1945 dump_pg_stats_plain(ss, stuck_pg_stats, true);
1946}
1947
1948int PGMap::dump_stuck_pg_stats(
1949 stringstream &ds,
1950 Formatter *f,
1951 int threshold,
1952 vector<string>& args) const
1953{
1954 int stuck_types = 0;
1955
1956 for (auto i = args.begin(); i != args.end(); ++i) {
1957 if (*i == "inactive")
1958 stuck_types |= PGMap::STUCK_INACTIVE;
1959 else if (*i == "unclean")
1960 stuck_types |= PGMap::STUCK_UNCLEAN;
1961 else if (*i == "undersized")
1962 stuck_types |= PGMap::STUCK_UNDERSIZED;
1963 else if (*i == "degraded")
1964 stuck_types |= PGMap::STUCK_DEGRADED;
1965 else if (*i == "stale")
1966 stuck_types |= PGMap::STUCK_STALE;
1967 else {
1968 ds << "Unknown type: " << *i << std::endl;
1969 return -EINVAL;
7c673cae
FG
1970 }
1971 }
31f18b77
FG
1972
1973 utime_t now(ceph_clock_now());
1974 utime_t cutoff = now - utime_t(threshold, 0);
1975
1976 if (!f) {
1977 dump_stuck_plain(ds, stuck_types, cutoff);
1978 } else {
1979 dump_stuck(f, stuck_types, cutoff);
1980 f->flush(ds);
7c673cae 1981 }
31f18b77
FG
1982
1983 return 0;
7c673cae
FG
1984}
1985
31f18b77 1986void PGMap::dump_osd_perf_stats(Formatter *f) const
7c673cae 1987{
31f18b77
FG
1988 f->open_array_section("osd_perf_infos");
1989 for (auto i = osd_stat.begin();
1990 i != osd_stat.end();
1991 ++i) {
1992 f->open_object_section("osd");
1993 f->dump_int("id", i->first);
1994 {
1995 f->open_object_section("perf_stats");
1996 i->second.os_perf_stat.dump(f);
1997 f->close_section();
1998 }
1999 f->close_section();
2000 }
2001 f->close_section();
7c673cae 2002}
31f18b77 2003void PGMap::print_osd_perf_stats(std::ostream *ss) const
7c673cae 2004{
31f18b77
FG
2005 TextTable tab;
2006 tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT);
2007 tab.define_column("commit_latency(ms)", TextTable::LEFT, TextTable::RIGHT);
2008 tab.define_column("apply_latency(ms)", TextTable::LEFT, TextTable::RIGHT);
2009 for (auto i = osd_stat.begin();
2010 i != osd_stat.end();
2011 ++i) {
2012 tab << i->first;
11fdf7f2
TL
2013 tab << i->second.os_perf_stat.os_commit_latency_ns / 1000000ull;
2014 tab << i->second.os_perf_stat.os_apply_latency_ns / 1000000ull;
31f18b77
FG
2015 tab << TextTable::endrow;
2016 }
2017 (*ss) << tab;
2018}
7c673cae 2019
31f18b77
FG
2020void PGMap::dump_osd_blocked_by_stats(Formatter *f) const
2021{
2022 f->open_array_section("osd_blocked_by_infos");
2023 for (auto i = blocked_by_sum.begin();
2024 i != blocked_by_sum.end();
2025 ++i) {
2026 f->open_object_section("osd");
2027 f->dump_int("id", i->first);
2028 f->dump_int("num_blocked", i->second);
2029 f->close_section();
2030 }
2031 f->close_section();
2032}
2033void PGMap::print_osd_blocked_by_stats(std::ostream *ss) const
2034{
2035 TextTable tab;
2036 tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT);
2037 tab.define_column("num_blocked", TextTable::LEFT, TextTable::RIGHT);
2038 for (auto i = blocked_by_sum.begin();
2039 i != blocked_by_sum.end();
2040 ++i) {
2041 tab << i->first;
2042 tab << i->second;
2043 tab << TextTable::endrow;
2044 }
2045 (*ss) << tab;
7c673cae
FG
2046}
2047
31f18b77 2048
7c673cae
FG
2049/**
2050 * update aggregated delta
2051 *
2052 * @param cct ceph context
2053 * @param ts Timestamp for the stats being delta'ed
2054 * @param old_pool_sum Previous stats sum
2055 * @param last_ts Last timestamp for pool
2056 * @param result_pool_sum Resulting stats
2057 * @param result_pool_delta Resulting pool delta
2058 * @param result_ts_delta Resulting timestamp delta
2059 * @param delta_avg_list List of last N computed deltas, used to average
2060 */
31f18b77
FG
2061void PGMap::update_delta(
2062 CephContext *cct,
2063 const utime_t ts,
2064 const pool_stat_t& old_pool_sum,
2065 utime_t *last_ts,
2066 const pool_stat_t& current_pool_sum,
2067 pool_stat_t *result_pool_delta,
2068 utime_t *result_ts_delta,
2069 mempool::pgmap::list<pair<pool_stat_t,utime_t> > *delta_avg_list)
7c673cae
FG
2070{
2071 /* @p ts is the timestamp we want to associate with the data
2072 * in @p old_pool_sum, and on which we will base ourselves to
2073 * calculate the delta, stored in 'delta_t'.
2074 */
2075 utime_t delta_t;
2076 delta_t = ts; // start with the provided timestamp
2077 delta_t -= *last_ts; // take the last timestamp we saw
2078 *last_ts = ts; // @p ts becomes the last timestamp we saw
2079
31f18b77
FG
2080 // adjust delta_t, quick start if there is no update in a long period
2081 delta_t = std::min(delta_t,
2082 utime_t(2 * (cct ? cct->_conf->mon_delta_reset_interval : 10), 0));
2083
2084 // calculate a delta, and average over the last 6 deltas by default.
7c673cae
FG
2085 /* start by taking a copy of our current @p result_pool_sum, and by
2086 * taking out the stats from @p old_pool_sum. This generates a stats
2087 * delta. Stash this stats delta in @p delta_avg_list, along with the
2088 * timestamp delta for these results.
2089 */
2090 pool_stat_t d = current_pool_sum;
2091 d.stats.sub(old_pool_sum.stats);
7c673cae
FG
2092
2093 /* Aggregate current delta, and take out the last seen delta (if any) to
2094 * average it out.
b32b8144 2095 * Skip calculating delta while sum was not synchronized.
7c673cae 2096 */
b32b8144
FG
2097 if(!old_pool_sum.stats.sum.is_zero()) {
2098 delta_avg_list->push_back(make_pair(d,delta_t));
2099 *result_ts_delta += delta_t;
2100 result_pool_delta->stats.add(d.stats);
2101 }
11fdf7f2
TL
2102 size_t s = cct ? cct->_conf.get_val<uint64_t>("mon_stat_smooth_intervals") : 1;
2103 while (delta_avg_list->size() > s) {
7c673cae
FG
2104 result_pool_delta->stats.sub(delta_avg_list->front().first.stats);
2105 *result_ts_delta -= delta_avg_list->front().second;
2106 delta_avg_list->pop_front();
2107 }
2108}
2109
7c673cae
FG
2110/**
2111 * Update a given pool's deltas
2112 *
2113 * @param cct Ceph Context
2114 * @param ts Timestamp for the stats being delta'ed
2115 * @param pool Pool's id
2116 * @param old_pool_sum Previous stats sum
2117 */
31f18b77
FG
2118void PGMap::update_one_pool_delta(
2119 CephContext *cct,
2120 const utime_t ts,
11fdf7f2 2121 const int64_t pool,
31f18b77 2122 const pool_stat_t& old_pool_sum)
7c673cae
FG
2123{
2124 if (per_pool_sum_deltas.count(pool) == 0) {
11fdf7f2
TL
2125 ceph_assert(per_pool_sum_deltas_stamps.count(pool) == 0);
2126 ceph_assert(per_pool_sum_delta.count(pool) == 0);
7c673cae
FG
2127 }
2128
31f18b77 2129 auto& sum_delta = per_pool_sum_delta[pool];
7c673cae
FG
2130
2131 update_delta(cct, ts, old_pool_sum, &sum_delta.second, pg_pool_sum[pool],
2132 &sum_delta.first, &per_pool_sum_deltas_stamps[pool],
2133 &per_pool_sum_deltas[pool]);
2134}
2135
2136/**
2137 * Update pools' deltas
2138 *
2139 * @param cct CephContext
2140 * @param ts Timestamp for the stats being delta'ed
2141 * @param pg_pool_sum_old Map of pool stats for delta calcs.
2142 */
31f18b77
FG
2143void PGMap::update_pool_deltas(
2144 CephContext *cct, const utime_t ts,
11fdf7f2 2145 const mempool::pgmap::unordered_map<int32_t,pool_stat_t>& pg_pool_sum_old)
7c673cae 2146{
31f18b77 2147 for (auto it = pg_pool_sum_old.begin();
7c673cae
FG
2148 it != pg_pool_sum_old.end(); ++it) {
2149 update_one_pool_delta(cct, ts, it->first, it->second);
2150 }
2151}
2152
2153void PGMap::clear_delta()
2154{
2155 pg_sum_delta = pool_stat_t();
2156 pg_sum_deltas.clear();
2157 stamp_delta = utime_t();
2158}
2159
7c673cae
FG
2160void PGMap::generate_test_instances(list<PGMap*>& o)
2161{
2162 o.push_back(new PGMap);
2163 list<Incremental*> inc;
2164 Incremental::generate_test_instances(inc);
2165 delete inc.front();
2166 inc.pop_front();
2167 while (!inc.empty()) {
2168 PGMap *pmp = new PGMap();
2169 *pmp = *o.back();
2170 o.push_back(pmp);
2171 o.back()->apply_incremental(NULL, *inc.front());
2172 delete inc.front();
2173 inc.pop_front();
2174 }
2175}
2176
11fdf7f2 2177void PGMap::get_filtered_pg_stats(uint64_t state, int64_t poolid, int64_t osdid,
7c673cae
FG
2178 bool primary, set<pg_t>& pgs) const
2179{
31f18b77 2180 for (auto i = pg_stat.begin();
7c673cae
FG
2181 i != pg_stat.end();
2182 ++i) {
11fdf7f2 2183 if ((poolid >= 0) && (poolid != i->first.pool()))
7c673cae
FG
2184 continue;
2185 if ((osdid >= 0) && !(i->second.is_acting_osd(osdid,primary)))
2186 continue;
11fdf7f2
TL
2187 if (state == (uint64_t)-1 || // "all"
2188 (i->second.state & state) || // matches a state bit
2189 (state == 0 && i->second.state == 0)) { // matches "unknown" (== 0)
2190 pgs.insert(i->first);
2191 }
7c673cae
FG
2192 }
2193}
2194
2195void PGMap::dump_filtered_pg_stats(Formatter *f, set<pg_t>& pgs) const
2196{
2197 f->open_array_section("pg_stats");
31f18b77 2198 for (auto i = pgs.begin(); i != pgs.end(); ++i) {
7c673cae
FG
2199 const pg_stat_t& st = pg_stat.at(*i);
2200 f->open_object_section("pg_stat");
2201 f->dump_stream("pgid") << *i;
2202 st.dump(f);
2203 f->close_section();
2204 }
2205 f->close_section();
2206}
2207
2208void PGMap::dump_filtered_pg_stats(ostream& ss, set<pg_t>& pgs) const
2209{
2210 TextTable tab;
11fdf7f2 2211 utime_t now = ceph_clock_now();
7c673cae 2212
11fdf7f2 2213 tab.define_column("PG", TextTable::LEFT, TextTable::LEFT);
7c673cae 2214 tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
7c673cae
FG
2215 tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT);
2216 tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT);
2217 tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT);
2218 tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2
TL
2219 tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT);
2220 tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT);
7c673cae 2221 tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT);
7c673cae 2222 tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT);
11fdf7f2 2223 tab.define_column("SINCE", TextTable::LEFT, TextTable::RIGHT);
7c673cae
FG
2224 tab.define_column("VERSION", TextTable::LEFT, TextTable::RIGHT);
2225 tab.define_column("REPORTED", TextTable::LEFT, TextTable::RIGHT);
2226 tab.define_column("UP", TextTable::LEFT, TextTable::RIGHT);
7c673cae 2227 tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT);
7c673cae 2228 tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
7c673cae
FG
2229 tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
2230
31f18b77 2231 for (auto i = pgs.begin(); i != pgs.end(); ++i) {
7c673cae
FG
2232 const pg_stat_t& st = pg_stat.at(*i);
2233
2234 ostringstream reported;
2235 reported << st.reported_epoch << ":" << st.reported_seq;
2236
11fdf7f2
TL
2237 ostringstream upstr, actingstr;
2238 upstr << st.up << 'p' << st.up_primary;
2239 actingstr << st.acting << 'p' << st.acting_primary;
7c673cae
FG
2240 tab << *i
2241 << st.stats.sum.num_objects
7c673cae
FG
2242 << st.stats.sum.num_objects_degraded
2243 << st.stats.sum.num_objects_misplaced
2244 << st.stats.sum.num_objects_unfound
2245 << st.stats.sum.num_bytes
11fdf7f2
TL
2246 << st.stats.sum.num_omap_bytes
2247 << st.stats.sum.num_omap_keys
7c673cae 2248 << st.log_size
7c673cae 2249 << pg_state_string(st.state)
11fdf7f2 2250 << utimespan_str(now - st.last_change)
7c673cae
FG
2251 << st.version
2252 << reported.str()
11fdf7f2
TL
2253 << upstr.str()
2254 << actingstr.str()
7c673cae 2255 << st.last_scrub_stamp
7c673cae
FG
2256 << st.last_deep_scrub_stamp
2257 << TextTable::endrow;
2258 }
2259
2260 ss << tab;
2261}
2262
11fdf7f2
TL
2263void PGMap::dump_pool_stats_and_io_rate(int64_t poolid, const OSDMap &osd_map,
2264 Formatter *f,
2265 stringstream *rs) const {
2266 string pool_name = osd_map.get_pool_name(poolid);
2267 if (f) {
2268 f->open_object_section("pool");
2269 f->dump_string("pool_name", pool_name.c_str());
2270 f->dump_int("pool_id", poolid);
2271 f->open_object_section("recovery");
2272 }
2273 list<string> sl;
2274 stringstream tss;
2275 pool_recovery_summary(f, &sl, poolid);
2276 if (!f && !sl.empty()) {
2277 for (auto &p : sl)
2278 tss << " " << p << "\n";
2279 }
2280 if (f) {
2281 f->close_section(); // object section recovery
2282 f->open_object_section("recovery_rate");
2283 }
2284 ostringstream rss;
2285 pool_recovery_rate_summary(f, &rss, poolid);
2286 if (!f && !rss.str().empty())
2287 tss << " recovery io " << rss.str() << "\n";
2288 if (f) {
2289 f->close_section(); // object section recovery_rate
2290 f->open_object_section("client_io_rate");
2291 }
2292 rss.clear();
2293 rss.str("");
2294 pool_client_io_rate_summary(f, &rss, poolid);
2295 if (!f && !rss.str().empty())
2296 tss << " client io " << rss.str() << "\n";
2297 // dump cache tier IO rate for cache pool
2298 const pg_pool_t *pool = osd_map.get_pg_pool(poolid);
2299 if (pool->is_tier()) {
2300 if (f) {
2301 f->close_section(); // object section client_io_rate
2302 f->open_object_section("cache_io_rate");
7c673cae 2303 }
11fdf7f2
TL
2304 rss.clear();
2305 rss.str("");
2306 pool_cache_io_rate_summary(f, &rss, poolid);
2307 if (!f && !rss.str().empty())
2308 tss << " cache tier io " << rss.str() << "\n";
2309 }
2310 if (f) {
2311 f->close_section(); // object section cache_io_rate
2312 f->close_section(); // object section pool
2313 } else {
2314 *rs << "pool " << pool_name << " id " << poolid << "\n";
2315 if (!tss.str().empty())
2316 *rs << tss.str() << "\n";
2317 else
2318 *rs << " nothing is going on\n\n";
7c673cae 2319 }
7c673cae
FG
2320}
2321
11fdf7f2 2322void PGMap::get_health_checks(
31f18b77 2323 CephContext *cct,
11fdf7f2
TL
2324 const OSDMap& osdmap,
2325 health_check_map_t *checks) const
7c673cae 2326{
11fdf7f2
TL
2327 utime_t now = ceph_clock_now();
2328 const auto max = cct->_conf.get_val<uint64_t>("mon_health_max_detail");
2329 const auto& pools = osdmap.get_pools();
224ce89b 2330
224ce89b
WB
2331 typedef enum pg_consequence_t {
2332 UNAVAILABLE = 1, // Client IO to the pool may block
2333 DEGRADED = 2, // Fewer than the requested number of replicas are present
2334 DEGRADED_FULL = 3, // Fewer than the request number of replicas may be present
2335 // and insufficiet resources are present to fix this
2336 DAMAGED = 4 // The data may be missing or inconsistent on disk and
2337 // requires repair
2338 } pg_consequence_t;
2339
2340 // For a given PG state, how should it be reported at the pool level?
2341 class PgStateResponse {
2342 public:
2343 pg_consequence_t consequence;
2344 typedef std::function< utime_t(const pg_stat_t&) > stuck_cb;
2345 stuck_cb stuck_since;
2346 bool invert;
2347
11fdf7f2
TL
2348 PgStateResponse(const pg_consequence_t& c, stuck_cb&& s)
2349 : consequence(c), stuck_since(std::move(s)), invert(false)
224ce89b
WB
2350 {
2351 }
2352
11fdf7f2
TL
2353 PgStateResponse(const pg_consequence_t& c, stuck_cb&& s, bool i)
2354 : consequence(c), stuck_since(std::move(s)), invert(i)
224ce89b
WB
2355 {
2356 }
2357 };
2358
2359 // Record the PG state counts that contributed to a reported pool state
2360 class PgCauses {
2361 public:
2362 // Map of PG_STATE_* to number of pgs in that state.
2363 std::map<unsigned, unsigned> states;
2364
2365 // List of all PG IDs that had a state contributing
2366 // to this health condition.
2367 std::set<pg_t> pgs;
2368
2369 std::map<pg_t, std::string> pg_messages;
2370 };
2371
2372 // Map of PG state to how to respond to it
2373 std::map<unsigned, PgStateResponse> state_to_response = {
2374 // Immediate reports
2375 { PG_STATE_INCONSISTENT, {DAMAGED, {}} },
c07f9fc5 2376 { PG_STATE_INCOMPLETE, {UNAVAILABLE, {}} },
224ce89b 2377 { PG_STATE_SNAPTRIM_ERROR, {DAMAGED, {}} },
b32b8144
FG
2378 { PG_STATE_RECOVERY_UNFOUND, {DAMAGED, {}} },
2379 { PG_STATE_BACKFILL_UNFOUND, {DAMAGED, {}} },
c07f9fc5
FG
2380 { PG_STATE_BACKFILL_TOOFULL, {DEGRADED_FULL, {}} },
2381 { PG_STATE_RECOVERY_TOOFULL, {DEGRADED_FULL, {}} },
224ce89b
WB
2382 { PG_STATE_DEGRADED, {DEGRADED, {}} },
2383 { PG_STATE_DOWN, {UNAVAILABLE, {}} },
2384 // Delayed (wait until stuck) reports
2385 { PG_STATE_PEERING, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_peered;} } },
2386 { PG_STATE_UNDERSIZED, {DEGRADED, [](const pg_stat_t &p){return p.last_fullsized;} } },
2387 { PG_STATE_STALE, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_unstale;} } },
2388 // Delayed and inverted reports
b32b8144 2389 { PG_STATE_ACTIVE, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_active;}, true} }
224ce89b
WB
2390 };
2391
2392 // Specialized state printer that takes account of inversion of
2393 // ACTIVE, CLEAN checks.
11fdf7f2 2394 auto state_name = [](const uint64_t &state) {
224ce89b
WB
2395 // Special cases for the states that are inverted checks
2396 if (state == PG_STATE_CLEAN) {
2397 return std::string("unclean");
2398 } else if (state == PG_STATE_ACTIVE) {
2399 return std::string("inactive");
2400 } else {
2401 return pg_state_string(state);
2402 }
2403 };
2404
2405 // Map of what is wrong to information about why, implicitly also stores
2406 // the list of what is wrong.
2407 std::map<pg_consequence_t, PgCauses> detected;
2408
2409 // Optimisation: trim down the number of checks to apply based on
2410 // the summary counters
2411 std::map<unsigned, PgStateResponse> possible_responses;
2412 for (const auto &i : num_pg_by_state) {
2413 for (const auto &j : state_to_response) {
2414 if (!j.second.invert) {
2415 // Check for normal tests by seeing if any pgs have the flag
2416 if (i.first & j.first) {
2417 possible_responses.insert(j);
2418 }
2419 }
2420 }
2421 }
2422
2423 for (const auto &j : state_to_response) {
2424 if (j.second.invert) {
2425 // Check for inverted tests by seeing if not-all pgs have the flag
2426 const auto &found = num_pg_by_state.find(j.first);
2427 if (found == num_pg_by_state.end() || found->second != num_pg) {
2428 possible_responses.insert(j);
2429 }
2430 }
2431 }
2432
11fdf7f2 2433 utime_t cutoff = now - utime_t(cct->_conf.get_val<int64_t>("mon_pg_stuck_threshold"), 0);
224ce89b
WB
2434 // Loop over all PGs, if there are any possibly-unhealthy states in there
2435 if (!possible_responses.empty()) {
2436 for (const auto& i : pg_stat) {
2437 const auto &pg_id = i.first;
2438 const auto &pg_info = i.second;
2439
2440 for (const auto &j : state_to_response) {
2441 const auto &pg_response_state = j.first;
2442 const auto &pg_response = j.second;
2443
2444 // Apply the state test
2445 if (!(bool(pg_info.state & pg_response_state) != pg_response.invert)) {
2446 continue;
2447 }
2448
2449 // Apply stuckness test if needed
2450 if (pg_response.stuck_since) {
2451 // Delayed response, check for stuckness
2452 utime_t last_whatever = pg_response.stuck_since(pg_info);
2453 if (last_whatever >= cutoff) {
2454 // Not stuck enough, ignore.
2455 continue;
2456 } else {
2457
2458 }
2459 }
2460
2461 auto &causes = detected[pg_response.consequence];
2462 causes.states[pg_response_state]++;
2463 causes.pgs.insert(pg_id);
2464
2465 // Don't bother composing detail string if we have already recorded
2466 // too many
2467 if (causes.pg_messages.size() > max) {
2468 continue;
2469 }
2470
2471 std::ostringstream ss;
2472 if (pg_response.stuck_since) {
2473 utime_t since = pg_response.stuck_since(pg_info);
2474 ss << "pg " << pg_id << " is stuck " << state_name(pg_response_state);
2475 if (since == utime_t()) {
2476 ss << " since forever";
2477 } else {
2478 utime_t dur = now - since;
2479 ss << " for " << dur;
2480 }
2481 ss << ", current state " << pg_state_string(pg_info.state)
2482 << ", last acting " << pg_info.acting;
2483 } else {
2484 ss << "pg " << pg_id << " is "
2485 << pg_state_string(pg_info.state);
2486 ss << ", acting " << pg_info.acting;
2487 if (pg_info.stats.sum.num_objects_unfound) {
2488 ss << ", " << pg_info.stats.sum.num_objects_unfound
2489 << " unfound";
2490 }
2491 }
2492
2493 if (pg_info.state & PG_STATE_INCOMPLETE) {
2494 const pg_pool_t *pi = osdmap.get_pg_pool(pg_id.pool());
2495 if (pi && pi->min_size > 1) {
2496 ss << " (reducing pool "
2497 << osdmap.get_pool_name(pg_id.pool())
2498 << " min_size from " << (int)pi->min_size
2499 << " may help; search ceph.com/docs for 'incomplete')";
2500 }
2501 }
2502
2503 causes.pg_messages[pg_id] = ss.str();
2504 }
2505 }
2506 } else {
2507 dout(10) << __func__ << " skipping loop over PGs: counters look OK" << dendl;
2508 }
2509
2510 for (const auto &i : detected) {
2511 std::string health_code;
2512 health_status_t sev;
2513 std::string summary;
2514 switch(i.first) {
2515 case UNAVAILABLE:
2516 health_code = "PG_AVAILABILITY";
2517 sev = HEALTH_WARN;
2518 summary = "Reduced data availability: ";
2519 break;
2520 case DEGRADED:
2521 health_code = "PG_DEGRADED";
2522 summary = "Degraded data redundancy: ";
2523 sev = HEALTH_WARN;
2524 break;
2525 case DEGRADED_FULL:
2526 health_code = "PG_DEGRADED_FULL";
2527 summary = "Degraded data redundancy (low space): ";
2528 sev = HEALTH_ERR;
2529 break;
2530 case DAMAGED:
2531 health_code = "PG_DAMAGED";
2532 summary = "Possible data damage: ";
2533 sev = HEALTH_ERR;
2534 break;
2535 default:
11fdf7f2 2536 ceph_abort();
224ce89b
WB
2537 }
2538
2539 if (i.first == DEGRADED) {
2540 if (pg_sum.stats.sum.num_objects_degraded &&
2541 pg_sum.stats.sum.num_object_copies > 0) {
2542 double pc = (double)pg_sum.stats.sum.num_objects_degraded /
2543 (double)pg_sum.stats.sum.num_object_copies * (double)100.0;
2544 char b[20];
2545 snprintf(b, sizeof(b), "%.3lf", pc);
2546 ostringstream ss;
2547 ss << pg_sum.stats.sum.num_objects_degraded
2548 << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded ("
2549 << b << "%)";
2550
2551 // Throw in a comma for the benefit of the following PG counts
2552 summary += ss.str() + ", ";
2553 }
2554 }
2555
2556 // Compose summary message saying how many PGs in what states led
2557 // to this health check failing
2558 std::vector<std::string> pg_msgs;
2559 for (const auto &j : i.second.states) {
2560 std::ostringstream msg;
2561 msg << j.second << (j.second > 1 ? " pgs " : " pg ") << state_name(j.first);
2562 pg_msgs.push_back(msg.str());
2563 }
2564 summary += joinify(pg_msgs.begin(), pg_msgs.end(), std::string(", "));
2565
2566
2567
2568 health_check_t *check = &checks->add(
2569 health_code,
2570 sev,
2571 summary);
2572
2573 // Compose list of PGs contributing to this health check failing
2574 for (const auto &j : i.second.pg_messages) {
2575 check->detail.push_back(j.second);
2576 }
2577 }
2578
224ce89b
WB
2579 // OSD_SCRUB_ERRORS
2580 if (pg_sum.stats.sum.num_scrub_errors) {
2581 ostringstream ss;
2582 ss << pg_sum.stats.sum.num_scrub_errors << " scrub errors";
2583 checks->add("OSD_SCRUB_ERRORS", HEALTH_ERR, ss.str());
2584 }
2585
28e407b8
AA
2586 // LARGE_OMAP_OBJECTS
2587 if (pg_sum.stats.sum.num_large_omap_objects) {
2588 list<string> detail;
2589 for (auto &pool : pools) {
2590 const string& pool_name = osdmap.get_pool_name(pool.first);
2591 auto it2 = pg_pool_sum.find(pool.first);
2592 if (it2 == pg_pool_sum.end()) {
2593 continue;
2594 }
2595 const pool_stat_t *pstat = &it2->second;
2596 if (pstat == nullptr) {
2597 continue;
2598 }
2599 const object_stat_sum_t& sum = pstat->stats.sum;
2600 if (sum.num_large_omap_objects) {
2601 stringstream ss;
2602 ss << sum.num_large_omap_objects << " large objects found in pool "
2603 << "'" << pool_name << "'";
2604 detail.push_back(ss.str());
2605 }
2606 }
2607 if (!detail.empty()) {
2608 ostringstream ss;
2609 ss << pg_sum.stats.sum.num_large_omap_objects << " large omap objects";
2610 auto& d = checks->add("LARGE_OMAP_OBJECTS", HEALTH_WARN, ss.str());
2611 stringstream tip;
2612 tip << "Search the cluster log for 'Large omap object found' for more "
2613 << "details.";
2614 detail.push_back(tip.str());
2615 d.detail.swap(detail);
2616 }
2617 }
2618
224ce89b
WB
2619 // CACHE_POOL_NEAR_FULL
2620 {
2621 list<string> detail;
2622 unsigned num_pools = 0;
2623 for (auto& p : pools) {
2624 if ((!p.second.target_max_objects && !p.second.target_max_bytes) ||
2625 !pg_pool_sum.count(p.first)) {
2626 continue;
2627 }
2628 bool nearfull = false;
2629 const string& name = osdmap.get_pool_name(p.first);
2630 const pool_stat_t& st = get_pg_pool_sum_stat(p.first);
2631 uint64_t ratio = p.second.cache_target_full_ratio_micro +
2632 ((1000000 - p.second.cache_target_full_ratio_micro) *
2633 cct->_conf->mon_cache_target_full_warn_ratio);
2634 if (p.second.target_max_objects &&
2635 (uint64_t)(st.stats.sum.num_objects -
2636 st.stats.sum.num_objects_hit_set_archive) >
2637 p.second.target_max_objects * (ratio / 1000000.0)) {
2638 ostringstream ss;
2639 ss << "cache pool '" << name << "' with "
1adf2230 2640 << si_u_t(st.stats.sum.num_objects)
224ce89b 2641 << " objects at/near target max "
1adf2230 2642 << si_u_t(p.second.target_max_objects) << " objects";
224ce89b
WB
2643 detail.push_back(ss.str());
2644 nearfull = true;
2645 }
2646 if (p.second.target_max_bytes &&
2647 (uint64_t)(st.stats.sum.num_bytes -
2648 st.stats.sum.num_bytes_hit_set_archive) >
2649 p.second.target_max_bytes * (ratio / 1000000.0)) {
2650 ostringstream ss;
2651 ss << "cache pool '" << name
1adf2230
AA
2652 << "' with " << byte_u_t(st.stats.sum.num_bytes)
2653 << " at/near target max "
2654 << byte_u_t(p.second.target_max_bytes);
224ce89b
WB
2655 detail.push_back(ss.str());
2656 nearfull = true;
2657 }
2658 if (nearfull) {
2659 ++num_pools;
2660 }
2661 }
2662 if (!detail.empty()) {
2663 ostringstream ss;
2664 ss << num_pools << " cache pools at or near target size";
2665 auto& d = checks->add("CACHE_POOL_NEAR_FULL", HEALTH_WARN, ss.str());
2666 d.detail.swap(detail);
2667 }
2668 }
2669
2670 // TOO_FEW_PGS
3efd9988
FG
2671 unsigned num_in = osdmap.get_num_in_osds();
2672 auto sum_pg_up = std::max(static_cast<size_t>(pg_sum.up), pg_stat.size());
2673 const auto min_pg_per_osd =
11fdf7f2 2674 cct->_conf.get_val<uint64_t>("mon_pg_warn_min_per_osd");
3efd9988
FG
2675 if (num_in && min_pg_per_osd > 0 && osdmap.get_pools().size() > 0) {
2676 auto per = sum_pg_up / num_in;
2677 if (per < min_pg_per_osd && per) {
224ce89b
WB
2678 ostringstream ss;
2679 ss << "too few PGs per OSD (" << per
3efd9988 2680 << " < min " << min_pg_per_osd << ")";
224ce89b
WB
2681 checks->add("TOO_FEW_PGS", HEALTH_WARN, ss.str());
2682 }
2683 }
2684
2685 // TOO_MANY_PGS
11fdf7f2 2686 auto max_pg_per_osd = cct->_conf.get_val<uint64_t>("mon_max_pg_per_osd");
3efd9988
FG
2687 if (num_in && max_pg_per_osd > 0) {
2688 auto per = sum_pg_up / num_in;
2689 if (per > max_pg_per_osd) {
224ce89b
WB
2690 ostringstream ss;
2691 ss << "too many PGs per OSD (" << per
3efd9988 2692 << " > max " << max_pg_per_osd << ")";
224ce89b
WB
2693 checks->add("TOO_MANY_PGS", HEALTH_WARN, ss.str());
2694 }
2695 }
2696
2697 // SMALLER_PGP_NUM
2698 // MANY_OBJECTS_PER_PG
2699 if (!pg_stat.empty()) {
2700 list<string> pgp_detail, many_detail;
b32b8144 2701 const auto mon_pg_warn_min_objects =
11fdf7f2 2702 cct->_conf.get_val<int64_t>("mon_pg_warn_min_objects");
b32b8144 2703 const auto mon_pg_warn_min_pool_objects =
11fdf7f2 2704 cct->_conf.get_val<int64_t>("mon_pg_warn_min_pool_objects");
b32b8144 2705 const auto mon_pg_warn_max_object_skew =
11fdf7f2 2706 cct->_conf.get_val<double>("mon_pg_warn_max_object_skew");
224ce89b
WB
2707 for (auto p = pg_pool_sum.begin();
2708 p != pg_pool_sum.end();
2709 ++p) {
2710 const pg_pool_t *pi = osdmap.get_pg_pool(p->first);
2711 if (!pi)
2712 continue; // in case osdmap changes haven't propagated to PGMap yet
2713 const string& name = osdmap.get_pool_name(p->first);
11fdf7f2
TL
2714 // NOTE: we use pg_num_target and pgp_num_target for the purposes of
2715 // the warnings. If the cluster is failing to converge on the target
2716 // values that is a separate issue!
2717 if (pi->get_pg_num_target() > pi->get_pgp_num_target() &&
224ce89b
WB
2718 !(name.find(".DELETED") != string::npos &&
2719 cct->_conf->mon_fake_pool_delete)) {
2720 ostringstream ss;
2721 ss << "pool " << name << " pg_num "
11fdf7f2
TL
2722 << pi->get_pg_num_target()
2723 << " > pgp_num " << pi->get_pgp_num_target();
224ce89b
WB
2724 pgp_detail.push_back(ss.str());
2725 }
2726 int average_objects_per_pg = pg_sum.stats.sum.num_objects / pg_stat.size();
2727 if (average_objects_per_pg > 0 &&
b32b8144
FG
2728 pg_sum.stats.sum.num_objects >= mon_pg_warn_min_objects &&
2729 p->second.stats.sum.num_objects >= mon_pg_warn_min_pool_objects) {
11fdf7f2
TL
2730 int objects_per_pg = p->second.stats.sum.num_objects /
2731 pi->get_pg_num_target();
224ce89b 2732 float ratio = (float)objects_per_pg / (float)average_objects_per_pg;
b32b8144
FG
2733 if (mon_pg_warn_max_object_skew > 0 &&
2734 ratio > mon_pg_warn_max_object_skew) {
224ce89b
WB
2735 ostringstream ss;
2736 ss << "pool " << name << " objects per pg ("
2737 << objects_per_pg << ") is more than " << ratio
2738 << " times cluster average ("
2739 << average_objects_per_pg << ")";
2740 many_detail.push_back(ss.str());
2741 }
2742 }
2743 }
2744 if (!pgp_detail.empty()) {
2745 ostringstream ss;
2746 ss << pgp_detail.size() << " pools have pg_num > pgp_num";
2747 auto& d = checks->add("SMALLER_PGP_NUM", HEALTH_WARN, ss.str());
2748 d.detail.swap(pgp_detail);
2749 }
2750 if (!many_detail.empty()) {
2751 ostringstream ss;
2752 ss << many_detail.size() << " pools have many more objects per pg than"
2753 << " average";
2754 auto& d = checks->add("MANY_OBJECTS_PER_PG", HEALTH_WARN, ss.str());
2755 d.detail.swap(many_detail);
2756 }
2757 }
2758
2759 // POOL_FULL
2760 // POOL_NEAR_FULL
2761 {
11fdf7f2
TL
2762 float warn_threshold = (float)g_conf().get_val<int64_t>("mon_pool_quota_warn_threshold")/100;
2763 float crit_threshold = (float)g_conf().get_val<int64_t>("mon_pool_quota_crit_threshold")/100;
224ce89b
WB
2764 list<string> full_detail, nearfull_detail;
2765 unsigned full_pools = 0, nearfull_pools = 0;
2766 for (auto it : pools) {
2767 auto it2 = pg_pool_sum.find(it.first);
2768 if (it2 == pg_pool_sum.end()) {
2769 continue;
2770 }
2771 const pool_stat_t *pstat = &it2->second;
2772 const object_stat_sum_t& sum = pstat->stats.sum;
2773 const string& pool_name = osdmap.get_pool_name(it.first);
2774 const pg_pool_t &pool = it.second;
2775 bool full = false, nearfull = false;
2776 if (pool.quota_max_objects > 0) {
2777 stringstream ss;
2778 if ((uint64_t)sum.num_objects >= pool.quota_max_objects) {
2779 } else if (crit_threshold > 0 &&
2780 sum.num_objects >= pool.quota_max_objects*crit_threshold) {
2781 ss << "pool '" << pool_name
2782 << "' has " << sum.num_objects << " objects"
2783 << " (max " << pool.quota_max_objects << ")";
2784 full_detail.push_back(ss.str());
2785 full = true;
2786 } else if (warn_threshold > 0 &&
2787 sum.num_objects >= pool.quota_max_objects*warn_threshold) {
2788 ss << "pool '" << pool_name
2789 << "' has " << sum.num_objects << " objects"
2790 << " (max " << pool.quota_max_objects << ")";
2791 nearfull_detail.push_back(ss.str());
2792 nearfull = true;
2793 }
2794 }
2795 if (pool.quota_max_bytes > 0) {
2796 stringstream ss;
2797 if ((uint64_t)sum.num_bytes >= pool.quota_max_bytes) {
2798 } else if (crit_threshold > 0 &&
2799 sum.num_bytes >= pool.quota_max_bytes*crit_threshold) {
2800 ss << "pool '" << pool_name
1adf2230
AA
2801 << "' has " << byte_u_t(sum.num_bytes)
2802 << " (max " << byte_u_t(pool.quota_max_bytes) << ")";
224ce89b
WB
2803 full_detail.push_back(ss.str());
2804 full = true;
2805 } else if (warn_threshold > 0 &&
2806 sum.num_bytes >= pool.quota_max_bytes*warn_threshold) {
2807 ss << "pool '" << pool_name
1adf2230
AA
2808 << "' has " << byte_u_t(sum.num_bytes)
2809 << " (max " << byte_u_t(pool.quota_max_bytes) << ")";
224ce89b
WB
2810 nearfull_detail.push_back(ss.str());
2811 nearfull = true;
2812 }
2813 }
2814 if (full) {
2815 ++full_pools;
2816 }
2817 if (nearfull) {
2818 ++nearfull_pools;
2819 }
2820 }
2821 if (full_pools) {
2822 ostringstream ss;
2823 ss << full_pools << " pools full";
2824 auto& d = checks->add("POOL_FULL", HEALTH_ERR, ss.str());
2825 d.detail.swap(full_detail);
2826 }
2827 if (nearfull_pools) {
2828 ostringstream ss;
11fdf7f2 2829 ss << nearfull_pools << " pools nearfull";
224ce89b
WB
2830 auto& d = checks->add("POOL_NEAR_FULL", HEALTH_WARN, ss.str());
2831 d.detail.swap(nearfull_detail);
2832 }
2833 }
2834
2835 // OBJECT_MISPLACED
2836 if (pg_sum.stats.sum.num_objects_misplaced &&
11fdf7f2
TL
2837 pg_sum.stats.sum.num_object_copies > 0 &&
2838 cct->_conf->mon_warn_on_misplaced) {
224ce89b
WB
2839 double pc = (double)pg_sum.stats.sum.num_objects_misplaced /
2840 (double)pg_sum.stats.sum.num_object_copies * (double)100.0;
2841 char b[20];
2842 snprintf(b, sizeof(b), "%.3lf", pc);
2843 ostringstream ss;
2844 ss << pg_sum.stats.sum.num_objects_misplaced
2845 << "/" << pg_sum.stats.sum.num_object_copies << " objects misplaced ("
2846 << b << "%)";
2847 checks->add("OBJECT_MISPLACED", HEALTH_WARN, ss.str());
2848 }
2849
2850 // OBJECT_UNFOUND
2851 if (pg_sum.stats.sum.num_objects_unfound &&
2852 pg_sum.stats.sum.num_objects) {
2853 double pc = (double)pg_sum.stats.sum.num_objects_unfound /
2854 (double)pg_sum.stats.sum.num_objects * (double)100.0;
2855 char b[20];
2856 snprintf(b, sizeof(b), "%.3lf", pc);
2857 ostringstream ss;
2858 ss << pg_sum.stats.sum.num_objects_unfound
b5b8bbf5 2859 << "/" << pg_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
c07f9fc5
FG
2860 auto& d = checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str());
2861
2862 for (auto& p : pg_stat) {
2863 if (p.second.stats.sum.num_objects_unfound) {
2864 ostringstream ss;
2865 ss << "pg " << p.first
2866 << " has " << p.second.stats.sum.num_objects_unfound
2867 << " unfound objects";
2868 d.detail.push_back(ss.str());
2869 if (d.detail.size() > max) {
2870 d.detail.push_back("(additional pgs left out for brevity)");
2871 break;
2872 }
2873 }
2874 }
224ce89b
WB
2875 }
2876
2877 // REQUEST_SLOW
2878 // REQUEST_STUCK
11fdf7f2
TL
2879 // SLOW_OPS unifies them in mimic.
2880 if (osdmap.require_osd_release < CEPH_RELEASE_MIMIC &&
2881 cct->_conf->mon_osd_warn_op_age > 0 &&
c07f9fc5
FG
2882 !osd_sum.op_queue_age_hist.h.empty() &&
2883 osd_sum.op_queue_age_hist.upper_bound() / 1000.0 >
224ce89b
WB
2884 cct->_conf->mon_osd_warn_op_age) {
2885 list<string> warn_detail, error_detail;
2886 unsigned warn = 0, error = 0;
2887 float err_age =
2888 cct->_conf->mon_osd_warn_op_age * cct->_conf->mon_osd_err_op_age_ratio;
2889 const pow2_hist_t& h = osd_sum.op_queue_age_hist;
2890 for (unsigned i = h.h.size() - 1; i > 0; --i) {
2891 float ub = (float)(1 << i) / 1000.0;
2892 if (ub < cct->_conf->mon_osd_warn_op_age)
2893 break;
2894 if (h.h[i]) {
2895 ostringstream ss;
2896 ss << h.h[i] << " ops are blocked > " << ub << " sec";
2897 if (ub > err_age) {
2898 error += h.h[i];
2899 error_detail.push_back(ss.str());
2900 } else {
2901 warn += h.h[i];
2902 warn_detail.push_back(ss.str());
2903 }
2904 }
2905 }
2906
2907 map<float,set<int>> warn_osd_by_max; // max -> osds
2908 map<float,set<int>> error_osd_by_max; // max -> osds
2909 if (!warn_detail.empty() || !error_detail.empty()) {
2910 for (auto& p : osd_stat) {
2911 const pow2_hist_t& h = p.second.op_queue_age_hist;
2912 for (unsigned i = h.h.size() - 1; i > 0; --i) {
2913 float ub = (float)(1 << i) / 1000.0;
2914 if (ub < cct->_conf->mon_osd_warn_op_age)
2915 break;
2916 if (h.h[i]) {
2917 if (ub > err_age) {
2918 error_osd_by_max[ub].insert(p.first);
2919 } else {
2920 warn_osd_by_max[ub].insert(p.first);
2921 }
2922 break;
2923 }
2924 }
2925 }
2926 }
2927
2928 if (!warn_detail.empty()) {
11fdf7f2
TL
2929 ostringstream ss;
2930 ss << warn << " slow requests are blocked > "
2931 << cct->_conf->mon_osd_warn_op_age << " sec";
2932 auto& d = checks->add("REQUEST_SLOW", HEALTH_WARN, ss.str());
2933 d.detail.swap(warn_detail);
224ce89b
WB
2934 int left = max;
2935 for (auto& p : warn_osd_by_max) {
2936 ostringstream ss;
2937 if (p.second.size() > 1) {
c07f9fc5
FG
2938 ss << "osds " << p.second
2939 << " have blocked requests > " << p.first << " sec";
224ce89b 2940 } else {
c07f9fc5
FG
2941 ss << "osd." << *p.second.begin()
2942 << " has blocked requests > " << p.first << " sec";
224ce89b 2943 }
11fdf7f2 2944 d.detail.push_back(ss.str());
224ce89b
WB
2945 if (--left == 0) {
2946 break;
2947 }
2948 }
2949 }
2950 if (!error_detail.empty()) {
11fdf7f2
TL
2951 ostringstream ss;
2952 ss << error << " stuck requests are blocked > "
2953 << err_age << " sec";
2954 auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str());
2955 d.detail.swap(error_detail);
224ce89b
WB
2956 int left = max;
2957 for (auto& p : error_osd_by_max) {
2958 ostringstream ss;
2959 if (p.second.size() > 1) {
c07f9fc5
FG
2960 ss << "osds " << p.second
2961 << " have stuck requests > " << p.first << " sec";
224ce89b 2962 } else {
c07f9fc5
FG
2963 ss << "osd." << *p.second.begin()
2964 << " has stuck requests > " << p.first << " sec";
224ce89b 2965 }
11fdf7f2 2966 d.detail.push_back(ss.str());
224ce89b
WB
2967 if (--left == 0) {
2968 break;
2969 }
2970 }
2971 }
2972 }
7c673cae 2973
11fdf7f2
TL
2974 // OBJECT_STORE_WARN
2975 if (osd_sum.os_alerts.size()) {
2976 map<string, pair<size_t, list<string>>> os_alerts_sum;
2977
2978 for (auto& a : osd_sum.os_alerts) {
2979 int left = max;
2980 string s0 = " osd.";
2981 s0 += stringify(a.first);
2982 for (auto& aa : a.second) {
2983 string s(s0);
2984 s += " ";
2985 s += aa.second;
2986 auto it = os_alerts_sum.find(aa.first);
2987 if (it == os_alerts_sum.end()) {
2988 list<string> d;
2989 d.emplace_back(s);
2990 os_alerts_sum.emplace(aa.first, std::make_pair(1, d));
2991 } else {
2992 auto& p = it->second;
2993 ++p.first;
2994 p.second.emplace_back(s);
2995 }
2996 if (--left == 0) {
2997 break;
2998 }
2999 }
3000 }
3001
3002 for (auto& asum : os_alerts_sum) {
3003 string summary;
3004 if (asum.first == "BLUEFS_SPILLOVER") {
3005 summary = "BlueFS spillover detected";
3006 } else if (asum.first == "BLUESTORE_NO_COMPRESSION") {
3007 summary = "BlueStore compression broken";
81eedcae
TL
3008 } else if (asum.first == "BLUESTORE_LEGACY_STATFS") {
3009 summary = "Legacy BlueStore stats reporting detected";
3010 } else if (asum.first == "BLUESTORE_DISK_SIZE_MISMATCH") {
3011 summary = "BlueStore has dangerous mismatch between block device and free list sizes";
11fdf7f2
TL
3012 }
3013 summary += " on ";
3014 summary += stringify(asum.second.first);
3015 summary += " OSD(s)";
3016 auto& d = checks->add(asum.first, HEALTH_WARN, summary);
3017 for (auto& s : asum.second.second) {
3018 d.detail.push_back(s);
3019 }
3020 }
3021 }
224ce89b
WB
3022 // PG_NOT_SCRUBBED
3023 // PG_NOT_DEEP_SCRUBBED
11fdf7f2
TL
3024 if (cct->_conf->mon_warn_pg_not_scrubbed_ratio ||
3025 cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio) {
a8e16298
TL
3026 list<string> detail, deep_detail;
3027 int detail_max = max, deep_detail_max = max;
3028 int detail_more = 0, deep_detail_more = 0;
3029 int detail_total = 0, deep_detail_total = 0;
3030 for (auto& p : pg_stat) {
3031 int64_t pnum = p.first.pool();
3032 auto pool = osdmap.get_pg_pool(pnum);
3033 if (!pool)
3034 continue;
11fdf7f2 3035 if (cct->_conf->mon_warn_pg_not_scrubbed_ratio) {
a8e16298
TL
3036 double scrub_max_interval = 0;
3037 pool->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &scrub_max_interval);
3038 if (scrub_max_interval <= 0) {
3039 scrub_max_interval = cct->_conf->osd_scrub_max_interval;
c07f9fc5 3040 }
11fdf7f2 3041 const double age = (cct->_conf->mon_warn_pg_not_scrubbed_ratio * scrub_max_interval) +
a8e16298
TL
3042 scrub_max_interval;
3043 utime_t cutoff = now;
3044 cutoff -= age;
3045 if (p.second.last_scrub_stamp < cutoff) {
3046 if (detail_max > 0) {
3047 ostringstream ss;
3048 ss << "pg " << p.first << " not scrubbed since "
3049 << p.second.last_scrub_stamp;
3050 detail.push_back(ss.str());
3051 --detail_max;
3052 } else {
3053 ++detail_more;
3054 }
3055 ++detail_total;
3056 }
3057 }
11fdf7f2 3058 if (cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio) {
a8e16298
TL
3059 double deep_scrub_interval = 0;
3060 pool->opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval);
3061 if (deep_scrub_interval <= 0) {
3062 deep_scrub_interval = cct->_conf->osd_deep_scrub_interval;
3063 }
11fdf7f2 3064 double deep_age = (cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio * deep_scrub_interval) +
a8e16298
TL
3065 deep_scrub_interval;
3066 utime_t deep_cutoff = now;
3067 deep_cutoff -= deep_age;
3068 if (p.second.last_deep_scrub_stamp < deep_cutoff) {
3069 if (deep_detail_max > 0) {
3070 ostringstream ss;
3071 ss << "pg " << p.first << " not deep-scrubbed since "
3072 << p.second.last_deep_scrub_stamp;
3073 deep_detail.push_back(ss.str());
3074 --deep_detail_max;
3075 } else {
3076 ++deep_detail_more;
3077 }
3078 ++deep_detail_total;
c07f9fc5 3079 }
224ce89b 3080 }
a8e16298
TL
3081 }
3082 if (detail_total) {
3083 ostringstream ss;
3084 ss << detail_total << " pgs not scrubbed in time";
3085 auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str());
3086
c07f9fc5 3087 if (!detail.empty()) {
c07f9fc5 3088 d.detail.swap(detail);
a8e16298
TL
3089
3090 if (detail_more) {
3091 ostringstream ss;
3092 ss << detail_more << " more pgs... ";
3093 d.detail.push_back(ss.str());
3094 }
c07f9fc5 3095 }
a8e16298
TL
3096 }
3097 if (deep_detail_total) {
3098 ostringstream ss;
3099 ss << deep_detail_total << " pgs not deep-scrubbed in time";
3100 auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str());
3101
c07f9fc5 3102 if (!deep_detail.empty()) {
c07f9fc5 3103 d.detail.swap(deep_detail);
a8e16298
TL
3104
3105 if (deep_detail_more) {
3106 ostringstream ss;
3107 ss << deep_detail_more << " more pgs... ";
3108 d.detail.push_back(ss.str());
3109 }
c07f9fc5
FG
3110 }
3111 }
3112 }
3113
3114 // POOL_APP
11fdf7f2 3115 if (g_conf().get_val<bool>("mon_warn_on_pool_no_app")) {
c07f9fc5
FG
3116 list<string> detail;
3117 for (auto &it : pools) {
3118 const pg_pool_t &pool = it.second;
3119 const string& pool_name = osdmap.get_pool_name(it.first);
3120 auto it2 = pg_pool_sum.find(it.first);
3121 if (it2 == pg_pool_sum.end()) {
3122 continue;
3123 }
3124 const pool_stat_t *pstat = &it2->second;
3125 if (pstat == nullptr) {
3126 continue;
3127 }
3128 const object_stat_sum_t& sum = pstat->stats.sum;
3129 // application metadata is not encoded until luminous is minimum
3130 // required release
11fdf7f2
TL
3131 if (sum.num_objects > 0 && pool.application_metadata.empty() &&
3132 !pool.is_tier()) {
c07f9fc5
FG
3133 stringstream ss;
3134 ss << "application not enabled on pool '" << pool_name << "'";
3135 detail.push_back(ss.str());
224ce89b
WB
3136 }
3137 }
3138 if (!detail.empty()) {
3139 ostringstream ss;
c07f9fc5
FG
3140 ss << "application not enabled on " << detail.size() << " pool(s)";
3141 auto& d = checks->add("POOL_APP_NOT_ENABLED", HEALTH_WARN, ss.str());
3142 stringstream tip;
3143 tip << "use 'ceph osd pool application enable <pool-name> "
3144 << "<app-name>', where <app-name> is 'cephfs', 'rbd', 'rgw', "
3145 << "or freeform for custom applications.";
3146 detail.push_back(tip.str());
224ce89b
WB
3147 d.detail.swap(detail);
3148 }
31f18b77 3149 }
b32b8144
FG
3150
3151 // PG_SLOW_SNAP_TRIMMING
3152 if (!pg_stat.empty() && cct->_conf->mon_osd_snap_trim_queue_warn_on > 0) {
3153 uint32_t snapthreshold = cct->_conf->mon_osd_snap_trim_queue_warn_on;
3154 uint64_t snaptrimq_exceeded = 0;
3155 uint32_t longest_queue = 0;
3156 const pg_t* longest_q_pg = nullptr;
3157 list<string> detail;
3158
3159 for (auto& i: pg_stat) {
3160 uint32_t current_len = i.second.snaptrimq_len;
3161 if (current_len >= snapthreshold) {
3162 snaptrimq_exceeded++;
3163 if (longest_queue <= current_len) {
3164 longest_q_pg = &i.first;
3165 longest_queue = current_len;
3166 }
3167 if (detail.size() < max - 1) {
3168 stringstream ss;
3169 ss << "snap trim queue for pg " << i.first << " at " << current_len;
3170 detail.push_back(ss.str());
3171 continue;
3172 }
3173 if (detail.size() < max) {
3174 detail.push_back("...more pgs affected");
3175 continue;
3176 }
3177 }
3178 }
3179
3180 if (snaptrimq_exceeded) {
3181 {
3182 ostringstream ss;
3183 ss << "longest queue on pg " << *longest_q_pg << " at " << longest_queue;
3184 detail.push_back(ss.str());
3185 }
3186
3187 stringstream ss;
3188 ss << "snap trim queue for " << snaptrimq_exceeded << " pg(s) >= " << snapthreshold << " (mon_osd_snap_trim_queue_warn_on)";
3189 auto& d = checks->add("PG_SLOW_SNAP_TRIMMING", HEALTH_WARN, ss.str());
3190 detail.push_back("try decreasing \"osd snap trim sleep\" and/or increasing \"osd pg max concurrent snap trims\".");
3191 d.detail.swap(detail);
3192 }
3193 }
31f18b77 3194}
7c673cae 3195
7c673cae
FG
3196int process_pg_map_command(
3197 const string& orig_prefix,
11fdf7f2 3198 const cmdmap_t& orig_cmdmap,
7c673cae
FG
3199 const PGMap& pg_map,
3200 const OSDMap& osdmap,
3201 Formatter *f,
3202 stringstream *ss,
3203 bufferlist *odata)
3204{
3205 string prefix = orig_prefix;
11fdf7f2
TL
3206 auto cmdmap = orig_cmdmap;
3207
3208 string omap_stats_note =
3209 "\n* NOTE: Omap statistics are gathered during deep scrub and "
3210 "may be inaccurate soon afterwards depending on utilisation. See "
3211 "http://docs.ceph.com/docs/master/dev/placement-group/#omap-statistics "
3212 "for further details.\n";
3213 bool omap_stats_note_required = false;
7c673cae
FG
3214
3215 // perhaps these would be better in the parsing, but it's weird
3216 bool primary = false;
3217 if (prefix == "pg dump_json") {
3218 vector<string> v;
3219 v.push_back(string("all"));
3220 cmd_putval(g_ceph_context, cmdmap, "format", string("json"));
3221 cmd_putval(g_ceph_context, cmdmap, "dumpcontents", v);
3222 prefix = "pg dump";
3223 } else if (prefix == "pg dump_pools_json") {
3224 vector<string> v;
3225 v.push_back(string("pools"));
3226 cmd_putval(g_ceph_context, cmdmap, "format", string("json"));
3227 cmd_putval(g_ceph_context, cmdmap, "dumpcontents", v);
3228 prefix = "pg dump";
3229 } else if (prefix == "pg ls-by-primary") {
3230 primary = true;
3231 prefix = "pg ls";
3232 } else if (prefix == "pg ls-by-osd") {
3233 prefix = "pg ls";
3234 } else if (prefix == "pg ls-by-pool") {
3235 prefix = "pg ls";
3236 string poolstr;
3237 cmd_getval(g_ceph_context, cmdmap, "poolstr", poolstr);
3238 int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str());
3239 if (pool < 0) {
3240 *ss << "pool " << poolstr << " does not exist";
3241 return -ENOENT;
3242 }
3243 cmd_putval(g_ceph_context, cmdmap, "pool", pool);
3244 }
3245
7c673cae
FG
3246 stringstream ds;
3247 if (prefix == "pg stat") {
3248 if (f) {
3249 f->open_object_section("pg_summary");
3250 pg_map.print_oneline_summary(f, NULL);
3251 f->close_section();
3252 f->flush(ds);
3253 } else {
3254 ds << pg_map;
3255 }
3256 odata->append(ds);
3257 return 0;
3258 }
3259
3260 if (prefix == "pg getmap") {
3261 pg_map.encode(*odata);
3262 *ss << "got pgmap version " << pg_map.version;
3263 return 0;
3264 }
3265
3266 if (prefix == "pg dump") {
3267 string val;
3268 vector<string> dumpcontents;
3269 set<string> what;
3270 if (cmd_getval(g_ceph_context, cmdmap, "dumpcontents", dumpcontents)) {
3271 copy(dumpcontents.begin(), dumpcontents.end(),
3272 inserter(what, what.end()));
3273 }
3274 if (what.empty())
3275 what.insert("all");
3276 if (f) {
3277 if (what.count("all")) {
3278 f->open_object_section("pg_map");
3279 pg_map.dump(f);
3280 f->close_section();
3281 } else if (what.count("summary") || what.count("sum")) {
3282 f->open_object_section("pg_map");
3283 pg_map.dump_basic(f);
3284 f->close_section();
3285 } else {
3286 if (what.count("pools")) {
3287 pg_map.dump_pool_stats(f);
3288 }
3289 if (what.count("osds")) {
3290 pg_map.dump_osd_stats(f);
3291 }
3292 if (what.count("pgs")) {
3293 pg_map.dump_pg_stats(f, false);
3294 }
3295 if (what.count("pgs_brief")) {
3296 pg_map.dump_pg_stats(f, true);
3297 }
3298 if (what.count("delta")) {
3299 f->open_object_section("delta");
3300 pg_map.dump_delta(f);
3301 f->close_section();
3302 }
3303 }
3304 f->flush(*odata);
3305 } else {
3306 if (what.count("all")) {
3307 pg_map.dump(ds);
11fdf7f2 3308 omap_stats_note_required = true;
7c673cae
FG
3309 } else if (what.count("summary") || what.count("sum")) {
3310 pg_map.dump_basic(ds);
3311 pg_map.dump_pg_sum_stats(ds, true);
3312 pg_map.dump_osd_sum_stats(ds);
11fdf7f2 3313 omap_stats_note_required = true;
7c673cae
FG
3314 } else {
3315 if (what.count("pgs_brief")) {
3316 pg_map.dump_pg_stats(ds, true);
3317 }
3318 bool header = true;
3319 if (what.count("pgs")) {
3320 pg_map.dump_pg_stats(ds, false);
3321 header = false;
11fdf7f2 3322 omap_stats_note_required = true;
7c673cae
FG
3323 }
3324 if (what.count("pools")) {
3325 pg_map.dump_pool_stats(ds, header);
11fdf7f2 3326 omap_stats_note_required = true;
7c673cae
FG
3327 }
3328 if (what.count("osds")) {
3329 pg_map.dump_osd_stats(ds);
3330 }
3331 }
3332 odata->append(ds);
11fdf7f2
TL
3333 if (omap_stats_note_required) {
3334 odata->append(omap_stats_note);
3335 }
7c673cae
FG
3336 }
3337 *ss << "dumped " << what;
3338 return 0;
3339 }
3340
3341 if (prefix == "pg ls") {
3342 int64_t osd = -1;
3343 int64_t pool = -1;
3344 vector<string>states;
3345 set<pg_t> pgs;
3346 cmd_getval(g_ceph_context, cmdmap, "pool", pool);
3347 cmd_getval(g_ceph_context, cmdmap, "osd", osd);
3348 cmd_getval(g_ceph_context, cmdmap, "states", states);
3349 if (pool >= 0 && !osdmap.have_pg_pool(pool)) {
3350 *ss << "pool " << pool << " does not exist";
3351 return -ENOENT;
3352 }
3353 if (osd >= 0 && !osdmap.is_up(osd)) {
3354 *ss << "osd " << osd << " is not up";
3355 return -EAGAIN;
3356 }
3357 if (states.empty())
3358 states.push_back("all");
3359
11fdf7f2 3360 uint64_t state = 0;
7c673cae
FG
3361
3362 while (!states.empty()) {
3363 string state_str = states.back();
3364
3365 if (state_str == "all") {
3366 state = -1;
3367 break;
3368 } else {
3efd9988
FG
3369 auto filter = pg_string_state(state_str);
3370 if (!filter) {
c07f9fc5
FG
3371 *ss << "'" << state_str << "' is not a valid pg state,"
3372 << " available choices: " << pg_state_string(0xFFFFFFFF);
3373 return -EINVAL;
3374 }
3efd9988 3375 state |= *filter;
7c673cae
FG
3376 }
3377
3378 states.pop_back();
3379 }
3380
3381 pg_map.get_filtered_pg_stats(state, pool, osd, primary, pgs);
3382
3383 if (f && !pgs.empty()) {
3384 pg_map.dump_filtered_pg_stats(f, pgs);
3385 f->flush(*odata);
3386 } else if (!pgs.empty()) {
3387 pg_map.dump_filtered_pg_stats(ds, pgs);
3388 odata->append(ds);
11fdf7f2 3389 odata->append(omap_stats_note);
7c673cae
FG
3390 }
3391 return 0;
3392 }
3393
3394 if (prefix == "pg dump_stuck") {
3395 vector<string> stuckop_vec;
3396 cmd_getval(g_ceph_context, cmdmap, "stuckops", stuckop_vec);
3397 if (stuckop_vec.empty())
3398 stuckop_vec.push_back("unclean");
3399 int64_t threshold;
3400 cmd_getval(g_ceph_context, cmdmap, "threshold", threshold,
11fdf7f2 3401 g_conf().get_val<int64_t>("mon_pg_stuck_threshold"));
7c673cae 3402
11fdf7f2 3403 if (pg_map.dump_stuck_pg_stats(ds, f, (int)threshold, stuckop_vec) < 0) {
7c673cae 3404 *ss << "failed";
11fdf7f2 3405 } else {
7c673cae 3406 *ss << "ok";
11fdf7f2
TL
3407 }
3408 odata->append(ds);
7c673cae
FG
3409 return 0;
3410 }
3411
3412 if (prefix == "pg debug") {
3413 string debugop;
3414 cmd_getval(g_ceph_context, cmdmap, "debugop", debugop,
3415 string("unfound_objects_exist"));
3416 if (debugop == "unfound_objects_exist") {
3417 bool unfound_objects_exist = false;
3418 for (const auto& p : pg_map.pg_stat) {
3419 if (p.second.stats.sum.num_objects_unfound > 0) {
3420 unfound_objects_exist = true;
3421 break;
3422 }
3423 }
3424 if (unfound_objects_exist)
3425 ds << "TRUE";
3426 else
3427 ds << "FALSE";
3428 odata->append(ds);
3429 return 0;
3430 }
3431 if (debugop == "degraded_pgs_exist") {
3432 bool degraded_pgs_exist = false;
3433 for (const auto& p : pg_map.pg_stat) {
3434 if (p.second.stats.sum.num_objects_degraded > 0) {
3435 degraded_pgs_exist = true;
3436 break;
3437 }
3438 }
3439 if (degraded_pgs_exist)
3440 ds << "TRUE";
3441 else
3442 ds << "FALSE";
3443 odata->append(ds);
3444 return 0;
3445 }
3446 }
3447
3448 if (prefix == "osd perf") {
3449 if (f) {
3450 f->open_object_section("osdstats");
3451 pg_map.dump_osd_perf_stats(f);
3452 f->close_section();
3453 f->flush(ds);
3454 } else {
3455 pg_map.print_osd_perf_stats(&ds);
3456 }
3457 odata->append(ds);
3458 return 0;
3459 }
3460
3461 if (prefix == "osd blocked-by") {
3462 if (f) {
3463 f->open_object_section("osd_blocked_by");
3464 pg_map.dump_osd_blocked_by_stats(f);
3465 f->close_section();
3466 f->flush(ds);
3467 } else {
3468 pg_map.print_osd_blocked_by_stats(&ds);
3469 }
3470 odata->append(ds);
3471 return 0;
3472 }
3473
7c673cae
FG
3474 return -EOPNOTSUPP;
3475}
3476
31f18b77
FG
3477void PGMapUpdater::check_osd_map(
3478 CephContext *cct,
3479 const OSDMap& osdmap,
3480 const PGMap& pgmap,
3481 PGMap::Incremental *pending_inc)
3482{
3483 for (auto& p : pgmap.osd_stat) {
3484 if (!osdmap.exists(p.first)) {
3485 // remove osd_stat
3486 pending_inc->rm_stat(p.first);
3487 } else if (osdmap.is_out(p.first)) {
3488 // zero osd_stat
11fdf7f2
TL
3489 if (p.second.statfs.total != 0) {
3490 pending_inc->stat_osd_out(p.first);
31f18b77
FG
3491 }
3492 } else if (!osdmap.is_up(p.first)) {
3493 // zero the op_queue_age_hist
3494 if (!p.second.op_queue_age_hist.empty()) {
11fdf7f2 3495 pending_inc->stat_osd_down_up(p.first, pgmap);
31f18b77
FG
3496 }
3497 }
3498 }
3499
3500 // deleted pgs (pools)?
3501 for (auto& p : pgmap.pg_pool_sum) {
3502 if (!osdmap.have_pg_pool(p.first)) {
3503 ldout(cct, 10) << __func__ << " pool " << p.first << " gone, removing pgs"
3504 << dendl;
3505 for (auto& q : pgmap.pg_stat) {
11fdf7f2 3506 if (q.first.pool() == p.first) {
31f18b77
FG
3507 pending_inc->pg_remove.insert(q.first);
3508 }
3509 }
3510 auto q = pending_inc->pg_stat_updates.begin();
3511 while (q != pending_inc->pg_stat_updates.end()) {
11fdf7f2 3512 if (q->first.pool() == p.first) {
31f18b77
FG
3513 q = pending_inc->pg_stat_updates.erase(q);
3514 } else {
3515 ++q;
3516 }
3517 }
3518 }
3519 }
3520
11fdf7f2
TL
3521 // new (split or new pool) or merged pgs?
3522 map<int64_t,unsigned> new_pg_num;
31f18b77
FG
3523 for (auto& p : osdmap.get_pools()) {
3524 int64_t poolid = p.first;
3525 const pg_pool_t& pi = p.second;
3526 auto q = pgmap.num_pg_by_pool.find(poolid);
3527 unsigned my_pg_num = 0;
3528 if (q != pgmap.num_pg_by_pool.end())
3529 my_pg_num = q->second;
3530 unsigned pg_num = pi.get_pg_num();
11fdf7f2
TL
3531 new_pg_num[poolid] = pg_num;
3532 if (my_pg_num < pg_num) {
224ce89b 3533 ldout(cct,10) << __func__ << " pool " << poolid << " pg_num " << pg_num
11fdf7f2 3534 << " > my pg_num " << my_pg_num << dendl;
31f18b77
FG
3535 for (unsigned ps = my_pg_num; ps < pg_num; ++ps) {
3536 pg_t pgid(ps, poolid);
3537 if (pending_inc->pg_stat_updates.count(pgid) == 0) {
224ce89b 3538 ldout(cct,20) << __func__ << " adding " << pgid << dendl;
31f18b77
FG
3539 pg_stat_t &stats = pending_inc->pg_stat_updates[pgid];
3540 stats.last_fresh = osdmap.get_modified();
3541 stats.last_active = osdmap.get_modified();
3542 stats.last_change = osdmap.get_modified();
3543 stats.last_peered = osdmap.get_modified();
3544 stats.last_clean = osdmap.get_modified();
3545 stats.last_unstale = osdmap.get_modified();
3546 stats.last_undegraded = osdmap.get_modified();
3547 stats.last_fullsized = osdmap.get_modified();
3548 stats.last_scrub_stamp = osdmap.get_modified();
3549 stats.last_deep_scrub_stamp = osdmap.get_modified();
3550 stats.last_clean_scrub_stamp = osdmap.get_modified();
3551 }
3552 }
11fdf7f2
TL
3553 } else if (my_pg_num > pg_num) {
3554 ldout(cct,10) << __func__ << " pool " << poolid << " pg_num " << pg_num
3555 << " < my pg_num " << my_pg_num << dendl;
3556 for (unsigned i = pg_num; i < my_pg_num; ++i) {
3557 pg_t pgid(i, poolid);
3558 ldout(cct,20) << __func__ << " removing merged " << pgid << dendl;
3559 if (pgmap.pg_stat.count(pgid)) {
3560 pending_inc->pg_remove.insert(pgid);
3561 }
3562 pending_inc->pg_stat_updates.erase(pgid);
7c673cae 3563 }
7c673cae
FG
3564 }
3565 }
11fdf7f2
TL
3566 auto i = pending_inc->pg_stat_updates.begin();
3567 while (i != pending_inc->pg_stat_updates.end()) {
3568 auto j = new_pg_num.find(i->first.pool());
3569 if (j == new_pg_num.end() ||
3570 i->first.ps() >= j->second) {
3571 ldout(cct,20) << __func__ << " removing pending update to old "
3572 << i->first << dendl;
3573 i = pending_inc->pg_stat_updates.erase(i);
3574 } else {
3575 ++i;
7c673cae
FG
3576 }
3577 }
7c673cae
FG
3578}
3579
3580static void _try_mark_pg_stale(
3581 const OSDMap& osdmap,
3582 pg_t pgid,
3583 const pg_stat_t& cur,
3584 PGMap::Incremental *pending_inc)
3585{
3586 if ((cur.state & PG_STATE_STALE) == 0 &&
3587 cur.acting_primary != -1 &&
3588 osdmap.is_down(cur.acting_primary)) {
3589 pg_stat_t *newstat;
3590 auto q = pending_inc->pg_stat_updates.find(pgid);
3591 if (q != pending_inc->pg_stat_updates.end()) {
3592 if ((q->second.acting_primary == cur.acting_primary) ||
3593 ((q->second.state & PG_STATE_STALE) == 0 &&
3594 q->second.acting_primary != -1 &&
3595 osdmap.is_down(q->second.acting_primary))) {
3596 newstat = &q->second;
3597 } else {
3598 // pending update is no longer down or already stale
3599 return;
3600 }
3601 } else {
3602 newstat = &pending_inc->pg_stat_updates[pgid];
3603 *newstat = cur;
3604 }
3605 dout(10) << __func__ << " marking pg " << pgid
3606 << " stale (acting_primary " << newstat->acting_primary
3607 << ")" << dendl;
3608 newstat->state |= PG_STATE_STALE;
3609 newstat->last_unstale = ceph_clock_now();
3610 }
3611}
3612
3613void PGMapUpdater::check_down_pgs(
3614 const OSDMap &osdmap,
3615 const PGMap &pg_map,
3616 bool check_all,
3617 const set<int>& need_check_down_pg_osds,
3618 PGMap::Incremental *pending_inc)
3619{
3620 // if a large number of osds changed state, just iterate over the whole
3621 // pg map.
3622 if (need_check_down_pg_osds.size() > (unsigned)osdmap.get_num_osds() *
11fdf7f2 3623 g_conf().get_val<double>("mon_pg_check_down_all_threshold")) {
7c673cae
FG
3624 check_all = true;
3625 }
3626
3627 if (check_all) {
3628 for (const auto& p : pg_map.pg_stat) {
3629 _try_mark_pg_stale(osdmap, p.first, p.second, pending_inc);
3630 }
3631 } else {
3632 for (auto osd : need_check_down_pg_osds) {
3633 if (osdmap.is_down(osd)) {
3634 auto p = pg_map.pg_by_osd.find(osd);
3635 if (p == pg_map.pg_by_osd.end()) {
3636 continue;
3637 }
3638 for (auto pgid : p->second) {
3639 const pg_stat_t &stat = pg_map.pg_stat.at(pgid);
11fdf7f2 3640 ceph_assert(stat.acting_primary == osd);
7c673cae
FG
3641 _try_mark_pg_stale(osdmap, pgid, stat, pending_inc);
3642 }
3643 }
3644 }
3645 }
3646}
3647
3648int reweight::by_utilization(
3649 const OSDMap &osdmap,
3650 const PGMap &pgm,
3651 int oload,
3652 double max_changef,
3653 int max_osds,
3654 bool by_pg, const set<int64_t> *pools,
3655 bool no_increasing,
3656 mempool::osdmap::map<int32_t, uint32_t>* new_weights,
3657 std::stringstream *ss,
3658 std::string *out_str,
3659 Formatter *f)
3660{
3661 if (oload <= 100) {
3662 *ss << "You must give a percentage higher than 100. "
3663 "The reweighting threshold will be calculated as <average-utilization> "
3664 "times <input-percentage>. For example, an argument of 200 would "
3665 "reweight OSDs which are twice as utilized as the average OSD.\n";
3666 return -EINVAL;
3667 }
3668
3669 vector<int> pgs_by_osd(osdmap.get_max_osd());
3670
3671 // Avoid putting a small number (or 0) in the denominator when calculating
3672 // average_util
3673 double average_util;
3674 if (by_pg) {
3675 // by pg mapping
3676 double weight_sum = 0.0; // sum up the crush weights
3677 unsigned num_pg_copies = 0;
3678 int num_osds = 0;
3679 for (const auto& pg : pgm.pg_stat) {
3680 if (pools && pools->count(pg.first.pool()) == 0)
3681 continue;
3682 for (const auto acting : pg.second.acting) {
b5b8bbf5
FG
3683 if (!osdmap.exists(acting)) {
3684 continue;
3685 }
7c673cae
FG
3686 if (acting >= (int)pgs_by_osd.size())
3687 pgs_by_osd.resize(acting);
3688 if (pgs_by_osd[acting] == 0) {
3689 if (osdmap.crush->get_item_weightf(acting) <= 0) {
3690 //skip if we currently can not identify item
3691 continue;
3692 }
3693 weight_sum += osdmap.crush->get_item_weightf(acting);
3694 ++num_osds;
3695 }
3696 ++pgs_by_osd[acting];
3697 ++num_pg_copies;
3698 }
3699 }
3700
11fdf7f2 3701 if (!num_osds || (num_pg_copies / num_osds < g_conf()->mon_reweight_min_pgs_per_osd)) {
7c673cae
FG
3702 *ss << "Refusing to reweight: we only have " << num_pg_copies
3703 << " PGs across " << num_osds << " osds!\n";
3704 return -EDOM;
3705 }
3706
3707 average_util = (double)num_pg_copies / weight_sum;
3708 } else {
3709 // by osd utilization
11fdf7f2
TL
3710 int num_osd = std::max<size_t>(1, pgm.osd_stat.size());
3711 if ((uint64_t)pgm.osd_sum.statfs.total / num_osd
3712 < g_conf()->mon_reweight_min_bytes_per_osd) {
3713 *ss << "Refusing to reweight: we only have " << pgm.osd_sum.statfs.kb()
7c673cae
FG
3714 << " kb across all osds!\n";
3715 return -EDOM;
3716 }
11fdf7f2
TL
3717 if ((uint64_t)pgm.osd_sum.statfs.get_used_raw() / num_osd
3718 < g_conf()->mon_reweight_min_bytes_per_osd) {
3719 *ss << "Refusing to reweight: we only have "
3720 << pgm.osd_sum.statfs.kb_used_raw()
7c673cae
FG
3721 << " kb used across all osds!\n";
3722 return -EDOM;
3723 }
3724
11fdf7f2
TL
3725 average_util = (double)pgm.osd_sum.statfs.get_used_raw() /
3726 (double)pgm.osd_sum.statfs.total;
7c673cae
FG
3727 }
3728
3729 // adjust down only if we are above the threshold
3730 const double overload_util = average_util * (double)oload / 100.0;
3731
3732 // but aggressively adjust weights up whenever possible.
3733 const double underload_util = average_util;
3734
3735 const unsigned max_change = (unsigned)(max_changef * (double)0x10000);
3736
3737 ostringstream oss;
3738 if (f) {
3739 f->open_object_section("reweight_by_utilization");
3740 f->dump_int("overload_min", oload);
3741 f->dump_float("max_change", max_changef);
3742 f->dump_int("max_change_osds", max_osds);
3743 f->dump_float("average_utilization", average_util);
3744 f->dump_float("overload_utilization", overload_util);
3745 } else {
3746 oss << "oload " << oload << "\n";
3747 oss << "max_change " << max_changef << "\n";
3748 oss << "max_change_osds " << max_osds << "\n";
3749 oss.precision(4);
3750 oss << "average_utilization " << std::fixed << average_util << "\n";
3751 oss << "overload_utilization " << overload_util << "\n";
3752 }
3753 int num_changed = 0;
3754
3755 // precompute util for each OSD
3756 std::vector<std::pair<int, float> > util_by_osd;
3757 for (const auto& p : pgm.osd_stat) {
3758 std::pair<int, float> osd_util;
3759 osd_util.first = p.first;
3760 if (by_pg) {
3761 if (p.first >= (int)pgs_by_osd.size() ||
3762 pgs_by_osd[p.first] == 0) {
3763 // skip if this OSD does not contain any pg
3764 // belonging to the specified pool(s).
3765 continue;
3766 }
3767
3768 if (osdmap.crush->get_item_weightf(p.first) <= 0) {
3769 // skip if we are unable to locate item.
3770 continue;
3771 }
3772
11fdf7f2
TL
3773 osd_util.second =
3774 pgs_by_osd[p.first] / osdmap.crush->get_item_weightf(p.first);
7c673cae 3775 } else {
11fdf7f2
TL
3776 osd_util.second =
3777 (double)p.second.statfs.get_used_raw() / (double)p.second.statfs.total;
7c673cae
FG
3778 }
3779 util_by_osd.push_back(osd_util);
3780 }
3781
3782 // sort by absolute deviation from the mean utilization,
3783 // in descending order.
3784 std::sort(util_by_osd.begin(), util_by_osd.end(),
3785 [average_util](std::pair<int, float> l, std::pair<int, float> r) {
3786 return abs(l.second - average_util) > abs(r.second - average_util);
3787 }
3788 );
3789
3790 if (f)
3791 f->open_array_section("reweights");
3792
3793 for (const auto& p : util_by_osd) {
3794 unsigned weight = osdmap.get_weight(p.first);
3795 if (weight == 0) {
3796 // skip if OSD is currently out
3797 continue;
3798 }
3799 float util = p.second;
3800
3801 if (util >= overload_util) {
3802 // Assign a lower weight to overloaded OSDs. The current weight
3803 // is a factor to take into account the original weights,
3804 // to represent e.g. differing storage capacities
3805 unsigned new_weight = (unsigned)((average_util / util) * (float)weight);
3806 if (weight > max_change)
11fdf7f2 3807 new_weight = std::max(new_weight, weight - max_change);
7c673cae
FG
3808 new_weights->insert({p.first, new_weight});
3809 if (f) {
3810 f->open_object_section("osd");
3811 f->dump_int("osd", p.first);
3812 f->dump_float("weight", (float)weight / (float)0x10000);
3813 f->dump_float("new_weight", (float)new_weight / (float)0x10000);
3814 f->close_section();
3815 } else {
3816 oss << "osd." << p.first << " weight "
3817 << (float)weight / (float)0x10000 << " -> "
3818 << (float)new_weight / (float)0x10000 << "\n";
3819 }
3820 if (++num_changed >= max_osds)
3821 break;
3822 }
3823 if (!no_increasing && util <= underload_util) {
3824 // assign a higher weight.. if we can.
3825 unsigned new_weight = (unsigned)((average_util / util) * (float)weight);
11fdf7f2 3826 new_weight = std::min(new_weight, weight + max_change);
7c673cae
FG
3827 if (new_weight > 0x10000)
3828 new_weight = 0x10000;
3829 if (new_weight > weight) {
3830 new_weights->insert({p.first, new_weight});
3831 oss << "osd." << p.first << " weight "
3832 << (float)weight / (float)0x10000 << " -> "
3833 << (float)new_weight / (float)0x10000 << "\n";
3834 if (++num_changed >= max_osds)
3835 break;
3836 }
3837 }
3838 }
3839 if (f) {
3840 f->close_section();
3841 }
3842
3843 OSDMap newmap;
3844 newmap.deepish_copy_from(osdmap);
3845 OSDMap::Incremental newinc;
3846 newinc.fsid = newmap.get_fsid();
3847 newinc.epoch = newmap.get_epoch() + 1;
3848 newinc.new_weight = *new_weights;
3849 newmap.apply_incremental(newinc);
3850
3851 osdmap.summarize_mapping_stats(&newmap, pools, out_str, f);
3852
3853 if (f) {
3854 f->close_section();
3855 } else {
3856 *out_str += "\n";
3857 *out_str += oss.str();
3858 }
3859 return num_changed;
3860}