]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
224ce89b WB |
4 | #include <boost/algorithm/string.hpp> |
5 | ||
7c673cae FG |
6 | #include "PGMap.h" |
7 | ||
8 | #define dout_subsys ceph_subsys_mon | |
9 | #include "common/debug.h" | |
11fdf7f2 | 10 | #include "common/Clock.h" |
7c673cae | 11 | #include "common/Formatter.h" |
11fdf7f2 | 12 | #include "global/global_context.h" |
7c673cae FG |
13 | #include "include/ceph_features.h" |
14 | #include "include/stringify.h" | |
15 | ||
16 | #include "osd/osd_types.h" | |
17 | #include "osd/OSDMap.h" | |
eafe8130 | 18 | #include <boost/range/adaptor/reversed.hpp> |
7c673cae FG |
19 | |
20 | #define dout_context g_ceph_context | |
21 | ||
9f95a23c TL |
22 | using std::list; |
23 | using std::make_pair; | |
24 | using std::map; | |
25 | using std::pair; | |
26 | using std::ostream; | |
27 | using std::ostringstream; | |
28 | using std::set; | |
29 | using std::string; | |
30 | using std::stringstream; | |
31 | using std::vector; | |
32 | ||
33 | using ceph::bufferlist; | |
34 | using TOPNSPC::common::cmd_getval; | |
35 | ||
31f18b77 FG |
36 | MEMPOOL_DEFINE_OBJECT_FACTORY(PGMapDigest, pgmap_digest, pgmap); |
37 | MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap, pgmap, pgmap); | |
38 | MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap::Incremental, pgmap_inc, pgmap); | |
39 | ||
40 | ||
41 | // --------------------- | |
42 | // PGMapDigest | |
43 | ||
44 | void PGMapDigest::encode(bufferlist& bl, uint64_t features) const | |
45 | { | |
46 | // NOTE: see PGMap::encode_digest | |
11fdf7f2 TL |
47 | uint8_t v = 4; |
48 | if (!HAVE_FEATURE(features, SERVER_MIMIC)) { | |
49 | v = 1; | |
50 | } else if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { | |
51 | v = 3; | |
52 | } | |
53 | ENCODE_START(v, 1, bl); | |
54 | encode(num_pg, bl); | |
55 | encode(num_pg_active, bl); | |
56 | encode(num_pg_unknown, bl); | |
57 | encode(num_osd, bl); | |
58 | encode(pg_pool_sum, bl, features); | |
59 | encode(pg_sum, bl, features); | |
60 | encode(osd_sum, bl, features); | |
61 | if (v >= 2) { | |
62 | encode(num_pg_by_state, bl); | |
63 | } else { | |
64 | uint32_t n = num_pg_by_state.size(); | |
65 | encode(n, bl); | |
66 | for (auto p : num_pg_by_state) { | |
9f95a23c | 67 | encode((int32_t)p.first, bl); |
11fdf7f2 TL |
68 | encode(p.second, bl); |
69 | } | |
70 | } | |
71 | encode(num_pg_by_osd, bl); | |
72 | encode(num_pg_by_pool, bl); | |
73 | encode(osd_last_seq, bl); | |
74 | encode(per_pool_sum_delta, bl, features); | |
75 | encode(per_pool_sum_deltas_stamps, bl); | |
76 | encode(pg_sum_delta, bl, features); | |
77 | encode(stamp_delta, bl); | |
78 | encode(avail_space_by_rule, bl); | |
79 | if (struct_v >= 3) { | |
80 | encode(purged_snaps, bl); | |
81 | } | |
82 | if (struct_v >= 4) { | |
83 | encode(osd_sum_by_class, bl, features); | |
84 | } | |
7c673cae FG |
85 | ENCODE_FINISH(bl); |
86 | } | |
87 | ||
11fdf7f2 | 88 | void PGMapDigest::decode(bufferlist::const_iterator& p) |
31f18b77 | 89 | { |
11fdf7f2 TL |
90 | DECODE_START(4, p); |
91 | decode(num_pg, p); | |
92 | decode(num_pg_active, p); | |
93 | decode(num_pg_unknown, p); | |
94 | decode(num_osd, p); | |
95 | decode(pg_pool_sum, p); | |
96 | decode(pg_sum, p); | |
97 | decode(osd_sum, p); | |
98 | if (struct_v >= 2) { | |
99 | decode(num_pg_by_state, p); | |
100 | } else { | |
101 | map<int32_t, int32_t> nps; | |
102 | decode(nps, p); | |
103 | num_pg_by_state.clear(); | |
104 | for (auto i : nps) { | |
105 | num_pg_by_state[i.first] = i.second; | |
106 | } | |
107 | } | |
108 | decode(num_pg_by_osd, p); | |
109 | decode(num_pg_by_pool, p); | |
110 | decode(osd_last_seq, p); | |
111 | decode(per_pool_sum_delta, p); | |
112 | decode(per_pool_sum_deltas_stamps, p); | |
113 | decode(pg_sum_delta, p); | |
114 | decode(stamp_delta, p); | |
115 | decode(avail_space_by_rule, p); | |
116 | if (struct_v >= 3) { | |
117 | decode(purged_snaps, p); | |
118 | } | |
119 | if (struct_v >= 4) { | |
120 | decode(osd_sum_by_class, p); | |
121 | } | |
31f18b77 FG |
122 | DECODE_FINISH(p); |
123 | } | |
124 | ||
9f95a23c | 125 | void PGMapDigest::dump(ceph::Formatter *f) const |
31f18b77 FG |
126 | { |
127 | f->dump_unsigned("num_pg", num_pg); | |
128 | f->dump_unsigned("num_pg_active", num_pg_active); | |
129 | f->dump_unsigned("num_pg_unknown", num_pg_unknown); | |
130 | f->dump_unsigned("num_osd", num_osd); | |
131 | f->dump_object("pool_sum", pg_sum); | |
132 | f->dump_object("osd_sum", osd_sum); | |
11fdf7f2 TL |
133 | |
134 | f->open_object_section("osd_sum_by_class"); | |
135 | for (auto& i : osd_sum_by_class) { | |
136 | f->dump_object(i.first.c_str(), i.second); | |
137 | } | |
138 | f->close_section(); | |
139 | ||
31f18b77 FG |
140 | f->open_array_section("pool_stats"); |
141 | for (auto& p : pg_pool_sum) { | |
142 | f->open_object_section("pool_stat"); | |
143 | f->dump_int("poolid", p.first); | |
144 | auto q = num_pg_by_pool.find(p.first); | |
145 | if (q != num_pg_by_pool.end()) | |
146 | f->dump_unsigned("num_pg", q->second); | |
147 | p.second.dump(f); | |
7c673cae FG |
148 | f->close_section(); |
149 | } | |
150 | f->close_section(); | |
31f18b77 FG |
151 | f->open_array_section("osd_stats"); |
152 | int i = 0; | |
153 | // TODO: this isn't really correct since we can dump non-existent OSDs | |
154 | // I dunno what osd_last_seq is set to in that case... | |
155 | for (auto& p : osd_last_seq) { | |
7c673cae | 156 | f->open_object_section("osd_stat"); |
31f18b77 FG |
157 | f->dump_int("osd", i); |
158 | f->dump_unsigned("seq", p); | |
7c673cae | 159 | f->close_section(); |
31f18b77 | 160 | ++i; |
7c673cae FG |
161 | } |
162 | f->close_section(); | |
31f18b77 FG |
163 | f->open_array_section("num_pg_by_state"); |
164 | for (auto& p : num_pg_by_state) { | |
165 | f->open_object_section("count"); | |
166 | f->dump_string("state", pg_state_string(p.first)); | |
167 | f->dump_unsigned("num", p.second); | |
168 | f->close_section(); | |
169 | } | |
7c673cae | 170 | f->close_section(); |
31f18b77 FG |
171 | f->open_array_section("num_pg_by_osd"); |
172 | for (auto& p : num_pg_by_osd) { | |
173 | f->open_object_section("count"); | |
174 | f->dump_unsigned("osd", p.first); | |
175 | f->dump_unsigned("num_primary_pg", p.second.primary); | |
176 | f->dump_unsigned("num_acting_pg", p.second.acting); | |
81eedcae | 177 | f->dump_unsigned("num_up_not_acting_pg", p.second.up_not_acting); |
31f18b77 FG |
178 | f->close_section(); |
179 | } | |
7c673cae | 180 | f->close_section(); |
11fdf7f2 TL |
181 | f->open_array_section("purged_snaps"); |
182 | for (auto& j : purged_snaps) { | |
183 | f->open_object_section("pool"); | |
184 | f->dump_int("pool", j.first); | |
185 | f->open_object_section("purged_snaps"); | |
186 | for (auto i = j.second.begin(); i != j.second.end(); ++i) { | |
187 | f->open_object_section("interval"); | |
188 | f->dump_stream("start") << i.get_start(); | |
189 | f->dump_stream("length") << i.get_len(); | |
190 | f->close_section(); | |
191 | } | |
192 | f->close_section(); | |
193 | f->close_section(); | |
194 | } | |
195 | f->close_section(); | |
7c673cae FG |
196 | } |
197 | ||
31f18b77 | 198 | void PGMapDigest::generate_test_instances(list<PGMapDigest*>& ls) |
7c673cae | 199 | { |
31f18b77 | 200 | ls.push_back(new PGMapDigest); |
7c673cae FG |
201 | } |
202 | ||
31f18b77 FG |
// Format a value for display: anything below 0.01 is shown as "0",
// everything else in fixed notation with two digits after the point.
inline std::string percentify(const float& a) {
  if (a < 0.01) {
    return "0";
  }
  std::ostringstream text;
  text << std::fixed << std::setprecision(2) << a;
  return text.str();
}
7c673cae | 211 | |
9f95a23c | 212 | void PGMapDigest::print_summary(ceph::Formatter *f, ostream *out) const |
7c673cae | 213 | { |
31f18b77 FG |
214 | if (f) |
215 | f->open_array_section("pgs_by_state"); | |
7c673cae | 216 | |
31f18b77 | 217 | // list is descending numeric order (by count) |
9f95a23c | 218 | std::multimap<int,uint64_t> state_by_count; // count -> state |
31f18b77 FG |
219 | for (auto p = num_pg_by_state.begin(); |
220 | p != num_pg_by_state.end(); | |
221 | ++p) { | |
222 | state_by_count.insert(make_pair(p->second, p->first)); | |
7c673cae | 223 | } |
31f18b77 FG |
224 | if (f) { |
225 | for (auto p = state_by_count.rbegin(); | |
226 | p != state_by_count.rend(); | |
227 | ++p) | |
228 | { | |
229 | f->open_object_section("pgs_by_state_element"); | |
230 | f->dump_string("state_name", pg_state_string(p->second)); | |
231 | f->dump_unsigned("count", p->first); | |
232 | f->close_section(); | |
233 | } | |
7c673cae | 234 | } |
31f18b77 FG |
235 | if (f) |
236 | f->close_section(); | |
7c673cae | 237 | |
31f18b77 FG |
238 | if (f) { |
239 | f->dump_unsigned("num_pgs", num_pg); | |
240 | f->dump_unsigned("num_pools", pg_pool_sum.size()); | |
241 | f->dump_unsigned("num_objects", pg_sum.stats.sum.num_objects); | |
242 | f->dump_unsigned("data_bytes", pg_sum.stats.sum.num_bytes); | |
11fdf7f2 TL |
243 | f->dump_unsigned("bytes_used", osd_sum.statfs.get_used_raw()); |
244 | f->dump_unsigned("bytes_avail", osd_sum.statfs.available); | |
245 | f->dump_unsigned("bytes_total", osd_sum.statfs.total); | |
31f18b77 FG |
246 | } else { |
247 | *out << " pools: " << pg_pool_sum.size() << " pools, " | |
248 | << num_pg << " pgs\n"; | |
1adf2230 AA |
249 | *out << " objects: " << si_u_t(pg_sum.stats.sum.num_objects) << " objects, " |
250 | << byte_u_t(pg_sum.stats.sum.num_bytes) << "\n"; | |
31f18b77 | 251 | *out << " usage: " |
11fdf7f2 TL |
252 | << byte_u_t(osd_sum.statfs.get_used_raw()) << " used, " |
253 | << byte_u_t(osd_sum.statfs.available) << " / " | |
254 | << byte_u_t(osd_sum.statfs.total) << " avail\n"; | |
31f18b77 FG |
255 | *out << " pgs: "; |
256 | } | |
7c673cae | 257 | |
31f18b77 | 258 | bool pad = false; |
7c673cae | 259 | |
31f18b77 FG |
260 | if (num_pg_unknown > 0) { |
261 | float p = (float)num_pg_unknown / (float)num_pg; | |
262 | if (f) { | |
263 | f->dump_float("unknown_pgs_ratio", p); | |
7c673cae | 264 | } else { |
31f18b77 FG |
265 | char b[20]; |
266 | snprintf(b, sizeof(b), "%.3lf", p * 100.0); | |
267 | *out << b << "% pgs unknown\n"; | |
268 | pad = true; | |
7c673cae | 269 | } |
7c673cae | 270 | } |
7c673cae | 271 | |
31f18b77 FG |
272 | int num_pg_inactive = num_pg - num_pg_active - num_pg_unknown; |
273 | if (num_pg_inactive > 0) { | |
274 | float p = (float)num_pg_inactive / (float)num_pg; | |
275 | if (f) { | |
276 | f->dump_float("inactive_pgs_ratio", p); | |
7c673cae | 277 | } else { |
31f18b77 FG |
278 | if (pad) { |
279 | *out << " "; | |
280 | } | |
281 | char b[20]; | |
282 | snprintf(b, sizeof(b), "%.3f", p * 100.0); | |
283 | *out << b << "% pgs not active\n"; | |
284 | pad = true; | |
7c673cae | 285 | } |
7c673cae | 286 | } |
31f18b77 FG |
287 | |
288 | list<string> sl; | |
289 | overall_recovery_summary(f, &sl); | |
290 | if (!f && !sl.empty()) { | |
291 | for (auto p = sl.begin(); p != sl.end(); ++p) { | |
292 | if (pad) { | |
293 | *out << " "; | |
294 | } | |
295 | *out << *p << "\n"; | |
296 | pad = true; | |
7c673cae | 297 | } |
7c673cae | 298 | } |
31f18b77 | 299 | sl.clear(); |
7c673cae | 300 | |
31f18b77 FG |
301 | if (!f) { |
302 | unsigned max_width = 1; | |
9f95a23c | 303 | for (auto p = state_by_count.rbegin(); p != state_by_count.rend(); ++p) |
31f18b77 FG |
304 | { |
305 | std::stringstream ss; | |
306 | ss << p->first; | |
11fdf7f2 | 307 | max_width = std::max<size_t>(ss.str().size(), max_width); |
7c673cae FG |
308 | } |
309 | ||
9f95a23c | 310 | for (auto p = state_by_count.rbegin(); p != state_by_count.rend(); ++p) |
31f18b77 FG |
311 | { |
312 | if (pad) { | |
313 | *out << " "; | |
314 | } | |
315 | pad = true; | |
316 | out->setf(std::ios::left); | |
317 | *out << std::setw(max_width) << p->first | |
318 | << " " << pg_state_string(p->second) << "\n"; | |
319 | out->unsetf(std::ios::left); | |
320 | } | |
7c673cae FG |
321 | } |
322 | ||
31f18b77 FG |
323 | ostringstream ss_rec_io; |
324 | overall_recovery_rate_summary(f, &ss_rec_io); | |
325 | ostringstream ss_client_io; | |
326 | overall_client_io_rate_summary(f, &ss_client_io); | |
327 | ostringstream ss_cache_io; | |
328 | overall_cache_io_rate_summary(f, &ss_cache_io); | |
7c673cae | 329 | |
31f18b77 FG |
330 | if (!f && (ss_client_io.str().length() || ss_rec_io.str().length() |
331 | || ss_cache_io.str().length())) { | |
332 | *out << "\n \n"; | |
333 | *out << " io:\n"; | |
7c673cae FG |
334 | } |
335 | ||
31f18b77 FG |
336 | if (!f && ss_client_io.str().length()) |
337 | *out << " client: " << ss_client_io.str() << "\n"; | |
338 | if (!f && ss_rec_io.str().length()) | |
339 | *out << " recovery: " << ss_rec_io.str() << "\n"; | |
340 | if (!f && ss_cache_io.str().length()) | |
341 | *out << " cache: " << ss_cache_io.str() << "\n"; | |
7c673cae FG |
342 | } |
343 | ||
9f95a23c | 344 | void PGMapDigest::print_oneline_summary(ceph::Formatter *f, ostream *out) const |
7c673cae | 345 | { |
31f18b77 FG |
346 | std::stringstream ss; |
347 | ||
348 | if (f) | |
349 | f->open_array_section("num_pg_by_state"); | |
350 | for (auto p = num_pg_by_state.begin(); | |
351 | p != num_pg_by_state.end(); | |
352 | ++p) { | |
353 | if (f) { | |
354 | f->open_object_section("state"); | |
355 | f->dump_string("name", pg_state_string(p->first)); | |
356 | f->dump_unsigned("num", p->second); | |
357 | f->close_section(); | |
358 | } | |
359 | if (p != num_pg_by_state.begin()) | |
360 | ss << ", "; | |
361 | ss << p->second << " " << pg_state_string(p->first); | |
7c673cae | 362 | } |
31f18b77 FG |
363 | if (f) |
364 | f->close_section(); | |
7c673cae | 365 | |
31f18b77 FG |
366 | string states = ss.str(); |
367 | if (out) | |
368 | *out << num_pg << " pgs: " | |
369 | << states << "; " | |
1adf2230 | 370 | << byte_u_t(pg_sum.stats.sum.num_bytes) << " data, " |
11fdf7f2 TL |
371 | << byte_u_t(osd_sum.statfs.get_used()) << " used, " |
372 | << byte_u_t(osd_sum.statfs.available) << " / " | |
373 | << byte_u_t(osd_sum.statfs.total) << " avail"; | |
31f18b77 FG |
374 | if (f) { |
375 | f->dump_unsigned("num_pgs", num_pg); | |
376 | f->dump_unsigned("num_bytes", pg_sum.stats.sum.num_bytes); | |
11fdf7f2 TL |
377 | f->dump_int("total_bytes", osd_sum.statfs.total); |
378 | f->dump_int("total_avail_bytes", osd_sum.statfs.available); | |
379 | f->dump_int("total_used_bytes", osd_sum.statfs.get_used()); | |
380 | f->dump_int("total_used_raw_bytes", osd_sum.statfs.get_used_raw()); | |
31f18b77 | 381 | } |
7c673cae | 382 | |
31f18b77 FG |
383 | // make non-negative; we can get negative values if osds send |
384 | // uncommitted stats and then "go backward" or if they are just | |
385 | // buggy/wrong. | |
386 | pool_stat_t pos_delta = pg_sum_delta; | |
387 | pos_delta.floor(0); | |
388 | if (pos_delta.stats.sum.num_rd || | |
389 | pos_delta.stats.sum.num_wr) { | |
390 | if (out) | |
391 | *out << "; "; | |
392 | if (pos_delta.stats.sum.num_rd) { | |
393 | int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta; | |
394 | if (out) | |
1adf2230 | 395 | *out << byte_u_t(rd) << "/s rd, "; |
31f18b77 FG |
396 | if (f) |
397 | f->dump_unsigned("read_bytes_sec", rd); | |
398 | } | |
399 | if (pos_delta.stats.sum.num_wr) { | |
400 | int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta; | |
401 | if (out) | |
1adf2230 | 402 | *out << byte_u_t(wr) << "/s wr, "; |
31f18b77 FG |
403 | if (f) |
404 | f->dump_unsigned("write_bytes_sec", wr); | |
405 | } | |
406 | int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta; | |
407 | if (out) | |
11fdf7f2 | 408 | *out << si_u_t(iops) << " op/s"; |
31f18b77 FG |
409 | if (f) |
410 | f->dump_unsigned("io_sec", iops); | |
7c673cae | 411 | } |
31f18b77 FG |
412 | |
413 | list<string> sl; | |
414 | overall_recovery_summary(f, &sl); | |
415 | if (out) | |
416 | for (auto p = sl.begin(); p != sl.end(); ++p) | |
417 | *out << "; " << *p; | |
418 | std::stringstream ssr; | |
419 | overall_recovery_rate_summary(f, &ssr); | |
420 | if (out && ssr.str().length()) | |
421 | *out << "; " << ssr.str() << " recovering"; | |
7c673cae FG |
422 | } |
423 | ||
11fdf7f2 TL |
424 | void PGMapDigest::get_recovery_stats( |
425 | double *misplaced_ratio, | |
426 | double *degraded_ratio, | |
427 | double *inactive_pgs_ratio, | |
428 | double *unknown_pgs_ratio) const | |
429 | { | |
430 | if (pg_sum.stats.sum.num_objects_degraded && | |
431 | pg_sum.stats.sum.num_object_copies > 0) { | |
432 | *degraded_ratio = (double)pg_sum.stats.sum.num_objects_degraded / | |
433 | (double)pg_sum.stats.sum.num_object_copies; | |
434 | } else { | |
435 | *degraded_ratio = 0; | |
436 | } | |
437 | if (pg_sum.stats.sum.num_objects_misplaced && | |
438 | pg_sum.stats.sum.num_object_copies > 0) { | |
439 | *misplaced_ratio = (double)pg_sum.stats.sum.num_objects_misplaced / | |
440 | (double)pg_sum.stats.sum.num_object_copies; | |
441 | } else { | |
442 | *misplaced_ratio = 0; | |
443 | } | |
444 | if (num_pg > 0) { | |
445 | int num_pg_inactive = num_pg - num_pg_active - num_pg_unknown; | |
446 | *inactive_pgs_ratio = (double)num_pg_inactive / (double)num_pg; | |
447 | *unknown_pgs_ratio = (double)num_pg_unknown / (double)num_pg; | |
448 | } else { | |
449 | *inactive_pgs_ratio = 0; | |
450 | *unknown_pgs_ratio = 0; | |
451 | } | |
452 | } | |
453 | ||
9f95a23c | 454 | void PGMapDigest::recovery_summary(ceph::Formatter *f, list<string> *psl, |
b32b8144 | 455 | const pool_stat_t& pool_sum) const |
7c673cae | 456 | { |
b32b8144 FG |
457 | if (pool_sum.stats.sum.num_objects_degraded && pool_sum.stats.sum.num_object_copies > 0) { |
458 | double pc = (double)pool_sum.stats.sum.num_objects_degraded / | |
459 | (double)pool_sum.stats.sum.num_object_copies * (double)100.0; | |
31f18b77 FG |
460 | char b[20]; |
461 | snprintf(b, sizeof(b), "%.3lf", pc); | |
462 | if (f) { | |
b32b8144 FG |
463 | f->dump_unsigned("degraded_objects", pool_sum.stats.sum.num_objects_degraded); |
464 | f->dump_unsigned("degraded_total", pool_sum.stats.sum.num_object_copies); | |
31f18b77 FG |
465 | f->dump_float("degraded_ratio", pc / 100.0); |
466 | } else { | |
467 | ostringstream ss; | |
b32b8144 FG |
468 | ss << pool_sum.stats.sum.num_objects_degraded |
469 | << "/" << pool_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)"; | |
31f18b77 FG |
470 | psl->push_back(ss.str()); |
471 | } | |
472 | } | |
b32b8144 FG |
473 | if (pool_sum.stats.sum.num_objects_misplaced && pool_sum.stats.sum.num_object_copies > 0) { |
474 | double pc = (double)pool_sum.stats.sum.num_objects_misplaced / | |
475 | (double)pool_sum.stats.sum.num_object_copies * (double)100.0; | |
31f18b77 FG |
476 | char b[20]; |
477 | snprintf(b, sizeof(b), "%.3lf", pc); | |
478 | if (f) { | |
b32b8144 FG |
479 | f->dump_unsigned("misplaced_objects", pool_sum.stats.sum.num_objects_misplaced); |
480 | f->dump_unsigned("misplaced_total", pool_sum.stats.sum.num_object_copies); | |
31f18b77 FG |
481 | f->dump_float("misplaced_ratio", pc / 100.0); |
482 | } else { | |
483 | ostringstream ss; | |
b32b8144 FG |
484 | ss << pool_sum.stats.sum.num_objects_misplaced |
485 | << "/" << pool_sum.stats.sum.num_object_copies << " objects misplaced (" << b << "%)"; | |
31f18b77 FG |
486 | psl->push_back(ss.str()); |
487 | } | |
488 | } | |
b32b8144 FG |
489 | if (pool_sum.stats.sum.num_objects_unfound && pool_sum.stats.sum.num_objects) { |
490 | double pc = (double)pool_sum.stats.sum.num_objects_unfound / | |
491 | (double)pool_sum.stats.sum.num_objects * (double)100.0; | |
31f18b77 FG |
492 | char b[20]; |
493 | snprintf(b, sizeof(b), "%.3lf", pc); | |
494 | if (f) { | |
b32b8144 FG |
495 | f->dump_unsigned("unfound_objects", pool_sum.stats.sum.num_objects_unfound); |
496 | f->dump_unsigned("unfound_total", pool_sum.stats.sum.num_objects); | |
31f18b77 FG |
497 | f->dump_float("unfound_ratio", pc / 100.0); |
498 | } else { | |
499 | ostringstream ss; | |
b32b8144 FG |
500 | ss << pool_sum.stats.sum.num_objects_unfound |
501 | << "/" << pool_sum.stats.sum.num_objects << " objects unfound (" << b << "%)"; | |
31f18b77 FG |
502 | psl->push_back(ss.str()); |
503 | } | |
7c673cae | 504 | } |
7c673cae FG |
505 | } |
506 | ||
9f95a23c | 507 | void PGMapDigest::recovery_rate_summary(ceph::Formatter *f, ostream *out, |
31f18b77 FG |
508 | const pool_stat_t& delta_sum, |
509 | utime_t delta_stamp) const | |
7c673cae | 510 | { |
31f18b77 FG |
511 | // make non-negative; we can get negative values if osds send |
512 | // uncommitted stats and then "go backward" or if they are just | |
513 | // buggy/wrong. | |
514 | pool_stat_t pos_delta = delta_sum; | |
515 | pos_delta.floor(0); | |
516 | if (pos_delta.stats.sum.num_objects_recovered || | |
517 | pos_delta.stats.sum.num_bytes_recovered || | |
518 | pos_delta.stats.sum.num_keys_recovered) { | |
519 | int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp; | |
520 | int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp; | |
521 | int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp; | |
522 | if (f) { | |
523 | f->dump_int("recovering_objects_per_sec", objps); | |
524 | f->dump_int("recovering_bytes_per_sec", bps); | |
525 | f->dump_int("recovering_keys_per_sec", kps); | |
526 | f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered); | |
527 | f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered); | |
528 | f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered); | |
529 | } else { | |
1adf2230 | 530 | *out << byte_u_t(bps) << "/s"; |
31f18b77 | 531 | if (pos_delta.stats.sum.num_keys_recovered) |
11fdf7f2 TL |
532 | *out << ", " << si_u_t(kps) << " keys/s"; |
533 | *out << ", " << si_u_t(objps) << " objects/s"; | |
31f18b77 | 534 | } |
7c673cae | 535 | } |
31f18b77 | 536 | } |
7c673cae | 537 | |
9f95a23c | 538 | void PGMapDigest::overall_recovery_rate_summary(ceph::Formatter *f, ostream *out) const |
31f18b77 FG |
539 | { |
540 | recovery_rate_summary(f, out, pg_sum_delta, stamp_delta); | |
7c673cae FG |
541 | } |
542 | ||
9f95a23c | 543 | void PGMapDigest::overall_recovery_summary(ceph::Formatter *f, list<string> *psl) const |
7c673cae | 544 | { |
31f18b77 | 545 | recovery_summary(f, psl, pg_sum); |
7c673cae FG |
546 | } |
547 | ||
9f95a23c | 548 | void PGMapDigest::pool_recovery_rate_summary(ceph::Formatter *f, ostream *out, |
31f18b77 | 549 | uint64_t poolid) const |
7c673cae | 550 | { |
31f18b77 FG |
551 | auto p = per_pool_sum_delta.find(poolid); |
552 | if (p == per_pool_sum_delta.end()) | |
553 | return; | |
7c673cae | 554 | |
31f18b77 | 555 | auto ts = per_pool_sum_deltas_stamps.find(p->first); |
11fdf7f2 | 556 | ceph_assert(ts != per_pool_sum_deltas_stamps.end()); |
31f18b77 FG |
557 | recovery_rate_summary(f, out, p->second.first, ts->second); |
558 | } | |
7c673cae | 559 | |
9f95a23c | 560 | void PGMapDigest::pool_recovery_summary(ceph::Formatter *f, list<string> *psl, |
31f18b77 FG |
561 | uint64_t poolid) const |
562 | { | |
b32b8144 FG |
563 | auto p = pg_pool_sum.find(poolid); |
564 | if (p == pg_pool_sum.end()) | |
31f18b77 | 565 | return; |
7c673cae | 566 | |
b32b8144 | 567 | recovery_summary(f, psl, p->second); |
7c673cae FG |
568 | } |
569 | ||
9f95a23c | 570 | void PGMapDigest::client_io_rate_summary(ceph::Formatter *f, ostream *out, |
31f18b77 FG |
571 | const pool_stat_t& delta_sum, |
572 | utime_t delta_stamp) const | |
7c673cae | 573 | { |
31f18b77 FG |
574 | pool_stat_t pos_delta = delta_sum; |
575 | pos_delta.floor(0); | |
576 | if (pos_delta.stats.sum.num_rd || | |
577 | pos_delta.stats.sum.num_wr) { | |
578 | if (pos_delta.stats.sum.num_rd) { | |
579 | int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp; | |
580 | if (f) { | |
581 | f->dump_int("read_bytes_sec", rd); | |
582 | } else { | |
1adf2230 | 583 | *out << byte_u_t(rd) << "/s rd, "; |
31f18b77 FG |
584 | } |
585 | } | |
586 | if (pos_delta.stats.sum.num_wr) { | |
587 | int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp; | |
588 | if (f) { | |
589 | f->dump_int("write_bytes_sec", wr); | |
590 | } else { | |
1adf2230 | 591 | *out << byte_u_t(wr) << "/s wr, "; |
31f18b77 FG |
592 | } |
593 | } | |
594 | int64_t iops_rd = pos_delta.stats.sum.num_rd / (double)delta_stamp; | |
595 | int64_t iops_wr = pos_delta.stats.sum.num_wr / (double)delta_stamp; | |
596 | if (f) { | |
597 | f->dump_int("read_op_per_sec", iops_rd); | |
598 | f->dump_int("write_op_per_sec", iops_wr); | |
599 | } else { | |
11fdf7f2 | 600 | *out << si_u_t(iops_rd) << " op/s rd, " << si_u_t(iops_wr) << " op/s wr"; |
31f18b77 | 601 | } |
7c673cae FG |
602 | } |
603 | } | |
604 | ||
9f95a23c | 605 | void PGMapDigest::overall_client_io_rate_summary(ceph::Formatter *f, ostream *out) const |
7c673cae | 606 | { |
31f18b77 FG |
607 | client_io_rate_summary(f, out, pg_sum_delta, stamp_delta); |
608 | } | |
7c673cae | 609 | |
9f95a23c | 610 | void PGMapDigest::pool_client_io_rate_summary(ceph::Formatter *f, ostream *out, |
31f18b77 FG |
611 | uint64_t poolid) const |
612 | { | |
613 | auto p = per_pool_sum_delta.find(poolid); | |
614 | if (p == per_pool_sum_delta.end()) | |
7c673cae FG |
615 | return; |
616 | ||
31f18b77 | 617 | auto ts = per_pool_sum_deltas_stamps.find(p->first); |
11fdf7f2 | 618 | ceph_assert(ts != per_pool_sum_deltas_stamps.end()); |
31f18b77 | 619 | client_io_rate_summary(f, out, p->second.first, ts->second); |
7c673cae FG |
620 | } |
621 | ||
9f95a23c | 622 | void PGMapDigest::cache_io_rate_summary(ceph::Formatter *f, ostream *out, |
31f18b77 FG |
623 | const pool_stat_t& delta_sum, |
624 | utime_t delta_stamp) const | |
7c673cae | 625 | { |
31f18b77 FG |
626 | pool_stat_t pos_delta = delta_sum; |
627 | pos_delta.floor(0); | |
628 | bool have_output = false; | |
7c673cae | 629 | |
31f18b77 FG |
630 | if (pos_delta.stats.sum.num_flush) { |
631 | int64_t flush = (pos_delta.stats.sum.num_flush_kb << 10) / (double)delta_stamp; | |
632 | if (f) { | |
633 | f->dump_int("flush_bytes_sec", flush); | |
634 | } else { | |
1adf2230 | 635 | *out << byte_u_t(flush) << "/s flush"; |
31f18b77 | 636 | have_output = true; |
7c673cae FG |
637 | } |
638 | } | |
31f18b77 FG |
639 | if (pos_delta.stats.sum.num_evict) { |
640 | int64_t evict = (pos_delta.stats.sum.num_evict_kb << 10) / (double)delta_stamp; | |
641 | if (f) { | |
642 | f->dump_int("evict_bytes_sec", evict); | |
643 | } else { | |
644 | if (have_output) | |
645 | *out << ", "; | |
1adf2230 | 646 | *out << byte_u_t(evict) << "/s evict"; |
31f18b77 FG |
647 | have_output = true; |
648 | } | |
7c673cae | 649 | } |
31f18b77 FG |
650 | if (pos_delta.stats.sum.num_promote) { |
651 | int64_t promote = pos_delta.stats.sum.num_promote / (double)delta_stamp; | |
652 | if (f) { | |
653 | f->dump_int("promote_op_per_sec", promote); | |
654 | } else { | |
655 | if (have_output) | |
656 | *out << ", "; | |
11fdf7f2 | 657 | *out << si_u_t(promote) << " op/s promote"; |
31f18b77 FG |
658 | have_output = true; |
659 | } | |
7c673cae | 660 | } |
31f18b77 FG |
661 | if (pos_delta.stats.sum.num_flush_mode_low) { |
662 | if (f) { | |
663 | f->dump_int("num_flush_mode_low", pos_delta.stats.sum.num_flush_mode_low); | |
664 | } else { | |
665 | if (have_output) | |
666 | *out << ", "; | |
11fdf7f2 | 667 | *out << si_u_t(pos_delta.stats.sum.num_flush_mode_low) << " PGs flushing"; |
31f18b77 FG |
668 | have_output = true; |
669 | } | |
7c673cae | 670 | } |
31f18b77 FG |
671 | if (pos_delta.stats.sum.num_flush_mode_high) { |
672 | if (f) { | |
673 | f->dump_int("num_flush_mode_high", pos_delta.stats.sum.num_flush_mode_high); | |
674 | } else { | |
675 | if (have_output) | |
676 | *out << ", "; | |
11fdf7f2 | 677 | *out << si_u_t(pos_delta.stats.sum.num_flush_mode_high) << " PGs flushing (high)"; |
31f18b77 FG |
678 | have_output = true; |
679 | } | |
7c673cae | 680 | } |
31f18b77 FG |
681 | if (pos_delta.stats.sum.num_evict_mode_some) { |
682 | if (f) { | |
683 | f->dump_int("num_evict_mode_some", pos_delta.stats.sum.num_evict_mode_some); | |
684 | } else { | |
685 | if (have_output) | |
686 | *out << ", "; | |
11fdf7f2 | 687 | *out << si_u_t(pos_delta.stats.sum.num_evict_mode_some) << " PGs evicting"; |
31f18b77 FG |
688 | have_output = true; |
689 | } | |
690 | } | |
691 | if (pos_delta.stats.sum.num_evict_mode_full) { | |
692 | if (f) { | |
693 | f->dump_int("num_evict_mode_full", pos_delta.stats.sum.num_evict_mode_full); | |
694 | } else { | |
695 | if (have_output) | |
696 | *out << ", "; | |
11fdf7f2 | 697 | *out << si_u_t(pos_delta.stats.sum.num_evict_mode_full) << " PGs evicting (full)"; |
31f18b77 | 698 | } |
7c673cae FG |
699 | } |
700 | } | |
701 | ||
9f95a23c | 702 | void PGMapDigest::overall_cache_io_rate_summary(ceph::Formatter *f, ostream *out) const |
7c673cae | 703 | { |
31f18b77 | 704 | cache_io_rate_summary(f, out, pg_sum_delta, stamp_delta); |
7c673cae FG |
705 | } |
706 | ||
9f95a23c | 707 | void PGMapDigest::pool_cache_io_rate_summary(ceph::Formatter *f, ostream *out, |
31f18b77 | 708 | uint64_t poolid) const |
7c673cae | 709 | { |
31f18b77 FG |
710 | auto p = per_pool_sum_delta.find(poolid); |
711 | if (p == per_pool_sum_delta.end()) | |
712 | return; | |
7c673cae | 713 | |
31f18b77 | 714 | auto ts = per_pool_sum_deltas_stamps.find(p->first); |
11fdf7f2 | 715 | ceph_assert(ts != per_pool_sum_deltas_stamps.end()); |
31f18b77 | 716 | cache_io_rate_summary(f, out, p->second.first, ts->second); |
7c673cae FG |
717 | } |
718 | ||
d2e6a577 FG |
719 | ceph_statfs PGMapDigest::get_statfs(OSDMap &osdmap, |
720 | boost::optional<int64_t> data_pool) const | |
721 | { | |
722 | ceph_statfs statfs; | |
723 | bool filter = false; | |
724 | object_stat_sum_t sum; | |
725 | ||
726 | if (data_pool) { | |
727 | auto i = pg_pool_sum.find(*data_pool); | |
728 | if (i != pg_pool_sum.end()) { | |
729 | sum = i->second.stats.sum; | |
730 | filter = true; | |
731 | } | |
732 | } | |
733 | ||
734 | if (filter) { | |
735 | statfs.kb_used = (sum.num_bytes >> 10); | |
736 | statfs.kb_avail = get_pool_free_space(osdmap, *data_pool) >> 10; | |
737 | statfs.num_objects = sum.num_objects; | |
738 | statfs.kb = statfs.kb_used + statfs.kb_avail; | |
739 | } else { | |
740 | // these are in KB. | |
11fdf7f2 TL |
741 | statfs.kb = osd_sum.statfs.kb(); |
742 | statfs.kb_used = osd_sum.statfs.kb_used_raw(); | |
743 | statfs.kb_avail = osd_sum.statfs.kb_avail(); | |
d2e6a577 FG |
744 | statfs.num_objects = pg_sum.stats.sum.num_objects; |
745 | } | |
746 | ||
747 | return statfs; | |
748 | } | |
749 | ||
31f18b77 FG |
750 | void PGMapDigest::dump_pool_stats_full( |
751 | const OSDMap &osd_map, | |
752 | stringstream *ss, | |
9f95a23c | 753 | ceph::Formatter *f, |
31f18b77 | 754 | bool verbose) const |
7c673cae | 755 | { |
31f18b77 | 756 | TextTable tbl; |
7c673cae | 757 | |
31f18b77 FG |
758 | if (f) { |
759 | f->open_array_section("pools"); | |
760 | } else { | |
11fdf7f2 TL |
761 | tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT); |
762 | tbl.define_column("ID", TextTable::LEFT, TextTable::RIGHT); | |
763 | tbl.define_column("STORED", TextTable::LEFT, TextTable::RIGHT); | |
9f95a23c TL |
764 | if (verbose) { |
765 | tbl.define_column("(DATA)", TextTable::LEFT, TextTable::RIGHT); | |
766 | tbl.define_column("(OMAP)", TextTable::LEFT, TextTable::RIGHT); | |
767 | } | |
11fdf7f2 | 768 | tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); |
31f18b77 | 769 | tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT); |
9f95a23c TL |
770 | if (verbose) { |
771 | tbl.define_column("(DATA)", TextTable::LEFT, TextTable::RIGHT); | |
772 | tbl.define_column("(OMAP)", TextTable::LEFT, TextTable::RIGHT); | |
773 | } | |
31f18b77 FG |
774 | tbl.define_column("%USED", TextTable::LEFT, TextTable::RIGHT); |
775 | tbl.define_column("MAX AVAIL", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 | 776 | |
31f18b77 | 777 | if (verbose) { |
11fdf7f2 TL |
778 | tbl.define_column("QUOTA OBJECTS", TextTable::LEFT, TextTable::LEFT); |
779 | tbl.define_column("QUOTA BYTES", TextTable::LEFT, TextTable::LEFT); | |
31f18b77 | 780 | tbl.define_column("DIRTY", TextTable::LEFT, TextTable::RIGHT); |
11fdf7f2 TL |
781 | tbl.define_column("USED COMPR", TextTable::LEFT, TextTable::RIGHT); |
782 | tbl.define_column("UNDER COMPR", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 FG |
783 | } |
784 | } | |
785 | ||
786 | map<int,uint64_t> avail_by_rule; | |
787 | for (auto p = osd_map.get_pools().begin(); | |
788 | p != osd_map.get_pools().end(); ++p) { | |
789 | int64_t pool_id = p->first; | |
790 | if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0)) | |
791 | continue; | |
11fdf7f2 | 792 | |
31f18b77 FG |
793 | const string& pool_name = osd_map.get_pool_name(pool_id); |
794 | const pool_stat_t &stat = pg_pool_sum.at(pool_id); | |
795 | ||
796 | const pg_pool_t *pool = osd_map.get_pg_pool(pool_id); | |
797 | int ruleno = osd_map.crush->find_rule(pool->get_crush_rule(), | |
798 | pool->get_type(), | |
799 | pool->get_size()); | |
800 | int64_t avail; | |
31f18b77 FG |
801 | if (avail_by_rule.count(ruleno) == 0) { |
802 | // FIXME: we don't guarantee avail_space_by_rule is up-to-date before this function is invoked | |
803 | avail = get_rule_avail(ruleno); | |
804 | if (avail < 0) | |
805 | avail = 0; | |
806 | avail_by_rule[ruleno] = avail; | |
807 | } else { | |
808 | avail = avail_by_rule[ruleno]; | |
809 | } | |
31f18b77 FG |
810 | if (f) { |
811 | f->open_object_section("pool"); | |
812 | f->dump_string("name", pool_name); | |
813 | f->dump_int("id", pool_id); | |
814 | f->open_object_section("stats"); | |
815 | } else { | |
816 | tbl << pool_name | |
817 | << pool_id; | |
31f18b77 | 818 | } |
11fdf7f2 | 819 | float raw_used_rate = osd_map.pool_raw_used_rate(pool_id); |
81eedcae | 820 | bool per_pool = use_per_pool_stats(); |
9f95a23c | 821 | bool per_pool_omap = use_per_pool_omap_stats(); |
81eedcae | 822 | dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, per_pool, |
9f95a23c | 823 | per_pool_omap, pool); |
11fdf7f2 | 824 | if (f) { |
31f18b77 | 825 | f->close_section(); // stats |
31f18b77 | 826 | f->close_section(); // pool |
11fdf7f2 TL |
827 | } else { |
828 | tbl << TextTable::endrow; | |
829 | } | |
31f18b77 FG |
830 | } |
831 | if (f) | |
832 | f->close_section(); | |
833 | else { | |
11fdf7f2 | 834 | ceph_assert(ss != nullptr); |
9f95a23c | 835 | *ss << "--- POOLS ---\n"; |
31f18b77 FG |
836 | *ss << tbl; |
837 | } | |
838 | } | |
839 | ||
11fdf7f2 | 840 | void PGMapDigest::dump_cluster_stats(stringstream *ss, |
9f95a23c | 841 | ceph::Formatter *f, |
11fdf7f2 | 842 | bool verbose) const |
31f18b77 FG |
843 | { |
844 | if (f) { | |
845 | f->open_object_section("stats"); | |
11fdf7f2 TL |
846 | f->dump_int("total_bytes", osd_sum.statfs.total); |
847 | f->dump_int("total_avail_bytes", osd_sum.statfs.available); | |
848 | f->dump_int("total_used_bytes", osd_sum.statfs.get_used()); | |
849 | f->dump_int("total_used_raw_bytes", osd_sum.statfs.get_used_raw()); | |
850 | f->dump_float("total_used_raw_ratio", osd_sum.statfs.get_used_raw_ratio()); | |
81eedcae TL |
851 | f->dump_unsigned("num_osds", osd_sum.num_osds); |
852 | f->dump_unsigned("num_per_pool_osds", osd_sum.num_per_pool_osds); | |
9f95a23c | 853 | f->dump_unsigned("num_per_pool_omap_osds", osd_sum.num_per_pool_omap_osds); |
11fdf7f2 TL |
854 | f->close_section(); |
855 | f->open_object_section("stats_by_class"); | |
856 | for (auto& i : osd_sum_by_class) { | |
857 | f->open_object_section(i.first.c_str()); | |
858 | f->dump_int("total_bytes", i.second.statfs.total); | |
859 | f->dump_int("total_avail_bytes", i.second.statfs.available); | |
860 | f->dump_int("total_used_bytes", i.second.statfs.get_used()); | |
861 | f->dump_int("total_used_raw_bytes", i.second.statfs.get_used_raw()); | |
862 | f->dump_float("total_used_raw_ratio", | |
863 | i.second.statfs.get_used_raw_ratio()); | |
864 | f->close_section(); | |
31f18b77 FG |
865 | } |
866 | f->close_section(); | |
867 | } else { | |
11fdf7f2 | 868 | ceph_assert(ss != nullptr); |
31f18b77 | 869 | TextTable tbl; |
11fdf7f2 | 870 | tbl.define_column("CLASS", TextTable::LEFT, TextTable::LEFT); |
31f18b77 FG |
871 | tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); |
872 | tbl.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 | 873 | tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT); |
31f18b77 FG |
874 | tbl.define_column("RAW USED", TextTable::LEFT, TextTable::RIGHT); |
875 | tbl.define_column("%RAW USED", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 | 876 | |
11fdf7f2 TL |
877 | |
878 | for (auto& i : osd_sum_by_class) { | |
879 | tbl << i.first; | |
880 | tbl << stringify(byte_u_t(i.second.statfs.total)) | |
881 | << stringify(byte_u_t(i.second.statfs.available)) | |
882 | << stringify(byte_u_t(i.second.statfs.get_used())) | |
883 | << stringify(byte_u_t(i.second.statfs.get_used_raw())) | |
884 | << percentify(i.second.statfs.get_used_raw_ratio()*100.0) | |
885 | << TextTable::endrow; | |
886 | } | |
887 | tbl << "TOTAL"; | |
888 | tbl << stringify(byte_u_t(osd_sum.statfs.total)) | |
889 | << stringify(byte_u_t(osd_sum.statfs.available)) | |
890 | << stringify(byte_u_t(osd_sum.statfs.get_used())) | |
891 | << stringify(byte_u_t(osd_sum.statfs.get_used_raw())) | |
892 | << percentify(osd_sum.statfs.get_used_raw_ratio()*100.0) | |
893 | << TextTable::endrow; | |
894 | ||
9f95a23c | 895 | *ss << "--- RAW STORAGE ---\n"; |
31f18b77 FG |
896 | *ss << tbl; |
897 | } | |
898 | } | |
899 | ||
900 | void PGMapDigest::dump_object_stat_sum( | |
9f95a23c | 901 | TextTable &tbl, ceph::Formatter *f, |
11fdf7f2 | 902 | const pool_stat_t &pool_stat, uint64_t avail, |
9f95a23c | 903 | float raw_used_rate, bool verbose, bool per_pool, bool per_pool_omap, |
31f18b77 FG |
904 | const pg_pool_t *pool) |
905 | { | |
11fdf7f2 TL |
906 | const object_stat_sum_t &sum = pool_stat.stats.sum; |
907 | const store_statfs_t statfs = pool_stat.store_stats; | |
908 | ||
909 | if (sum.num_object_copies > 0) { | |
910 | raw_used_rate *= (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies; | |
911 | } | |
81eedcae | 912 | |
9f95a23c TL |
913 | uint64_t used_data_bytes = pool_stat.get_allocated_data_bytes(per_pool); |
914 | uint64_t used_omap_bytes = pool_stat.get_allocated_omap_bytes(per_pool_omap); | |
915 | uint64_t used_bytes = used_data_bytes + used_omap_bytes; | |
31f18b77 FG |
916 | |
917 | float used = 0.0; | |
3efd9988 | 918 | // note avail passed in is raw_avail, calc raw_used here. |
31f18b77 | 919 | if (avail) { |
11fdf7f2 | 920 | used = used_bytes; |
31f18b77 | 921 | used /= used + avail; |
11fdf7f2 | 922 | } else if (used_bytes) { |
31f18b77 FG |
923 | used = 1.0; |
924 | } | |
11fdf7f2 TL |
925 | auto avail_res = raw_used_rate ? avail / raw_used_rate : 0; |
926 | // an approximation for actually stored user data | |
9f95a23c TL |
927 | auto stored_data_normalized = pool_stat.get_user_data_bytes( |
928 | raw_used_rate, per_pool); | |
929 | auto stored_omap_normalized = pool_stat.get_user_omap_bytes( | |
930 | raw_used_rate, per_pool_omap); | |
931 | auto stored_normalized = stored_data_normalized + stored_omap_normalized; | |
932 | // same, amplied by replication or EC | |
933 | auto stored_raw = stored_normalized * raw_used_rate; | |
31f18b77 | 934 | if (f) { |
11fdf7f2 | 935 | f->dump_int("stored", stored_normalized); |
9f95a23c TL |
936 | if (verbose) { |
937 | f->dump_int("stored_data", stored_data_normalized); | |
938 | f->dump_int("stored_omap", stored_omap_normalized); | |
939 | } | |
31f18b77 | 940 | f->dump_int("objects", sum.num_objects); |
11fdf7f2 TL |
941 | f->dump_int("kb_used", shift_round_up(used_bytes, 10)); |
942 | f->dump_int("bytes_used", used_bytes); | |
9f95a23c TL |
943 | if (verbose) { |
944 | f->dump_int("data_bytes_used", used_data_bytes); | |
945 | f->dump_int("omap_bytes_used", used_omap_bytes); | |
946 | } | |
11fdf7f2 TL |
947 | f->dump_float("percent_used", used); |
948 | f->dump_unsigned("max_avail", avail_res); | |
31f18b77 FG |
949 | if (verbose) { |
950 | f->dump_int("quota_objects", pool->quota_max_objects); | |
951 | f->dump_int("quota_bytes", pool->quota_max_bytes); | |
952 | f->dump_int("dirty", sum.num_objects_dirty); | |
953 | f->dump_int("rd", sum.num_rd); | |
954 | f->dump_int("rd_bytes", sum.num_rd_kb * 1024ull); | |
955 | f->dump_int("wr", sum.num_wr); | |
956 | f->dump_int("wr_bytes", sum.num_wr_kb * 1024ull); | |
11fdf7f2 TL |
957 | f->dump_int("compress_bytes_used", statfs.data_compressed_allocated); |
958 | f->dump_int("compress_under_bytes", statfs.data_compressed_original); | |
959 | // Stored by user amplified by replication | |
9f95a23c | 960 | f->dump_int("stored_raw", stored_raw); |
31f18b77 FG |
961 | } |
962 | } else { | |
11fdf7f2 | 963 | tbl << stringify(byte_u_t(stored_normalized)); |
9f95a23c TL |
964 | if (verbose) { |
965 | tbl << stringify(byte_u_t(stored_data_normalized)); | |
966 | tbl << stringify(byte_u_t(stored_omap_normalized)); | |
967 | } | |
11fdf7f2 TL |
968 | tbl << stringify(si_u_t(sum.num_objects)); |
969 | tbl << stringify(byte_u_t(used_bytes)); | |
9f95a23c TL |
970 | if (verbose) { |
971 | tbl << stringify(byte_u_t(used_data_bytes)); | |
972 | tbl << stringify(byte_u_t(used_omap_bytes)); | |
973 | } | |
31f18b77 | 974 | tbl << percentify(used*100); |
11fdf7f2 | 975 | tbl << stringify(byte_u_t(avail_res)); |
31f18b77 | 976 | if (verbose) { |
11fdf7f2 TL |
977 | if (pool->quota_max_objects == 0) |
978 | tbl << "N/A"; | |
979 | else | |
980 | tbl << stringify(si_u_t(pool->quota_max_objects)); | |
981 | ||
982 | if (pool->quota_max_bytes == 0) | |
983 | tbl << "N/A"; | |
984 | else | |
985 | tbl << stringify(byte_u_t(pool->quota_max_bytes)); | |
986 | ||
1adf2230 | 987 | tbl << stringify(si_u_t(sum.num_objects_dirty)) |
11fdf7f2 TL |
988 | << stringify(byte_u_t(statfs.data_compressed_allocated)) |
989 | << stringify(byte_u_t(statfs.data_compressed_original)) | |
990 | ; | |
31f18b77 FG |
991 | } |
992 | } | |
993 | } | |
994 | ||
d2e6a577 FG |
995 | int64_t PGMapDigest::get_pool_free_space(const OSDMap &osd_map, |
996 | int64_t poolid) const | |
997 | { | |
998 | const pg_pool_t *pool = osd_map.get_pg_pool(poolid); | |
999 | int ruleno = osd_map.crush->find_rule(pool->get_crush_rule(), | |
1000 | pool->get_type(), | |
1001 | pool->get_size()); | |
1002 | int64_t avail; | |
1003 | avail = get_rule_avail(ruleno); | |
1004 | if (avail < 0) | |
1005 | avail = 0; | |
1006 | ||
11fdf7f2 | 1007 | return avail / osd_map.pool_raw_used_rate(poolid); |
d2e6a577 FG |
1008 | } |
1009 | ||
31f18b77 FG |
1010 | int64_t PGMap::get_rule_avail(const OSDMap& osdmap, int ruleno) const |
1011 | { | |
1012 | map<int,float> wm; | |
1013 | int r = osdmap.crush->get_rule_weight_osd_map(ruleno, &wm); | |
1014 | if (r < 0) { | |
1015 | return r; | |
1016 | } | |
1017 | if (wm.empty()) { | |
1018 | return 0; | |
1019 | } | |
1020 | ||
11fdf7f2 | 1021 | float fratio = osdmap.get_full_ratio(); |
31f18b77 FG |
1022 | |
1023 | int64_t min = -1; | |
1024 | for (auto p = wm.begin(); p != wm.end(); ++p) { | |
1025 | auto osd_info = osd_stat.find(p->first); | |
1026 | if (osd_info != osd_stat.end()) { | |
11fdf7f2 | 1027 | if (osd_info->second.statfs.total == 0 || p->second == 0) { |
31f18b77 FG |
1028 | // osd must be out, hence its stats have been zeroed |
1029 | // (unless we somehow managed to have a disk with size 0...) | |
1030 | // | |
1031 | // (p->second == 0), if osd weight is 0, no need to | |
1032 | // calculate proj below. | |
1033 | continue; | |
1034 | } | |
11fdf7f2 | 1035 | double unusable = (double)osd_info->second.statfs.kb() * |
31f18b77 | 1036 | (1.0 - fratio); |
11fdf7f2 | 1037 | double avail = std::max(0.0, (double)osd_info->second.statfs.kb_avail() - unusable); |
31f18b77 FG |
1038 | avail *= 1024.0; |
1039 | int64_t proj = (int64_t)(avail / (double)p->second); | |
1040 | if (min < 0 || proj < min) { | |
1041 | min = proj; | |
1042 | } | |
1043 | } else { | |
94b18763 FG |
1044 | if (osdmap.is_up(p->first)) { |
1045 | // This is a level 4 rather than an error, because we might have | |
1046 | // only just started, and not received the first stats message yet. | |
1047 | dout(4) << "OSD " << p->first << " is up, but has no stats" << dendl; | |
1048 | } | |
31f18b77 FG |
1049 | } |
1050 | } | |
1051 | return min; | |
1052 | } | |
1053 | ||
1054 | void PGMap::get_rules_avail(const OSDMap& osdmap, | |
1055 | std::map<int,int64_t> *avail_map) const | |
1056 | { | |
1057 | avail_map->clear(); | |
1058 | for (auto p : osdmap.get_pools()) { | |
1059 | int64_t pool_id = p.first; | |
1060 | if ((pool_id < 0) || (pg_pool_sum.count(pool_id) == 0)) | |
1061 | continue; | |
1062 | const pg_pool_t *pool = osdmap.get_pg_pool(pool_id); | |
1063 | int ruleno = osdmap.crush->find_rule(pool->get_crush_rule(), | |
1064 | pool->get_type(), | |
1065 | pool->get_size()); | |
1066 | if (avail_map->count(ruleno) == 0) | |
1067 | (*avail_map)[ruleno] = get_rule_avail(osdmap, ruleno); | |
1068 | } | |
1069 | } | |
1070 | ||
1071 | // --------------------- | |
1072 | // PGMap | |
1073 | ||
9f95a23c | 1074 | void PGMap::Incremental::dump(ceph::Formatter *f) const |
7c673cae FG |
1075 | { |
1076 | f->dump_unsigned("version", version); | |
1077 | f->dump_stream("stamp") << stamp; | |
31f18b77 FG |
1078 | f->dump_unsigned("osdmap_epoch", osdmap_epoch); |
1079 | f->dump_unsigned("pg_scan_epoch", pg_scan); | |
7c673cae | 1080 | |
31f18b77 FG |
1081 | f->open_array_section("pg_stat_updates"); |
1082 | for (auto p = pg_stat_updates.begin(); p != pg_stat_updates.end(); ++p) { | |
1083 | f->open_object_section("pg_stat"); | |
1084 | f->dump_stream("pgid") << p->first; | |
1085 | p->second.dump(f); | |
1086 | f->close_section(); | |
1087 | } | |
7c673cae FG |
1088 | f->close_section(); |
1089 | ||
31f18b77 FG |
1090 | f->open_array_section("osd_stat_updates"); |
1091 | for (auto p = osd_stat_updates.begin(); p != osd_stat_updates.end(); ++p) { | |
1092 | f->open_object_section("osd_stat"); | |
1093 | f->dump_int("osd", p->first); | |
1094 | p->second.dump(f); | |
7c673cae FG |
1095 | f->close_section(); |
1096 | } | |
1097 | f->close_section(); | |
11fdf7f2 TL |
1098 | f->open_array_section("pool_statfs_updates"); |
1099 | for (auto p = pool_statfs_updates.begin(); p != pool_statfs_updates.end(); ++p) { | |
1100 | f->open_object_section("pool_statfs"); | |
1101 | f->dump_stream("poolid/osd") << p->first; | |
1102 | p->second.dump(f); | |
1103 | f->close_section(); | |
1104 | } | |
1105 | f->close_section(); | |
7c673cae | 1106 | |
31f18b77 FG |
1107 | f->open_array_section("osd_stat_removals"); |
1108 | for (auto p = osd_stat_rm.begin(); p != osd_stat_rm.end(); ++p) | |
1109 | f->dump_int("osd", *p); | |
7c673cae | 1110 | f->close_section(); |
7c673cae | 1111 | |
31f18b77 FG |
1112 | f->open_array_section("pg_removals"); |
1113 | for (auto p = pg_remove.begin(); p != pg_remove.end(); ++p) | |
1114 | f->dump_stream("pgid") << *p; | |
7c673cae FG |
1115 | f->close_section(); |
1116 | } | |
1117 | ||
31f18b77 | 1118 | void PGMap::Incremental::generate_test_instances(list<PGMap::Incremental*>& o) |
7c673cae | 1119 | { |
31f18b77 FG |
1120 | o.push_back(new Incremental); |
1121 | o.push_back(new Incremental); | |
1122 | o.back()->version = 1; | |
1123 | o.back()->stamp = utime_t(123,345); | |
1124 | o.push_back(new Incremental); | |
1125 | o.back()->version = 2; | |
11fdf7f2 | 1126 | o.back()->pg_stat_updates[pg_t(1,2)] = pg_stat_t(); |
31f18b77 | 1127 | o.back()->osd_stat_updates[5] = osd_stat_t(); |
31f18b77 FG |
1128 | o.push_back(new Incremental); |
1129 | o.back()->version = 3; | |
1130 | o.back()->osdmap_epoch = 1; | |
1131 | o.back()->pg_scan = 2; | |
11fdf7f2 | 1132 | o.back()->pg_stat_updates[pg_t(4,5)] = pg_stat_t(); |
31f18b77 | 1133 | o.back()->osd_stat_updates[6] = osd_stat_t(); |
11fdf7f2 | 1134 | o.back()->pg_remove.insert(pg_t(1,2)); |
31f18b77 | 1135 | o.back()->osd_stat_rm.insert(5); |
11fdf7f2 | 1136 | o.back()->pool_statfs_updates[std::make_pair(1234,4)] = store_statfs_t(); |
7c673cae FG |
1137 | } |
1138 | ||
31f18b77 FG |
1139 | // -- |
1140 | ||
1141 | void PGMap::apply_incremental(CephContext *cct, const Incremental& inc) | |
7c673cae | 1142 | { |
11fdf7f2 | 1143 | ceph_assert(inc.version == version+1); |
31f18b77 | 1144 | version++; |
7c673cae | 1145 | |
31f18b77 | 1146 | pool_stat_t pg_sum_old = pg_sum; |
11fdf7f2 TL |
1147 | mempool::pgmap::unordered_map<int32_t, pool_stat_t> pg_pool_sum_old; |
1148 | pg_pool_sum_old = pg_pool_sum; | |
7c673cae | 1149 | |
31f18b77 FG |
1150 | for (auto p = inc.pg_stat_updates.begin(); |
1151 | p != inc.pg_stat_updates.end(); | |
1152 | ++p) { | |
1153 | const pg_t &update_pg(p->first); | |
11fdf7f2 | 1154 | auto update_pool = update_pg.pool(); |
31f18b77 | 1155 | const pg_stat_t &update_stat(p->second); |
7c673cae | 1156 | |
11fdf7f2 TL |
1157 | auto pg_stat_iter = pg_stat.find(update_pg); |
1158 | pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool]; | |
1159 | if (pg_stat_iter == pg_stat.end()) { | |
31f18b77 FG |
1160 | pg_stat.insert(make_pair(update_pg, update_stat)); |
1161 | } else { | |
11fdf7f2 TL |
1162 | stat_pg_sub(update_pg, pg_stat_iter->second); |
1163 | pool_sum_ref.sub(pg_stat_iter->second); | |
1164 | pg_stat_iter->second = update_stat; | |
7c673cae | 1165 | } |
31f18b77 | 1166 | stat_pg_add(update_pg, update_stat); |
11fdf7f2 | 1167 | pool_sum_ref.add(update_stat); |
7c673cae | 1168 | } |
11fdf7f2 TL |
1169 | |
1170 | for (auto p = inc.pool_statfs_updates.begin(); | |
1171 | p != inc.pool_statfs_updates.end(); | |
1172 | ++p) { | |
1173 | auto update_pool = p->first.first; | |
1174 | auto update_osd = p->first.second; | |
1175 | auto& statfs_inc = p->second; | |
1176 | ||
1177 | auto pool_statfs_iter = | |
1178 | pool_statfs.find(std::make_pair(update_pool, update_osd)); | |
eafe8130 TL |
1179 | if (pg_pool_sum.count(update_pool)) { |
1180 | pool_stat_t &pool_sum_ref = pg_pool_sum[update_pool]; | |
1181 | if (pool_statfs_iter == pool_statfs.end()) { | |
1182 | pool_statfs.emplace(std::make_pair(update_pool, update_osd), statfs_inc); | |
1183 | } else { | |
1184 | pool_sum_ref.sub(pool_statfs_iter->second); | |
1185 | pool_statfs_iter->second = statfs_inc; | |
1186 | } | |
1187 | pool_sum_ref.add(statfs_inc); | |
11fdf7f2 | 1188 | } |
11fdf7f2 TL |
1189 | } |
1190 | ||
31f18b77 FG |
1191 | for (auto p = inc.get_osd_stat_updates().begin(); |
1192 | p != inc.get_osd_stat_updates().end(); | |
1193 | ++p) { | |
1194 | int osd = p->first; | |
1195 | const osd_stat_t &new_stats(p->second); | |
7c673cae | 1196 | |
31f18b77 FG |
1197 | auto t = osd_stat.find(osd); |
1198 | if (t == osd_stat.end()) { | |
1199 | osd_stat.insert(make_pair(osd, new_stats)); | |
1200 | } else { | |
1201 | stat_osd_sub(t->first, t->second); | |
1202 | t->second = new_stats; | |
1203 | } | |
31f18b77 | 1204 | stat_osd_add(osd, new_stats); |
31f18b77 FG |
1205 | } |
1206 | set<int64_t> deleted_pools; | |
1207 | for (auto p = inc.pg_remove.begin(); | |
1208 | p != inc.pg_remove.end(); | |
1209 | ++p) { | |
1210 | const pg_t &removed_pg(*p); | |
1211 | auto s = pg_stat.find(removed_pg); | |
11fdf7f2 | 1212 | bool pool_erased = false; |
31f18b77 | 1213 | if (s != pg_stat.end()) { |
11fdf7f2 | 1214 | pool_erased = stat_pg_sub(removed_pg, s->second); |
31f18b77 | 1215 | pg_stat.erase(s); |
11fdf7f2 TL |
1216 | if (pool_erased) { |
1217 | deleted_pools.insert(removed_pg.pool()); | |
1218 | } | |
31f18b77 | 1219 | } |
7c673cae FG |
1220 | } |
1221 | ||
31f18b77 FG |
1222 | for (auto p = inc.get_osd_stat_rm().begin(); |
1223 | p != inc.get_osd_stat_rm().end(); | |
7c673cae | 1224 | ++p) { |
31f18b77 FG |
1225 | auto t = osd_stat.find(*p); |
1226 | if (t != osd_stat.end()) { | |
1227 | stat_osd_sub(t->first, t->second); | |
1228 | osd_stat.erase(t); | |
31f18b77 | 1229 | } |
11fdf7f2 TL |
1230 | for (auto i = pool_statfs.begin(); i != pool_statfs.end(); ++i) { |
1231 | if (i->first.second == *p) { | |
1232 | pg_pool_sum[i->first.first].sub(i->second); | |
1233 | pool_statfs.erase(i); | |
1234 | } | |
1235 | } | |
7c673cae FG |
1236 | } |
1237 | ||
b32b8144 FG |
1238 | // skip calculating delta while sum was not synchronized |
1239 | if (!stamp.is_zero() && !pg_sum_old.stats.sum.is_zero()) { | |
1240 | utime_t delta_t; | |
1241 | delta_t = inc.stamp; | |
1242 | delta_t -= stamp; | |
1243 | // calculate a delta, and average over the last 2 deltas. | |
1244 | pool_stat_t d = pg_sum; | |
1245 | d.stats.sub(pg_sum_old.stats); | |
1246 | pg_sum_deltas.push_back(make_pair(d, delta_t)); | |
1247 | stamp_delta += delta_t; | |
1248 | pg_sum_delta.stats.add(d.stats); | |
1249 | auto smooth_intervals = | |
11fdf7f2 TL |
1250 | cct ? cct->_conf.get_val<uint64_t>("mon_stat_smooth_intervals") : 1; |
1251 | while (pg_sum_deltas.size() > smooth_intervals) { | |
b32b8144 FG |
1252 | pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats); |
1253 | stamp_delta -= pg_sum_deltas.front().second; | |
1254 | pg_sum_deltas.pop_front(); | |
1255 | } | |
31f18b77 | 1256 | } |
b32b8144 | 1257 | stamp = inc.stamp; |
7c673cae | 1258 | |
31f18b77 | 1259 | update_pool_deltas(cct, inc.stamp, pg_pool_sum_old); |
7c673cae | 1260 | |
31f18b77 FG |
1261 | for (auto p : deleted_pools) { |
1262 | if (cct) | |
1263 | dout(20) << " deleted pool " << p << dendl; | |
1264 | deleted_pool(p); | |
1265 | } | |
7c673cae | 1266 | |
31f18b77 FG |
1267 | if (inc.osdmap_epoch) |
1268 | last_osdmap_epoch = inc.osdmap_epoch; | |
1269 | if (inc.pg_scan) | |
1270 | last_pg_scan = inc.pg_scan; | |
7c673cae FG |
1271 | } |
1272 | ||
31f18b77 | 1273 | void PGMap::calc_stats() |
7c673cae | 1274 | { |
31f18b77 FG |
1275 | num_pg = 0; |
1276 | num_pg_active = 0; | |
1277 | num_pg_unknown = 0; | |
1278 | num_osd = 0; | |
1279 | pg_pool_sum.clear(); | |
1280 | num_pg_by_pool.clear(); | |
1281 | pg_by_osd.clear(); | |
1282 | pg_sum = pool_stat_t(); | |
1283 | osd_sum = osd_stat_t(); | |
11fdf7f2 | 1284 | osd_sum_by_class.clear(); |
31f18b77 | 1285 | num_pg_by_state.clear(); |
11fdf7f2 | 1286 | num_pg_by_pool_state.clear(); |
31f18b77 | 1287 | num_pg_by_osd.clear(); |
7c673cae | 1288 | |
31f18b77 FG |
1289 | for (auto p = pg_stat.begin(); |
1290 | p != pg_stat.end(); | |
1291 | ++p) { | |
11fdf7f2 TL |
1292 | auto pg = p->first; |
1293 | stat_pg_add(pg, p->second); | |
1294 | pg_pool_sum[pg.pool()].add(p->second); | |
1295 | } | |
1296 | for (auto p = pool_statfs.begin(); | |
1297 | p != pool_statfs.end(); | |
1298 | ++p) { | |
1299 | auto pool = p->first.first; | |
1300 | pg_pool_sum[pool].add(p->second); | |
31f18b77 FG |
1301 | } |
1302 | for (auto p = osd_stat.begin(); | |
1303 | p != osd_stat.end(); | |
1304 | ++p) | |
1305 | stat_osd_add(p->first, p->second); | |
7c673cae FG |
1306 | } |
1307 | ||
31f18b77 FG |
1308 | void PGMap::stat_pg_add(const pg_t &pgid, const pg_stat_t &s, |
1309 | bool sameosds) | |
7c673cae | 1310 | { |
11fdf7f2 | 1311 | auto pool = pgid.pool(); |
31f18b77 | 1312 | pg_sum.add(s); |
7c673cae | 1313 | |
31f18b77 FG |
1314 | num_pg++; |
1315 | num_pg_by_state[s.state]++; | |
11fdf7f2 TL |
1316 | num_pg_by_pool_state[pgid.pool()][s.state]++; |
1317 | num_pg_by_pool[pool]++; | |
7c673cae | 1318 | |
31f18b77 FG |
1319 | if ((s.state & PG_STATE_CREATING) && |
1320 | s.parent_split_bits == 0) { | |
1321 | creating_pgs.insert(pgid); | |
1322 | if (s.acting_primary >= 0) { | |
1323 | creating_pgs_by_osd_epoch[s.acting_primary][s.mapping_epoch].insert(pgid); | |
7c673cae FG |
1324 | } |
1325 | } | |
1326 | ||
31f18b77 FG |
1327 | if (s.state & PG_STATE_ACTIVE) { |
1328 | ++num_pg_active; | |
1329 | } | |
1330 | if (s.state == 0) { | |
1331 | ++num_pg_unknown; | |
7c673cae FG |
1332 | } |
1333 | ||
31f18b77 FG |
1334 | if (sameosds) |
1335 | return; | |
7c673cae | 1336 | |
31f18b77 FG |
1337 | for (auto p = s.blocked_by.begin(); |
1338 | p != s.blocked_by.end(); | |
1339 | ++p) { | |
1340 | ++blocked_by_sum[*p]; | |
7c673cae | 1341 | } |
31f18b77 FG |
1342 | |
1343 | for (auto p = s.acting.begin(); p != s.acting.end(); ++p) { | |
1344 | pg_by_osd[*p].insert(pgid); | |
1345 | num_pg_by_osd[*p].acting++; | |
1346 | } | |
1347 | for (auto p = s.up.begin(); p != s.up.end(); ++p) { | |
81eedcae TL |
1348 | auto& t = pg_by_osd[*p]; |
1349 | if (t.find(pgid) == t.end()) { | |
1350 | t.insert(pgid); | |
1351 | num_pg_by_osd[*p].up_not_acting++; | |
1352 | } | |
7c673cae | 1353 | } |
7c673cae | 1354 | |
31f18b77 FG |
1355 | if (s.up_primary >= 0) { |
1356 | num_pg_by_osd[s.up_primary].primary++; | |
7c673cae | 1357 | } |
7c673cae | 1358 | } |
31f18b77 | 1359 | |
11fdf7f2 | 1360 | bool PGMap::stat_pg_sub(const pg_t &pgid, const pg_stat_t &s, |
31f18b77 | 1361 | bool sameosds) |
7c673cae | 1362 | { |
11fdf7f2 | 1363 | bool pool_erased = false; |
31f18b77 FG |
1364 | pg_sum.sub(s); |
1365 | ||
1366 | num_pg--; | |
1367 | int end = --num_pg_by_state[s.state]; | |
11fdf7f2 | 1368 | ceph_assert(end >= 0); |
31f18b77 FG |
1369 | if (end == 0) |
1370 | num_pg_by_state.erase(s.state); | |
11fdf7f2 TL |
1371 | if (--num_pg_by_pool_state[pgid.pool()][s.state] == 0) { |
1372 | num_pg_by_pool_state[pgid.pool()].erase(s.state); | |
1373 | } | |
31f18b77 FG |
1374 | end = --num_pg_by_pool[pgid.pool()]; |
1375 | if (end == 0) { | |
11fdf7f2 | 1376 | pool_erased = true; |
7c673cae | 1377 | } |
7c673cae | 1378 | |
31f18b77 FG |
1379 | if ((s.state & PG_STATE_CREATING) && |
1380 | s.parent_split_bits == 0) { | |
1381 | creating_pgs.erase(pgid); | |
1382 | if (s.acting_primary >= 0) { | |
1383 | map<epoch_t,set<pg_t> >& r = creating_pgs_by_osd_epoch[s.acting_primary]; | |
1384 | r[s.mapping_epoch].erase(pgid); | |
1385 | if (r[s.mapping_epoch].empty()) | |
1386 | r.erase(s.mapping_epoch); | |
1387 | if (r.empty()) | |
1388 | creating_pgs_by_osd_epoch.erase(s.acting_primary); | |
7c673cae FG |
1389 | } |
1390 | } | |
31f18b77 FG |
1391 | |
1392 | if (s.state & PG_STATE_ACTIVE) { | |
1393 | --num_pg_active; | |
1394 | } | |
1395 | if (s.state == 0) { | |
1396 | --num_pg_unknown; | |
1397 | } | |
1398 | ||
1399 | if (sameosds) | |
11fdf7f2 | 1400 | return pool_erased; |
31f18b77 FG |
1401 | |
1402 | for (auto p = s.blocked_by.begin(); | |
1403 | p != s.blocked_by.end(); | |
1404 | ++p) { | |
1405 | auto q = blocked_by_sum.find(*p); | |
11fdf7f2 | 1406 | ceph_assert(q != blocked_by_sum.end()); |
31f18b77 FG |
1407 | --q->second; |
1408 | if (q->second == 0) | |
1409 | blocked_by_sum.erase(q); | |
1410 | } | |
1411 | ||
81eedcae | 1412 | set<int32_t> actingset; |
31f18b77 | 1413 | for (auto p = s.acting.begin(); p != s.acting.end(); ++p) { |
81eedcae | 1414 | actingset.insert(*p); |
31f18b77 FG |
1415 | auto& oset = pg_by_osd[*p]; |
1416 | oset.erase(pgid); | |
1417 | if (oset.empty()) | |
1418 | pg_by_osd.erase(*p); | |
1419 | auto it = num_pg_by_osd.find(*p); | |
1420 | if (it != num_pg_by_osd.end() && it->second.acting > 0) | |
1421 | it->second.acting--; | |
1422 | } | |
1423 | for (auto p = s.up.begin(); p != s.up.end(); ++p) { | |
1424 | auto& oset = pg_by_osd[*p]; | |
1425 | oset.erase(pgid); | |
1426 | if (oset.empty()) | |
1427 | pg_by_osd.erase(*p); | |
81eedcae TL |
1428 | if (actingset.count(*p)) |
1429 | continue; | |
31f18b77 | 1430 | auto it = num_pg_by_osd.find(*p); |
81eedcae TL |
1431 | if (it != num_pg_by_osd.end() && it->second.up_not_acting > 0) |
1432 | it->second.up_not_acting--; | |
31f18b77 FG |
1433 | } |
1434 | ||
1435 | if (s.up_primary >= 0) { | |
1436 | auto it = num_pg_by_osd.find(s.up_primary); | |
1437 | if (it != num_pg_by_osd.end() && it->second.primary > 0) | |
1438 | it->second.primary--; | |
1439 | } | |
11fdf7f2 TL |
1440 | return pool_erased; |
1441 | } | |
1442 | ||
1443 | void PGMap::calc_purged_snaps() | |
1444 | { | |
1445 | purged_snaps.clear(); | |
1446 | set<int64_t> unknown; | |
1447 | for (auto& i : pg_stat) { | |
1448 | if (i.second.state == 0) { | |
1449 | unknown.insert(i.first.pool()); | |
1450 | purged_snaps.erase(i.first.pool()); | |
1451 | continue; | |
1452 | } else if (unknown.count(i.first.pool())) { | |
1453 | continue; | |
1454 | } | |
1455 | auto j = purged_snaps.find(i.first.pool()); | |
1456 | if (j == purged_snaps.end()) { | |
1457 | // base case | |
1458 | purged_snaps[i.first.pool()] = i.second.purged_snaps; | |
1459 | } else { | |
1460 | j->second.intersection_of(i.second.purged_snaps); | |
1461 | } | |
1462 | } | |
31f18b77 FG |
1463 | } |
1464 | ||
11fdf7f2 | 1465 | void PGMap::calc_osd_sum_by_class(const OSDMap& osdmap) |
31f18b77 | 1466 | { |
11fdf7f2 TL |
1467 | osd_sum_by_class.clear(); |
1468 | for (auto& i : osd_stat) { | |
1469 | const char *class_name = osdmap.crush->get_item_class(i.first); | |
1470 | if (class_name) { | |
1471 | osd_sum_by_class[class_name].add(i.second); | |
1472 | } | |
1473 | } | |
31f18b77 FG |
1474 | } |
1475 | ||
1476 | void PGMap::stat_osd_add(int osd, const osd_stat_t &s) | |
1477 | { | |
1478 | num_osd++; | |
1479 | osd_sum.add(s); | |
1480 | if (osd >= (int)osd_last_seq.size()) { | |
1481 | osd_last_seq.resize(osd + 1); | |
1482 | } | |
1483 | osd_last_seq[osd] = s.seq; | |
1484 | } | |
1485 | ||
1486 | void PGMap::stat_osd_sub(int osd, const osd_stat_t &s) | |
1487 | { | |
1488 | num_osd--; | |
1489 | osd_sum.sub(s); | |
11fdf7f2 | 1490 | ceph_assert(osd < (int)osd_last_seq.size()); |
31f18b77 FG |
1491 | osd_last_seq[osd] = 0; |
1492 | } | |
1493 | ||
// Refresh the derived digest fields, then encode the PGMapDigest part of
// this map into bl.
//
//  osdmap    used to recompute avail_space_by_rule and osd_sum_by_class
//  bl        output buffer
//  features  forwarded to PGMapDigest::encode()
31f18b77 | 1494 | void PGMap::encode_digest(const OSDMap& osdmap,
11fdf7f2 | 1495 | bufferlist& bl, uint64_t features)
31f18b77 FG |
1496 | { | |
// Recompute everything that is derived from the osdmap or pg_stat before
// the digest is serialized.
1497 | get_rules_avail(osdmap, &avail_space_by_rule); | |
11fdf7f2 TL |
1498 | calc_osd_sum_by_class(osdmap); |
1499 | calc_purged_snaps(); | |
31f18b77 FG |
1500 | PGMapDigest::encode(bl, features); |
1501 | } | |
1502 | ||
// Serialize the map into bl.
//
// Fields are written in this order: version, pg_stat, osd_stat,
// last_osdmap_epoch, last_pg_scan, stamp, pool_statfs.  features is passed
// on only to the osd_stat and pool_statfs encoders.  decode() reads the
// fields back in the same order.
1503 | void PGMap::encode(bufferlist &bl, uint64_t features) const | |
1504 | { | |
11fdf7f2 TL |
1505 | ENCODE_START(8, 8, bl); |
1506 | encode(version, bl); | |
1507 | encode(pg_stat, bl); | |
1508 | encode(osd_stat, bl, features); | |
1509 | encode(last_osdmap_epoch, bl); | |
1510 | encode(last_pg_scan, bl); | |
1511 | encode(stamp, bl); | |
1512 | encode(pool_statfs, bl, features); | |
31f18b77 FG |
1513 | ENCODE_FINISH(bl); |
1514 | } | |
1515 | ||
// Deserialize the map from bl and rebuild the derived statistics.
//
// Fields are read in the order encode() writes them: version, pg_stat,
// osd_stat, last_osdmap_epoch, last_pg_scan, stamp, pool_statfs.
11fdf7f2 | 1516 | void PGMap::decode(bufferlist::const_iterator &bl)
31f18b77 | 1517 | {
11fdf7f2 TL |
1518 | DECODE_START(8, bl); |
1519 | decode(version, bl); | |
1520 | decode(pg_stat, bl); | |
1521 | decode(osd_stat, bl); | |
1522 | decode(last_osdmap_epoch, bl); | |
1523 | decode(last_pg_scan, bl); | |
1524 | decode(stamp, bl); | |
1525 | decode(pool_statfs, bl); | |
31f18b77 FG |
1526 | DECODE_FINISH(bl); |
1527 | ||
// The sums and counters are not part of the encoding; recompute them from
// the fields just decoded.
1528 | calc_stats(); | |
7c673cae FG |
1529 | } |
1530 | ||
// Dump the whole map to the formatter: basic fields, full (non-brief) PG
// stats, pool stats and OSD stats, in that order.
//
//  f         output formatter
//  with_net  forwarded to dump_osd_stats()
9f95a23c | 1531 | void PGMap::dump(ceph::Formatter *f, bool with_net) const
31f18b77 FG |
1532 | { | |
1533 | dump_basic(f); | |
// false selects the full per-PG dump rather than the brief one.
1534 | dump_pg_stats(f, false); | |
1535 | dump_pool_stats(f); | |
9f95a23c | 1536 | dump_osd_stats(f, with_net); |
31f18b77 FG |
1537 | } |
1538 | ||
9f95a23c | 1539 | void PGMap::dump_basic(ceph::Formatter *f) const |
31f18b77 FG |
1540 | { |
1541 | f->dump_unsigned("version", version); | |
1542 | f->dump_stream("stamp") << stamp; | |
1543 | f->dump_unsigned("last_osdmap_epoch", last_osdmap_epoch); | |
1544 | f->dump_unsigned("last_pg_scan", last_pg_scan); | |
31f18b77 FG |
1545 | |
1546 | f->open_object_section("pg_stats_sum"); | |
1547 | pg_sum.dump(f); | |
1548 | f->close_section(); | |
1549 | ||
1550 | f->open_object_section("osd_stats_sum"); | |
1551 | osd_sum.dump(f); | |
1552 | f->close_section(); | |
1553 | ||
31f18b77 FG |
1554 | dump_delta(f); |
1555 | } | |
1556 | ||
9f95a23c | 1557 | void PGMap::dump_delta(ceph::Formatter *f) const |
31f18b77 FG |
1558 | { |
1559 | f->open_object_section("pg_stats_delta"); | |
1560 | pg_sum_delta.dump(f); | |
11fdf7f2 | 1561 | f->dump_stream("stamp_delta") << stamp_delta; |
31f18b77 FG |
1562 | f->close_section(); |
1563 | } | |
1564 | ||
9f95a23c | 1565 | void PGMap::dump_pg_stats(ceph::Formatter *f, bool brief) const |
31f18b77 FG |
1566 | { |
1567 | f->open_array_section("pg_stats"); | |
1568 | for (auto i = pg_stat.begin(); | |
1569 | i != pg_stat.end(); | |
1570 | ++i) { | |
1571 | f->open_object_section("pg_stat"); | |
1572 | f->dump_stream("pgid") << i->first; | |
1573 | if (brief) | |
1574 | i->second.dump_brief(f); | |
1575 | else | |
1576 | i->second.dump(f); | |
1577 | f->close_section(); | |
1578 | } | |
1579 | f->close_section(); | |
1580 | } | |
1581 | ||
9f95a23c | 1582 | void PGMap::dump_pool_stats(ceph::Formatter *f) const |
31f18b77 FG |
1583 | { |
1584 | f->open_array_section("pool_stats"); | |
1585 | for (auto p = pg_pool_sum.begin(); | |
1586 | p != pg_pool_sum.end(); | |
1587 | ++p) { | |
1588 | f->open_object_section("pool_stat"); | |
1589 | f->dump_int("poolid", p->first); | |
1590 | auto q = num_pg_by_pool.find(p->first); | |
1591 | if (q != num_pg_by_pool.end()) | |
1592 | f->dump_unsigned("num_pg", q->second); | |
1593 | p->second.dump(f); | |
1594 | f->close_section(); | |
1595 | } | |
1596 | f->close_section(); | |
1597 | } | |
1598 | ||
9f95a23c | 1599 | void PGMap::dump_osd_stats(ceph::Formatter *f, bool with_net) const |
31f18b77 FG |
1600 | { |
1601 | f->open_array_section("osd_stats"); | |
1602 | for (auto q = osd_stat.begin(); | |
1603 | q != osd_stat.end(); | |
1604 | ++q) { | |
1605 | f->open_object_section("osd_stat"); | |
1606 | f->dump_int("osd", q->first); | |
ded94939 | 1607 | q->second.dump(f, with_net); |
31f18b77 FG |
1608 | f->close_section(); |
1609 | } | |
1610 | f->close_section(); | |
1611 | } | |
1612 | ||
9f95a23c TL |
1613 | void PGMap::dump_osd_ping_times(ceph::Formatter *f) const |
1614 | { | |
1615 | f->open_array_section("osd_ping_times"); | |
1616 | for (auto& [osd, stat] : osd_stat) { | |
1617 | f->open_object_section("osd_ping_time"); | |
1618 | f->dump_int("osd", osd); | |
1619 | stat.dump_ping_time(f); | |
1620 | f->close_section(); | |
1621 | } | |
1622 | f->close_section(); | |
1623 | } | |
1624 | ||
31f18b77 FG |
1625 | void PGMap::dump_pg_stats_plain( |
1626 | ostream& ss, | |
1627 | const mempool::pgmap::unordered_map<pg_t, pg_stat_t>& pg_stats, | |
1628 | bool brief) const | |
1629 | { | |
1630 | TextTable tab; | |
1631 | ||
1632 | if (brief){ | |
1633 | tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT); | |
1634 | tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT); | |
1635 | tab.define_column("UP", TextTable::LEFT, TextTable::RIGHT); | |
1636 | tab.define_column("UP_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1637 | tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT); | |
1638 | tab.define_column("ACTING_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1639 | } | |
1640 | else { | |
1641 | tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT); | |
1642 | tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); | |
1643 | tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1644 | tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT); | |
1645 | tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); | |
1646 | tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); | |
1647 | tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 TL |
1648 | tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); |
1649 | tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 FG |
1650 | tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); |
1651 | tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); | |
1652 | tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT); | |
1653 | tab.define_column("STATE_STAMP", TextTable::LEFT, TextTable::RIGHT); | |
1654 | tab.define_column("VERSION", TextTable::LEFT, TextTable::RIGHT); | |
1655 | tab.define_column("REPORTED", TextTable::LEFT, TextTable::RIGHT); | |
1656 | tab.define_column("UP", TextTable::LEFT, TextTable::RIGHT); | |
1657 | tab.define_column("UP_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1658 | tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT); | |
1659 | tab.define_column("ACTING_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1660 | tab.define_column("LAST_SCRUB", TextTable::LEFT, TextTable::RIGHT); | |
1661 | tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); | |
1662 | tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT); | |
1663 | tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); | |
b32b8144 | 1664 | tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT); |
31f18b77 FG |
1665 | } |
1666 | ||
1667 | for (auto i = pg_stats.begin(); | |
1668 | i != pg_stats.end(); ++i) { | |
1669 | const pg_stat_t &st(i->second); | |
1670 | if (brief) { | |
1671 | tab << i->first | |
1672 | << pg_state_string(st.state) | |
1673 | << st.up | |
1674 | << st.up_primary | |
1675 | << st.acting | |
1676 | << st.acting_primary | |
1677 | << TextTable::endrow; | |
7c673cae | 1678 | } else { |
31f18b77 FG |
1679 | ostringstream reported; |
1680 | reported << st.reported_epoch << ":" << st.reported_seq; | |
1681 | ||
1682 | tab << i->first | |
1683 | << st.stats.sum.num_objects | |
1684 | << st.stats.sum.num_objects_missing_on_primary | |
1685 | << st.stats.sum.num_objects_degraded | |
1686 | << st.stats.sum.num_objects_misplaced | |
1687 | << st.stats.sum.num_objects_unfound | |
1688 | << st.stats.sum.num_bytes | |
11fdf7f2 TL |
1689 | << st.stats.sum.num_omap_bytes |
1690 | << st.stats.sum.num_omap_keys | |
31f18b77 FG |
1691 | << st.log_size |
1692 | << st.ondisk_log_size | |
1693 | << pg_state_string(st.state) | |
1694 | << st.last_change | |
1695 | << st.version | |
1696 | << reported.str() | |
1697 | << pg_vector_string(st.up) | |
1698 | << st.up_primary | |
1699 | << pg_vector_string(st.acting) | |
1700 | << st.acting_primary | |
1701 | << st.last_scrub | |
1702 | << st.last_scrub_stamp | |
1703 | << st.last_deep_scrub | |
1704 | << st.last_deep_scrub_stamp | |
b32b8144 | 1705 | << st.snaptrimq_len |
31f18b77 | 1706 | << TextTable::endrow; |
7c673cae FG |
1707 | } |
1708 | } | |
7c673cae | 1709 | |
31f18b77 FG |
1710 | ss << tab; |
1711 | } | |
1712 | ||
1713 | void PGMap::dump(ostream& ss) const | |
1714 | { | |
1715 | dump_basic(ss); | |
1716 | dump_pg_stats(ss, false); | |
1717 | dump_pool_stats(ss, false); | |
1718 | dump_pg_sum_stats(ss, false); | |
1719 | dump_osd_stats(ss); | |
1720 | } | |
1721 | ||
1722 | void PGMap::dump_basic(ostream& ss) const | |
1723 | { | |
1724 | ss << "version " << version << std::endl; | |
1725 | ss << "stamp " << stamp << std::endl; | |
1726 | ss << "last_osdmap_epoch " << last_osdmap_epoch << std::endl; | |
1727 | ss << "last_pg_scan " << last_pg_scan << std::endl; | |
31f18b77 FG |
1728 | } |
1729 | ||
1730 | void PGMap::dump_pg_stats(ostream& ss, bool brief) const | |
1731 | { | |
1732 | dump_pg_stats_plain(ss, pg_stat, brief); | |
1733 | } | |
1734 | ||
1735 | void PGMap::dump_pool_stats(ostream& ss, bool header) const | |
1736 | { | |
1737 | TextTable tab; | |
1738 | ||
1739 | if (header) { | |
1740 | tab.define_column("POOLID", TextTable::LEFT, TextTable::LEFT); | |
1741 | tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); | |
1742 | tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1743 | tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT); | |
1744 | tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); | |
1745 | tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); | |
1746 | tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 TL |
1747 | tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); |
1748 | tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 FG |
1749 | tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); |
1750 | tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); | |
1751 | } else { | |
1752 | tab.define_column("", TextTable::LEFT, TextTable::LEFT); | |
1753 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1754 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1755 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1756 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1757 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1758 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1759 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1760 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 TL |
1761 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); |
1762 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 FG |
1763 | } |
1764 | ||
1765 | for (auto p = pg_pool_sum.begin(); | |
1766 | p != pg_pool_sum.end(); | |
1767 | ++p) { | |
1768 | tab << p->first | |
1769 | << p->second.stats.sum.num_objects | |
1770 | << p->second.stats.sum.num_objects_missing_on_primary | |
1771 | << p->second.stats.sum.num_objects_degraded | |
1772 | << p->second.stats.sum.num_objects_misplaced | |
1773 | << p->second.stats.sum.num_objects_unfound | |
1774 | << p->second.stats.sum.num_bytes | |
11fdf7f2 TL |
1775 | << p->second.stats.sum.num_omap_bytes |
1776 | << p->second.stats.sum.num_omap_keys | |
31f18b77 FG |
1777 | << p->second.log_size |
1778 | << p->second.ondisk_log_size | |
1779 | << TextTable::endrow; | |
1780 | } | |
1781 | ||
1782 | ss << tab; | |
1783 | } | |
1784 | ||
1785 | void PGMap::dump_pg_sum_stats(ostream& ss, bool header) const | |
1786 | { | |
1787 | TextTable tab; | |
1788 | ||
1789 | if (header) { | |
1790 | tab.define_column("PG_STAT", TextTable::LEFT, TextTable::LEFT); | |
1791 | tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); | |
1792 | tab.define_column("MISSING_ON_PRIMARY", TextTable::LEFT, TextTable::RIGHT); | |
1793 | tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT); | |
1794 | tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); | |
1795 | tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); | |
1796 | tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 TL |
1797 | tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); |
1798 | tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 FG |
1799 | tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); |
1800 | tab.define_column("DISK_LOG", TextTable::LEFT, TextTable::RIGHT); | |
1801 | } else { | |
1802 | tab.define_column("", TextTable::LEFT, TextTable::LEFT); | |
1803 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1804 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1805 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1806 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1807 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1808 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1809 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
1810 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 TL |
1811 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); |
1812 | tab.define_column("", TextTable::LEFT, TextTable::RIGHT); | |
31f18b77 FG |
1813 | }; |
1814 | ||
1815 | tab << "sum" | |
1816 | << pg_sum.stats.sum.num_objects | |
1817 | << pg_sum.stats.sum.num_objects_missing_on_primary | |
1818 | << pg_sum.stats.sum.num_objects_degraded | |
1819 | << pg_sum.stats.sum.num_objects_misplaced | |
1820 | << pg_sum.stats.sum.num_objects_unfound | |
1821 | << pg_sum.stats.sum.num_bytes | |
11fdf7f2 TL |
1822 | << pg_sum.stats.sum.num_omap_bytes |
1823 | << pg_sum.stats.sum.num_omap_keys | |
31f18b77 FG |
1824 | << pg_sum.log_size |
1825 | << pg_sum.ondisk_log_size | |
1826 | << TextTable::endrow; | |
1827 | ||
1828 | ss << tab; | |
1829 | } | |
1830 | ||
1831 | void PGMap::dump_osd_stats(ostream& ss) const | |
1832 | { | |
1833 | TextTable tab; | |
1834 | ||
1835 | tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT); | |
1836 | tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT); | |
1837 | tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 | 1838 | tab.define_column("USED_RAW", TextTable::LEFT, TextTable::RIGHT); |
31f18b77 FG |
1839 | tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT); |
1840 | tab.define_column("HB_PEERS", TextTable::LEFT, TextTable::RIGHT); | |
1841 | tab.define_column("PG_SUM", TextTable::LEFT, TextTable::RIGHT); | |
1842 | tab.define_column("PRIMARY_PG_SUM", TextTable::LEFT, TextTable::RIGHT); | |
1843 | ||
1844 | for (auto p = osd_stat.begin(); | |
1845 | p != osd_stat.end(); | |
1846 | ++p) { | |
1847 | tab << p->first | |
11fdf7f2 TL |
1848 | << byte_u_t(p->second.statfs.get_used()) |
1849 | << byte_u_t(p->second.statfs.available) | |
1850 | << byte_u_t(p->second.statfs.get_used_raw()) | |
1851 | << byte_u_t(p->second.statfs.total) | |
31f18b77 FG |
1852 | << p->second.hb_peers |
1853 | << get_num_pg_by_osd(p->first) | |
1854 | << get_num_primary_pg_by_osd(p->first) | |
1855 | << TextTable::endrow; | |
1856 | } | |
1857 | ||
1858 | tab << "sum" | |
11fdf7f2 TL |
1859 | << byte_u_t(osd_sum.statfs.get_used()) |
1860 | << byte_u_t(osd_sum.statfs.available) | |
1861 | << byte_u_t(osd_sum.statfs.get_used_raw()) | |
1862 | << byte_u_t(osd_sum.statfs.total) | |
31f18b77 | 1863 | << TextTable::endrow; |
7c673cae | 1864 | |
31f18b77 | 1865 | ss << tab; |
7c673cae FG |
1866 | } |
1867 | ||
31f18b77 | 1868 | void PGMap::dump_osd_sum_stats(ostream& ss) const |
7c673cae | 1869 | { |
31f18b77 | 1870 | TextTable tab; |
7c673cae | 1871 | |
31f18b77 FG |
1872 | tab.define_column("OSD_STAT", TextTable::LEFT, TextTable::LEFT); |
1873 | tab.define_column("USED", TextTable::LEFT, TextTable::RIGHT); | |
1874 | tab.define_column("AVAIL", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 | 1875 | tab.define_column("USED_RAW", TextTable::LEFT, TextTable::RIGHT); |
31f18b77 | 1876 | tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT); |
7c673cae | 1877 | |
31f18b77 | 1878 | tab << "sum" |
11fdf7f2 TL |
1879 | << byte_u_t(osd_sum.statfs.get_used()) |
1880 | << byte_u_t(osd_sum.statfs.available) | |
1881 | << byte_u_t(osd_sum.statfs.get_used_raw()) | |
1882 | << byte_u_t(osd_sum.statfs.total) | |
31f18b77 | 1883 | << TextTable::endrow; |
7c673cae | 1884 | |
31f18b77 | 1885 | ss << tab; |
7c673cae FG |
1886 | } |
1887 | ||
31f18b77 FG |
1888 | void PGMap::get_stuck_stats( |
1889 | int types, const utime_t cutoff, | |
1890 | mempool::pgmap::unordered_map<pg_t, pg_stat_t>& stuck_pgs) const | |
7c673cae | 1891 | { |
11fdf7f2 | 1892 | ceph_assert(types != 0); |
31f18b77 FG |
1893 | for (auto i = pg_stat.begin(); |
1894 | i != pg_stat.end(); | |
1895 | ++i) { | |
1896 | utime_t val = cutoff; // don't care about >= cutoff so that is infinity | |
1897 | ||
1898 | if ((types & STUCK_INACTIVE) && !(i->second.state & PG_STATE_ACTIVE)) { | |
1899 | if (i->second.last_active < val) | |
1900 | val = i->second.last_active; | |
7c673cae | 1901 | } |
31f18b77 FG |
1902 | |
1903 | if ((types & STUCK_UNCLEAN) && !(i->second.state & PG_STATE_CLEAN)) { | |
1904 | if (i->second.last_clean < val) | |
1905 | val = i->second.last_clean; | |
7c673cae | 1906 | } |
31f18b77 FG |
1907 | |
1908 | if ((types & STUCK_DEGRADED) && (i->second.state & PG_STATE_DEGRADED)) { | |
1909 | if (i->second.last_undegraded < val) | |
1910 | val = i->second.last_undegraded; | |
7c673cae | 1911 | } |
7c673cae | 1912 | |
31f18b77 FG |
1913 | if ((types & STUCK_UNDERSIZED) && (i->second.state & PG_STATE_UNDERSIZED)) { |
1914 | if (i->second.last_fullsized < val) | |
1915 | val = i->second.last_fullsized; | |
1916 | } | |
7c673cae | 1917 | |
31f18b77 FG |
1918 | if ((types & STUCK_STALE) && (i->second.state & PG_STATE_STALE)) { |
1919 | if (i->second.last_unstale < val) | |
1920 | val = i->second.last_unstale; | |
1921 | } | |
7c673cae | 1922 | |
31f18b77 FG |
1923 | // val is now the earliest any of the requested stuck states began |
1924 | if (val < cutoff) { | |
1925 | stuck_pgs[i->first] = i->second; | |
1926 | } | |
1927 | } | |
7c673cae FG |
1928 | } |
1929 | ||
31f18b77 | 1930 | bool PGMap::get_stuck_counts(const utime_t cutoff, map<string, int>& note) const |
7c673cae | 1931 | { |
31f18b77 FG |
1932 | int inactive = 0; |
1933 | int unclean = 0; | |
1934 | int degraded = 0; | |
1935 | int undersized = 0; | |
1936 | int stale = 0; | |
7c673cae | 1937 | |
31f18b77 FG |
1938 | for (auto i = pg_stat.begin(); |
1939 | i != pg_stat.end(); | |
1940 | ++i) { | |
1941 | if (! (i->second.state & PG_STATE_ACTIVE)) { | |
1942 | if (i->second.last_active < cutoff) | |
1943 | ++inactive; | |
7c673cae | 1944 | } |
31f18b77 FG |
1945 | if (! (i->second.state & PG_STATE_CLEAN)) { |
1946 | if (i->second.last_clean < cutoff) | |
1947 | ++unclean; | |
7c673cae | 1948 | } |
31f18b77 FG |
1949 | if (i->second.state & PG_STATE_DEGRADED) { |
1950 | if (i->second.last_undegraded < cutoff) | |
1951 | ++degraded; | |
7c673cae | 1952 | } |
31f18b77 FG |
1953 | if (i->second.state & PG_STATE_UNDERSIZED) { |
1954 | if (i->second.last_fullsized < cutoff) | |
1955 | ++undersized; | |
7c673cae | 1956 | } |
31f18b77 FG |
1957 | if (i->second.state & PG_STATE_STALE) { |
1958 | if (i->second.last_unstale < cutoff) | |
1959 | ++stale; | |
7c673cae FG |
1960 | } |
1961 | } | |
31f18b77 FG |
1962 | |
1963 | if (inactive) | |
1964 | note["stuck inactive"] = inactive; | |
1965 | ||
1966 | if (unclean) | |
1967 | note["stuck unclean"] = unclean; | |
1968 | ||
1969 | if (undersized) | |
1970 | note["stuck undersized"] = undersized; | |
1971 | ||
1972 | if (degraded) | |
1973 | note["stuck degraded"] = degraded; | |
1974 | ||
1975 | if (stale) | |
1976 | note["stuck stale"] = stale; | |
1977 | ||
1978 | return inactive || unclean || undersized || degraded || stale; | |
1979 | } | |
1980 | ||
9f95a23c | 1981 | void PGMap::dump_stuck(ceph::Formatter *f, int types, utime_t cutoff) const |
31f18b77 FG |
1982 | { |
1983 | mempool::pgmap::unordered_map<pg_t, pg_stat_t> stuck_pg_stats; | |
1984 | get_stuck_stats(types, cutoff, stuck_pg_stats); | |
1985 | f->open_array_section("stuck_pg_stats"); | |
1986 | for (auto i = stuck_pg_stats.begin(); | |
1987 | i != stuck_pg_stats.end(); | |
1988 | ++i) { | |
1989 | f->open_object_section("pg_stat"); | |
1990 | f->dump_stream("pgid") << i->first; | |
1991 | i->second.dump(f); | |
1992 | f->close_section(); | |
1993 | } | |
1994 | f->close_section(); | |
1995 | } | |
1996 | ||
1997 | void PGMap::dump_stuck_plain(ostream& ss, int types, utime_t cutoff) const | |
1998 | { | |
1999 | mempool::pgmap::unordered_map<pg_t, pg_stat_t> stuck_pg_stats; | |
2000 | get_stuck_stats(types, cutoff, stuck_pg_stats); | |
2001 | if (!stuck_pg_stats.empty()) | |
2002 | dump_pg_stats_plain(ss, stuck_pg_stats, true); | |
2003 | } | |
2004 | ||
2005 | int PGMap::dump_stuck_pg_stats( | |
2006 | stringstream &ds, | |
9f95a23c | 2007 | ceph::Formatter *f, |
31f18b77 FG |
2008 | int threshold, |
2009 | vector<string>& args) const | |
2010 | { | |
2011 | int stuck_types = 0; | |
2012 | ||
2013 | for (auto i = args.begin(); i != args.end(); ++i) { | |
2014 | if (*i == "inactive") | |
2015 | stuck_types |= PGMap::STUCK_INACTIVE; | |
2016 | else if (*i == "unclean") | |
2017 | stuck_types |= PGMap::STUCK_UNCLEAN; | |
2018 | else if (*i == "undersized") | |
2019 | stuck_types |= PGMap::STUCK_UNDERSIZED; | |
2020 | else if (*i == "degraded") | |
2021 | stuck_types |= PGMap::STUCK_DEGRADED; | |
2022 | else if (*i == "stale") | |
2023 | stuck_types |= PGMap::STUCK_STALE; | |
2024 | else { | |
2025 | ds << "Unknown type: " << *i << std::endl; | |
2026 | return -EINVAL; | |
7c673cae FG |
2027 | } |
2028 | } | |
31f18b77 FG |
2029 | |
2030 | utime_t now(ceph_clock_now()); | |
2031 | utime_t cutoff = now - utime_t(threshold, 0); | |
2032 | ||
2033 | if (!f) { | |
2034 | dump_stuck_plain(ds, stuck_types, cutoff); | |
2035 | } else { | |
2036 | dump_stuck(f, stuck_types, cutoff); | |
2037 | f->flush(ds); | |
7c673cae | 2038 | } |
31f18b77 FG |
2039 | |
2040 | return 0; | |
7c673cae FG |
2041 | } |
2042 | ||
9f95a23c | 2043 | void PGMap::dump_osd_perf_stats(ceph::Formatter *f) const |
7c673cae | 2044 | { |
31f18b77 FG |
2045 | f->open_array_section("osd_perf_infos"); |
2046 | for (auto i = osd_stat.begin(); | |
2047 | i != osd_stat.end(); | |
2048 | ++i) { | |
2049 | f->open_object_section("osd"); | |
2050 | f->dump_int("id", i->first); | |
2051 | { | |
2052 | f->open_object_section("perf_stats"); | |
2053 | i->second.os_perf_stat.dump(f); | |
2054 | f->close_section(); | |
2055 | } | |
2056 | f->close_section(); | |
2057 | } | |
2058 | f->close_section(); | |
7c673cae | 2059 | } |
31f18b77 | 2060 | void PGMap::print_osd_perf_stats(std::ostream *ss) const |
7c673cae | 2061 | { |
31f18b77 FG |
2062 | TextTable tab; |
2063 | tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT); | |
2064 | tab.define_column("commit_latency(ms)", TextTable::LEFT, TextTable::RIGHT); | |
2065 | tab.define_column("apply_latency(ms)", TextTable::LEFT, TextTable::RIGHT); | |
2066 | for (auto i = osd_stat.begin(); | |
2067 | i != osd_stat.end(); | |
2068 | ++i) { | |
2069 | tab << i->first; | |
11fdf7f2 TL |
2070 | tab << i->second.os_perf_stat.os_commit_latency_ns / 1000000ull; |
2071 | tab << i->second.os_perf_stat.os_apply_latency_ns / 1000000ull; | |
31f18b77 FG |
2072 | tab << TextTable::endrow; |
2073 | } | |
2074 | (*ss) << tab; | |
2075 | } | |
7c673cae | 2076 | |
9f95a23c | 2077 | void PGMap::dump_osd_blocked_by_stats(ceph::Formatter *f) const |
31f18b77 FG |
2078 | { |
2079 | f->open_array_section("osd_blocked_by_infos"); | |
2080 | for (auto i = blocked_by_sum.begin(); | |
2081 | i != blocked_by_sum.end(); | |
2082 | ++i) { | |
2083 | f->open_object_section("osd"); | |
2084 | f->dump_int("id", i->first); | |
2085 | f->dump_int("num_blocked", i->second); | |
2086 | f->close_section(); | |
2087 | } | |
2088 | f->close_section(); | |
2089 | } | |
2090 | void PGMap::print_osd_blocked_by_stats(std::ostream *ss) const | |
2091 | { | |
2092 | TextTable tab; | |
2093 | tab.define_column("osd", TextTable::LEFT, TextTable::RIGHT); | |
2094 | tab.define_column("num_blocked", TextTable::LEFT, TextTable::RIGHT); | |
2095 | for (auto i = blocked_by_sum.begin(); | |
2096 | i != blocked_by_sum.end(); | |
2097 | ++i) { | |
2098 | tab << i->first; | |
2099 | tab << i->second; | |
2100 | tab << TextTable::endrow; | |
2101 | } | |
2102 | (*ss) << tab; | |
7c673cae FG |
2103 | } |
2104 | ||
31f18b77 | 2105 | |
7c673cae FG |
2106 | /** |
2107 | * update aggregated delta | |
2108 | * | |
2109 | * @param cct ceph context | |
2110 | * @param ts Timestamp for the stats being delta'ed | |
2111 | * @param old_pool_sum Previous stats sum | |
2112 | * @param last_ts Last timestamp for pool | |
2113 | * @param result_pool_sum Resulting stats | |
2114 | * @param result_pool_delta Resulting pool delta | |
2115 | * @param result_ts_delta Resulting timestamp delta | |
2116 | * @param delta_avg_list List of last N computed deltas, used to average | |
2117 | */ | |
31f18b77 FG |
2118 | void PGMap::update_delta( |
2119 | CephContext *cct, | |
2120 | const utime_t ts, | |
2121 | const pool_stat_t& old_pool_sum, | |
2122 | utime_t *last_ts, | |
2123 | const pool_stat_t& current_pool_sum, | |
2124 | pool_stat_t *result_pool_delta, | |
2125 | utime_t *result_ts_delta, | |
2126 | mempool::pgmap::list<pair<pool_stat_t,utime_t> > *delta_avg_list) | |
7c673cae FG |
2127 | { |
2128 | /* @p ts is the timestamp we want to associate with the data | |
2129 | * in @p old_pool_sum, and on which we will base ourselves to | |
2130 | * calculate the delta, stored in 'delta_t'. | |
2131 | */ | |
2132 | utime_t delta_t; | |
2133 | delta_t = ts; // start with the provided timestamp | |
2134 | delta_t -= *last_ts; // take the last timestamp we saw | |
2135 | *last_ts = ts; // @p ts becomes the last timestamp we saw | |
2136 | ||
31f18b77 FG |
2137 | // adjust delta_t, quick start if there is no update in a long period |
2138 | delta_t = std::min(delta_t, | |
2139 | utime_t(2 * (cct ? cct->_conf->mon_delta_reset_interval : 10), 0)); | |
2140 | ||
2141 | // calculate a delta, and average over the last 6 deltas by default. | |
7c673cae FG |
2142 | /* start by taking a copy of our current @p result_pool_sum, and by |
2143 | * taking out the stats from @p old_pool_sum. This generates a stats | |
2144 | * delta. Stash this stats delta in @p delta_avg_list, along with the | |
2145 | * timestamp delta for these results. | |
2146 | */ | |
2147 | pool_stat_t d = current_pool_sum; | |
2148 | d.stats.sub(old_pool_sum.stats); | |
7c673cae FG |
2149 | |
2150 | /* Aggregate current delta, and take out the last seen delta (if any) to | |
2151 | * average it out. | |
b32b8144 | 2152 | * Skip calculating delta while sum was not synchronized. |
7c673cae | 2153 | */ |
b32b8144 FG |
2154 | if(!old_pool_sum.stats.sum.is_zero()) { |
2155 | delta_avg_list->push_back(make_pair(d,delta_t)); | |
2156 | *result_ts_delta += delta_t; | |
2157 | result_pool_delta->stats.add(d.stats); | |
2158 | } | |
11fdf7f2 TL |
2159 | size_t s = cct ? cct->_conf.get_val<uint64_t>("mon_stat_smooth_intervals") : 1; |
2160 | while (delta_avg_list->size() > s) { | |
7c673cae FG |
2161 | result_pool_delta->stats.sub(delta_avg_list->front().first.stats); |
2162 | *result_ts_delta -= delta_avg_list->front().second; | |
2163 | delta_avg_list->pop_front(); | |
2164 | } | |
2165 | } | |
2166 | ||
7c673cae FG |
2167 | /** |
2168 | * Update a given pool's deltas | |
2169 | * | |
2170 | * @param cct Ceph Context | |
2171 | * @param ts Timestamp for the stats being delta'ed | |
2172 | * @param pool Pool's id | |
2173 | * @param old_pool_sum Previous stats sum | |
2174 | */ | |
31f18b77 FG |
2175 | void PGMap::update_one_pool_delta( |
2176 | CephContext *cct, | |
2177 | const utime_t ts, | |
11fdf7f2 | 2178 | const int64_t pool, |
31f18b77 | 2179 | const pool_stat_t& old_pool_sum) |
7c673cae FG |
2180 | { |
2181 | if (per_pool_sum_deltas.count(pool) == 0) { | |
11fdf7f2 TL |
2182 | ceph_assert(per_pool_sum_deltas_stamps.count(pool) == 0); |
2183 | ceph_assert(per_pool_sum_delta.count(pool) == 0); | |
7c673cae FG |
2184 | } |
2185 | ||
31f18b77 | 2186 | auto& sum_delta = per_pool_sum_delta[pool]; |
7c673cae FG |
2187 | |
2188 | update_delta(cct, ts, old_pool_sum, &sum_delta.second, pg_pool_sum[pool], | |
2189 | &sum_delta.first, &per_pool_sum_deltas_stamps[pool], | |
2190 | &per_pool_sum_deltas[pool]); | |
2191 | } | |
2192 | ||
2193 | /** | |
2194 | * Update pools' deltas | |
2195 | * | |
2196 | * @param cct CephContext | |
2197 | * @param ts Timestamp for the stats being delta'ed | |
2198 | * @param pg_pool_sum_old Map of pool stats for delta calcs. | |
2199 | */ | |
31f18b77 FG |
2200 | void PGMap::update_pool_deltas( |
2201 | CephContext *cct, const utime_t ts, | |
11fdf7f2 | 2202 | const mempool::pgmap::unordered_map<int32_t,pool_stat_t>& pg_pool_sum_old) |
7c673cae | 2203 | { |
31f18b77 | 2204 | for (auto it = pg_pool_sum_old.begin(); |
7c673cae FG |
2205 | it != pg_pool_sum_old.end(); ++it) { |
2206 | update_one_pool_delta(cct, ts, it->first, it->second); | |
2207 | } | |
2208 | } | |
2209 | ||
2210 | void PGMap::clear_delta() | |
2211 | { | |
2212 | pg_sum_delta = pool_stat_t(); | |
2213 | pg_sum_deltas.clear(); | |
2214 | stamp_delta = utime_t(); | |
2215 | } | |
2216 | ||
7c673cae FG |
2217 | void PGMap::generate_test_instances(list<PGMap*>& o) |
2218 | { | |
2219 | o.push_back(new PGMap); | |
2220 | list<Incremental*> inc; | |
2221 | Incremental::generate_test_instances(inc); | |
2222 | delete inc.front(); | |
2223 | inc.pop_front(); | |
2224 | while (!inc.empty()) { | |
2225 | PGMap *pmp = new PGMap(); | |
2226 | *pmp = *o.back(); | |
2227 | o.push_back(pmp); | |
2228 | o.back()->apply_incremental(NULL, *inc.front()); | |
2229 | delete inc.front(); | |
2230 | inc.pop_front(); | |
2231 | } | |
2232 | } | |
2233 | ||
11fdf7f2 | 2234 | void PGMap::get_filtered_pg_stats(uint64_t state, int64_t poolid, int64_t osdid, |
7c673cae FG |
2235 | bool primary, set<pg_t>& pgs) const |
2236 | { | |
31f18b77 | 2237 | for (auto i = pg_stat.begin(); |
7c673cae FG |
2238 | i != pg_stat.end(); |
2239 | ++i) { | |
11fdf7f2 | 2240 | if ((poolid >= 0) && (poolid != i->first.pool())) |
7c673cae FG |
2241 | continue; |
2242 | if ((osdid >= 0) && !(i->second.is_acting_osd(osdid,primary))) | |
2243 | continue; | |
11fdf7f2 TL |
2244 | if (state == (uint64_t)-1 || // "all" |
2245 | (i->second.state & state) || // matches a state bit | |
2246 | (state == 0 && i->second.state == 0)) { // matches "unknown" (== 0) | |
2247 | pgs.insert(i->first); | |
2248 | } | |
7c673cae FG |
2249 | } |
2250 | } | |
2251 | ||
9f95a23c | 2252 | void PGMap::dump_filtered_pg_stats(ceph::Formatter *f, set<pg_t>& pgs) const |
7c673cae FG |
2253 | { |
2254 | f->open_array_section("pg_stats"); | |
31f18b77 | 2255 | for (auto i = pgs.begin(); i != pgs.end(); ++i) { |
7c673cae FG |
2256 | const pg_stat_t& st = pg_stat.at(*i); |
2257 | f->open_object_section("pg_stat"); | |
2258 | f->dump_stream("pgid") << *i; | |
2259 | st.dump(f); | |
2260 | f->close_section(); | |
2261 | } | |
2262 | f->close_section(); | |
2263 | } | |
2264 | ||
2265 | void PGMap::dump_filtered_pg_stats(ostream& ss, set<pg_t>& pgs) const | |
2266 | { | |
2267 | TextTable tab; | |
11fdf7f2 | 2268 | utime_t now = ceph_clock_now(); |
7c673cae | 2269 | |
11fdf7f2 | 2270 | tab.define_column("PG", TextTable::LEFT, TextTable::LEFT); |
7c673cae | 2271 | tab.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT); |
7c673cae FG |
2272 | tab.define_column("DEGRADED", TextTable::LEFT, TextTable::RIGHT); |
2273 | tab.define_column("MISPLACED", TextTable::LEFT, TextTable::RIGHT); | |
2274 | tab.define_column("UNFOUND", TextTable::LEFT, TextTable::RIGHT); | |
2275 | tab.define_column("BYTES", TextTable::LEFT, TextTable::RIGHT); | |
11fdf7f2 TL |
2276 | tab.define_column("OMAP_BYTES*", TextTable::LEFT, TextTable::RIGHT); |
2277 | tab.define_column("OMAP_KEYS*", TextTable::LEFT, TextTable::RIGHT); | |
7c673cae | 2278 | tab.define_column("LOG", TextTable::LEFT, TextTable::RIGHT); |
7c673cae | 2279 | tab.define_column("STATE", TextTable::LEFT, TextTable::RIGHT); |
11fdf7f2 | 2280 | tab.define_column("SINCE", TextTable::LEFT, TextTable::RIGHT); |
7c673cae FG |
2281 | tab.define_column("VERSION", TextTable::LEFT, TextTable::RIGHT); |
2282 | tab.define_column("REPORTED", TextTable::LEFT, TextTable::RIGHT); | |
2283 | tab.define_column("UP", TextTable::LEFT, TextTable::RIGHT); | |
7c673cae | 2284 | tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT); |
7c673cae | 2285 | tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); |
7c673cae FG |
2286 | tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT); |
2287 | ||
31f18b77 | 2288 | for (auto i = pgs.begin(); i != pgs.end(); ++i) { |
7c673cae FG |
2289 | const pg_stat_t& st = pg_stat.at(*i); |
2290 | ||
2291 | ostringstream reported; | |
2292 | reported << st.reported_epoch << ":" << st.reported_seq; | |
2293 | ||
11fdf7f2 | 2294 | ostringstream upstr, actingstr; |
9f95a23c TL |
2295 | upstr << pg_vector_string(st.up) << 'p' << st.up_primary; |
2296 | actingstr << pg_vector_string(st.acting) << 'p' << st.acting_primary; | |
7c673cae FG |
2297 | tab << *i |
2298 | << st.stats.sum.num_objects | |
7c673cae FG |
2299 | << st.stats.sum.num_objects_degraded |
2300 | << st.stats.sum.num_objects_misplaced | |
2301 | << st.stats.sum.num_objects_unfound | |
2302 | << st.stats.sum.num_bytes | |
11fdf7f2 TL |
2303 | << st.stats.sum.num_omap_bytes |
2304 | << st.stats.sum.num_omap_keys | |
7c673cae | 2305 | << st.log_size |
7c673cae | 2306 | << pg_state_string(st.state) |
11fdf7f2 | 2307 | << utimespan_str(now - st.last_change) |
7c673cae FG |
2308 | << st.version |
2309 | << reported.str() | |
11fdf7f2 TL |
2310 | << upstr.str() |
2311 | << actingstr.str() | |
7c673cae | 2312 | << st.last_scrub_stamp |
7c673cae FG |
2313 | << st.last_deep_scrub_stamp |
2314 | << TextTable::endrow; | |
2315 | } | |
2316 | ||
2317 | ss << tab; | |
2318 | } | |
2319 | ||
11fdf7f2 | 2320 | void PGMap::dump_pool_stats_and_io_rate(int64_t poolid, const OSDMap &osd_map, |
9f95a23c | 2321 | ceph::Formatter *f, |
11fdf7f2 TL |
2322 | stringstream *rs) const { |
2323 | string pool_name = osd_map.get_pool_name(poolid); | |
2324 | if (f) { | |
2325 | f->open_object_section("pool"); | |
2326 | f->dump_string("pool_name", pool_name.c_str()); | |
2327 | f->dump_int("pool_id", poolid); | |
2328 | f->open_object_section("recovery"); | |
2329 | } | |
2330 | list<string> sl; | |
2331 | stringstream tss; | |
2332 | pool_recovery_summary(f, &sl, poolid); | |
2333 | if (!f && !sl.empty()) { | |
2334 | for (auto &p : sl) | |
2335 | tss << " " << p << "\n"; | |
2336 | } | |
2337 | if (f) { | |
2338 | f->close_section(); // object section recovery | |
2339 | f->open_object_section("recovery_rate"); | |
2340 | } | |
2341 | ostringstream rss; | |
2342 | pool_recovery_rate_summary(f, &rss, poolid); | |
2343 | if (!f && !rss.str().empty()) | |
2344 | tss << " recovery io " << rss.str() << "\n"; | |
2345 | if (f) { | |
2346 | f->close_section(); // object section recovery_rate | |
2347 | f->open_object_section("client_io_rate"); | |
2348 | } | |
2349 | rss.clear(); | |
2350 | rss.str(""); | |
2351 | pool_client_io_rate_summary(f, &rss, poolid); | |
2352 | if (!f && !rss.str().empty()) | |
2353 | tss << " client io " << rss.str() << "\n"; | |
2354 | // dump cache tier IO rate for cache pool | |
2355 | const pg_pool_t *pool = osd_map.get_pg_pool(poolid); | |
2356 | if (pool->is_tier()) { | |
2357 | if (f) { | |
2358 | f->close_section(); // object section client_io_rate | |
2359 | f->open_object_section("cache_io_rate"); | |
7c673cae | 2360 | } |
11fdf7f2 TL |
2361 | rss.clear(); |
2362 | rss.str(""); | |
2363 | pool_cache_io_rate_summary(f, &rss, poolid); | |
2364 | if (!f && !rss.str().empty()) | |
2365 | tss << " cache tier io " << rss.str() << "\n"; | |
2366 | } | |
2367 | if (f) { | |
2368 | f->close_section(); // object section cache_io_rate | |
2369 | f->close_section(); // object section pool | |
2370 | } else { | |
2371 | *rs << "pool " << pool_name << " id " << poolid << "\n"; | |
2372 | if (!tss.str().empty()) | |
2373 | *rs << tss.str() << "\n"; | |
2374 | else | |
2375 | *rs << " nothing is going on\n\n"; | |
7c673cae | 2376 | } |
7c673cae FG |
2377 | } |
2378 | ||
9f95a23c TL |
2379 | // Get crush parentage for an osd (skip root) |
2380 | set<std::string> PGMap::osd_parentage(const OSDMap& osdmap, int id) const | |
2381 | { | |
2382 | set<std::string> reporters_by_subtree; | |
2383 | auto reporter_subtree_level = g_conf().get_val<string>("mon_osd_reporter_subtree_level"); | |
2384 | ||
2385 | auto loc = osdmap.crush->get_full_location(id); | |
2386 | for (auto& [parent_bucket_type, parent_id] : loc) { | |
2387 | // Should we show the root? Might not be too informative like "default" | |
2388 | if (parent_bucket_type != "root" && | |
2389 | parent_bucket_type != reporter_subtree_level) { | |
2390 | reporters_by_subtree.insert(parent_id); | |
2391 | } | |
2392 | } | |
2393 | return reporters_by_subtree; | |
2394 | } | |
2395 | ||
11fdf7f2 | 2396 | void PGMap::get_health_checks( |
31f18b77 | 2397 | CephContext *cct, |
11fdf7f2 TL |
2398 | const OSDMap& osdmap, |
2399 | health_check_map_t *checks) const | |
7c673cae | 2400 | { |
11fdf7f2 TL |
2401 | utime_t now = ceph_clock_now(); |
2402 | const auto max = cct->_conf.get_val<uint64_t>("mon_health_max_detail"); | |
2403 | const auto& pools = osdmap.get_pools(); | |
224ce89b | 2404 | |
224ce89b WB |
// Pool-level consequence that an unhealthy PG state is reported under.
// The numeric values are spelled out explicitly; containers keyed on this
// type iterate in this order.
enum pg_consequence_t {
  // Client IO to the pool may block
  UNAVAILABLE = 1,
  // Fewer than the requested number of replicas are present
  DEGRADED = 2,
  // Backfill is blocked for space considerations.
  // This may or may not be a deadlock condition.
  BACKFILL_FULL = 3,
  // The data may be missing or inconsistent on disk and requires repair
  DAMAGED = 4,
  // Recovery is blocked because OSDs are full
  RECOVERY_FULL = 5
};
2414 | ||
2415 | // For a given PG state, how should it be reported at the pool level? | |
2416 | class PgStateResponse { | |
2417 | public: | |
2418 | pg_consequence_t consequence; | |
2419 | typedef std::function< utime_t(const pg_stat_t&) > stuck_cb; | |
2420 | stuck_cb stuck_since; | |
2421 | bool invert; | |
2422 | ||
11fdf7f2 TL |
2423 | PgStateResponse(const pg_consequence_t& c, stuck_cb&& s) |
2424 | : consequence(c), stuck_since(std::move(s)), invert(false) | |
224ce89b WB |
2425 | { |
2426 | } | |
2427 | ||
11fdf7f2 TL |
2428 | PgStateResponse(const pg_consequence_t& c, stuck_cb&& s, bool i) |
2429 | : consequence(c), stuck_since(std::move(s)), invert(i) | |
224ce89b WB |
2430 | { |
2431 | } | |
2432 | }; | |
2433 | ||
2434 | // Record the PG state counts that contributed to a reported pool state | |
2435 | class PgCauses { | |
2436 | public: | |
2437 | // Map of PG_STATE_* to number of pgs in that state. | |
2438 | std::map<unsigned, unsigned> states; | |
2439 | ||
2440 | // List of all PG IDs that had a state contributing | |
2441 | // to this health condition. | |
2442 | std::set<pg_t> pgs; | |
2443 | ||
2444 | std::map<pg_t, std::string> pg_messages; | |
2445 | }; | |
2446 | ||
2447 | // Map of PG state to how to respond to it | |
2448 | std::map<unsigned, PgStateResponse> state_to_response = { | |
2449 | // Immediate reports | |
2450 | { PG_STATE_INCONSISTENT, {DAMAGED, {}} }, | |
c07f9fc5 | 2451 | { PG_STATE_INCOMPLETE, {UNAVAILABLE, {}} }, |
224ce89b | 2452 | { PG_STATE_SNAPTRIM_ERROR, {DAMAGED, {}} }, |
b32b8144 FG |
2453 | { PG_STATE_RECOVERY_UNFOUND, {DAMAGED, {}} }, |
2454 | { PG_STATE_BACKFILL_UNFOUND, {DAMAGED, {}} }, | |
eafe8130 TL |
2455 | { PG_STATE_BACKFILL_TOOFULL, {BACKFILL_FULL, {}} }, |
2456 | { PG_STATE_RECOVERY_TOOFULL, {RECOVERY_FULL, {}} }, | |
224ce89b WB |
2457 | { PG_STATE_DEGRADED, {DEGRADED, {}} }, |
2458 | { PG_STATE_DOWN, {UNAVAILABLE, {}} }, | |
2459 | // Delayed (wait until stuck) reports | |
2460 | { PG_STATE_PEERING, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_peered;} } }, | |
2461 | { PG_STATE_UNDERSIZED, {DEGRADED, [](const pg_stat_t &p){return p.last_fullsized;} } }, | |
2462 | { PG_STATE_STALE, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_unstale;} } }, | |
2463 | // Delayed and inverted reports | |
b32b8144 | 2464 | { PG_STATE_ACTIVE, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_active;}, true} } |
224ce89b WB |
2465 | }; |
2466 | ||
2467 | // Specialized state printer that takes account of inversion of | |
2468 | // ACTIVE, CLEAN checks. | |
11fdf7f2 | 2469 | auto state_name = [](const uint64_t &state) { |
224ce89b WB |
2470 | // Special cases for the states that are inverted checks |
2471 | if (state == PG_STATE_CLEAN) { | |
2472 | return std::string("unclean"); | |
2473 | } else if (state == PG_STATE_ACTIVE) { | |
2474 | return std::string("inactive"); | |
2475 | } else { | |
2476 | return pg_state_string(state); | |
2477 | } | |
2478 | }; | |
2479 | ||
2480 | // Map of what is wrong to information about why, implicitly also stores | |
2481 | // the list of what is wrong. | |
2482 | std::map<pg_consequence_t, PgCauses> detected; | |
2483 | ||
2484 | // Optimisation: trim down the number of checks to apply based on | |
2485 | // the summary counters | |
2486 | std::map<unsigned, PgStateResponse> possible_responses; | |
2487 | for (const auto &i : num_pg_by_state) { | |
2488 | for (const auto &j : state_to_response) { | |
2489 | if (!j.second.invert) { | |
2490 | // Check for normal tests by seeing if any pgs have the flag | |
2491 | if (i.first & j.first) { | |
2492 | possible_responses.insert(j); | |
2493 | } | |
2494 | } | |
2495 | } | |
2496 | } | |
2497 | ||
2498 | for (const auto &j : state_to_response) { | |
2499 | if (j.second.invert) { | |
2500 | // Check for inverted tests by seeing if not-all pgs have the flag | |
2501 | const auto &found = num_pg_by_state.find(j.first); | |
2502 | if (found == num_pg_by_state.end() || found->second != num_pg) { | |
2503 | possible_responses.insert(j); | |
2504 | } | |
2505 | } | |
2506 | } | |
2507 | ||
11fdf7f2 | 2508 | utime_t cutoff = now - utime_t(cct->_conf.get_val<int64_t>("mon_pg_stuck_threshold"), 0); |
224ce89b WB |
2509 | // Loop over all PGs, if there are any possibly-unhealthy states in there |
2510 | if (!possible_responses.empty()) { | |
2511 | for (const auto& i : pg_stat) { | |
2512 | const auto &pg_id = i.first; | |
2513 | const auto &pg_info = i.second; | |
2514 | ||
2515 | for (const auto &j : state_to_response) { | |
2516 | const auto &pg_response_state = j.first; | |
2517 | const auto &pg_response = j.second; | |
2518 | ||
2519 | // Apply the state test | |
2520 | if (!(bool(pg_info.state & pg_response_state) != pg_response.invert)) { | |
2521 | continue; | |
2522 | } | |
2523 | ||
2524 | // Apply stuckness test if needed | |
2525 | if (pg_response.stuck_since) { | |
2526 | // Delayed response, check for stuckness | |
2527 | utime_t last_whatever = pg_response.stuck_since(pg_info); | |
2528 | if (last_whatever >= cutoff) { | |
2529 | // Not stuck enough, ignore. | |
2530 | continue; | |
2531 | } else { | |
2532 | ||
2533 | } | |
2534 | } | |
2535 | ||
2536 | auto &causes = detected[pg_response.consequence]; | |
2537 | causes.states[pg_response_state]++; | |
2538 | causes.pgs.insert(pg_id); | |
2539 | ||
2540 | // Don't bother composing detail string if we have already recorded | |
2541 | // too many | |
2542 | if (causes.pg_messages.size() > max) { | |
2543 | continue; | |
2544 | } | |
2545 | ||
2546 | std::ostringstream ss; | |
2547 | if (pg_response.stuck_since) { | |
2548 | utime_t since = pg_response.stuck_since(pg_info); | |
2549 | ss << "pg " << pg_id << " is stuck " << state_name(pg_response_state); | |
2550 | if (since == utime_t()) { | |
2551 | ss << " since forever"; | |
2552 | } else { | |
2553 | utime_t dur = now - since; | |
9f95a23c | 2554 | ss << " for " << utimespan_str(dur); |
224ce89b WB |
2555 | } |
2556 | ss << ", current state " << pg_state_string(pg_info.state) | |
2557 | << ", last acting " << pg_info.acting; | |
2558 | } else { | |
2559 | ss << "pg " << pg_id << " is " | |
2560 | << pg_state_string(pg_info.state); | |
2561 | ss << ", acting " << pg_info.acting; | |
2562 | if (pg_info.stats.sum.num_objects_unfound) { | |
2563 | ss << ", " << pg_info.stats.sum.num_objects_unfound | |
2564 | << " unfound"; | |
2565 | } | |
2566 | } | |
2567 | ||
2568 | if (pg_info.state & PG_STATE_INCOMPLETE) { | |
2569 | const pg_pool_t *pi = osdmap.get_pg_pool(pg_id.pool()); | |
2570 | if (pi && pi->min_size > 1) { | |
2571 | ss << " (reducing pool " | |
2572 | << osdmap.get_pool_name(pg_id.pool()) | |
2573 | << " min_size from " << (int)pi->min_size | |
2574 | << " may help; search ceph.com/docs for 'incomplete')"; | |
2575 | } | |
2576 | } | |
2577 | ||
2578 | causes.pg_messages[pg_id] = ss.str(); | |
2579 | } | |
2580 | } | |
2581 | } else { | |
2582 | dout(10) << __func__ << " skipping loop over PGs: counters look OK" << dendl; | |
2583 | } | |
2584 | ||
2585 | for (const auto &i : detected) { | |
2586 | std::string health_code; | |
2587 | health_status_t sev; | |
2588 | std::string summary; | |
2589 | switch(i.first) { | |
2590 | case UNAVAILABLE: | |
2591 | health_code = "PG_AVAILABILITY"; | |
2592 | sev = HEALTH_WARN; | |
2593 | summary = "Reduced data availability: "; | |
2594 | break; | |
2595 | case DEGRADED: | |
2596 | health_code = "PG_DEGRADED"; | |
2597 | summary = "Degraded data redundancy: "; | |
2598 | sev = HEALTH_WARN; | |
2599 | break; | |
eafe8130 TL |
2600 | case BACKFILL_FULL: |
2601 | health_code = "PG_BACKFILL_FULL"; | |
2602 | summary = "Low space hindering backfill (add storage if this doesn't resolve itself): "; | |
2603 | sev = HEALTH_WARN; | |
224ce89b WB |
2604 | break; |
2605 | case DAMAGED: | |
2606 | health_code = "PG_DAMAGED"; | |
2607 | summary = "Possible data damage: "; | |
2608 | sev = HEALTH_ERR; | |
2609 | break; | |
eafe8130 TL |
2610 | case RECOVERY_FULL: |
2611 | health_code = "PG_RECOVERY_FULL"; | |
2612 | summary = "Full OSDs blocking recovery: "; | |
2613 | sev = HEALTH_ERR; | |
2614 | break; | |
224ce89b | 2615 | default: |
11fdf7f2 | 2616 | ceph_abort(); |
224ce89b WB |
2617 | } |
2618 | ||
2619 | if (i.first == DEGRADED) { | |
2620 | if (pg_sum.stats.sum.num_objects_degraded && | |
2621 | pg_sum.stats.sum.num_object_copies > 0) { | |
2622 | double pc = (double)pg_sum.stats.sum.num_objects_degraded / | |
2623 | (double)pg_sum.stats.sum.num_object_copies * (double)100.0; | |
2624 | char b[20]; | |
2625 | snprintf(b, sizeof(b), "%.3lf", pc); | |
2626 | ostringstream ss; | |
2627 | ss << pg_sum.stats.sum.num_objects_degraded | |
2628 | << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded (" | |
2629 | << b << "%)"; | |
2630 | ||
2631 | // Throw in a comma for the benefit of the following PG counts | |
2632 | summary += ss.str() + ", "; | |
2633 | } | |
2634 | } | |
2635 | ||
2636 | // Compose summary message saying how many PGs in what states led | |
2637 | // to this health check failing | |
2638 | std::vector<std::string> pg_msgs; | |
9f95a23c | 2639 | int64_t count = 0; |
224ce89b WB |
2640 | for (const auto &j : i.second.states) { |
2641 | std::ostringstream msg; | |
2642 | msg << j.second << (j.second > 1 ? " pgs " : " pg ") << state_name(j.first); | |
2643 | pg_msgs.push_back(msg.str()); | |
9f95a23c | 2644 | count += j.second; |
224ce89b WB |
2645 | } |
2646 | summary += joinify(pg_msgs.begin(), pg_msgs.end(), std::string(", ")); | |
2647 | ||
224ce89b WB |
2648 | health_check_t *check = &checks->add( |
2649 | health_code, | |
2650 | sev, | |
9f95a23c TL |
2651 | summary, |
2652 | count); | |
224ce89b WB |
2653 | |
2654 | // Compose list of PGs contributing to this health check failing | |
2655 | for (const auto &j : i.second.pg_messages) { | |
2656 | check->detail.push_back(j.second); | |
2657 | } | |
2658 | } | |
2659 | ||
224ce89b WB |
2660 | // OSD_SCRUB_ERRORS |
2661 | if (pg_sum.stats.sum.num_scrub_errors) { | |
2662 | ostringstream ss; | |
2663 | ss << pg_sum.stats.sum.num_scrub_errors << " scrub errors"; | |
9f95a23c TL |
2664 | checks->add("OSD_SCRUB_ERRORS", HEALTH_ERR, ss.str(), |
2665 | pg_sum.stats.sum.num_scrub_errors); | |
224ce89b WB |
2666 | } |
2667 | ||
28e407b8 AA |
2668 | // LARGE_OMAP_OBJECTS |
2669 | if (pg_sum.stats.sum.num_large_omap_objects) { | |
2670 | list<string> detail; | |
2671 | for (auto &pool : pools) { | |
2672 | const string& pool_name = osdmap.get_pool_name(pool.first); | |
2673 | auto it2 = pg_pool_sum.find(pool.first); | |
2674 | if (it2 == pg_pool_sum.end()) { | |
2675 | continue; | |
2676 | } | |
2677 | const pool_stat_t *pstat = &it2->second; | |
2678 | if (pstat == nullptr) { | |
2679 | continue; | |
2680 | } | |
2681 | const object_stat_sum_t& sum = pstat->stats.sum; | |
2682 | if (sum.num_large_omap_objects) { | |
2683 | stringstream ss; | |
2684 | ss << sum.num_large_omap_objects << " large objects found in pool " | |
2685 | << "'" << pool_name << "'"; | |
2686 | detail.push_back(ss.str()); | |
2687 | } | |
2688 | } | |
2689 | if (!detail.empty()) { | |
2690 | ostringstream ss; | |
2691 | ss << pg_sum.stats.sum.num_large_omap_objects << " large omap objects"; | |
9f95a23c TL |
2692 | auto& d = checks->add("LARGE_OMAP_OBJECTS", HEALTH_WARN, ss.str(), |
2693 | pg_sum.stats.sum.num_large_omap_objects); | |
28e407b8 AA |
2694 | stringstream tip; |
2695 | tip << "Search the cluster log for 'Large omap object found' for more " | |
2696 | << "details."; | |
2697 | detail.push_back(tip.str()); | |
2698 | d.detail.swap(detail); | |
2699 | } | |
2700 | } | |
2701 | ||
224ce89b WB |
2702 | // CACHE_POOL_NEAR_FULL |
2703 | { | |
2704 | list<string> detail; | |
2705 | unsigned num_pools = 0; | |
2706 | for (auto& p : pools) { | |
2707 | if ((!p.second.target_max_objects && !p.second.target_max_bytes) || | |
2708 | !pg_pool_sum.count(p.first)) { | |
2709 | continue; | |
2710 | } | |
2711 | bool nearfull = false; | |
2712 | const string& name = osdmap.get_pool_name(p.first); | |
2713 | const pool_stat_t& st = get_pg_pool_sum_stat(p.first); | |
2714 | uint64_t ratio = p.second.cache_target_full_ratio_micro + | |
2715 | ((1000000 - p.second.cache_target_full_ratio_micro) * | |
2716 | cct->_conf->mon_cache_target_full_warn_ratio); | |
2717 | if (p.second.target_max_objects && | |
2718 | (uint64_t)(st.stats.sum.num_objects - | |
2719 | st.stats.sum.num_objects_hit_set_archive) > | |
2720 | p.second.target_max_objects * (ratio / 1000000.0)) { | |
2721 | ostringstream ss; | |
2722 | ss << "cache pool '" << name << "' with " | |
1adf2230 | 2723 | << si_u_t(st.stats.sum.num_objects) |
224ce89b | 2724 | << " objects at/near target max " |
1adf2230 | 2725 | << si_u_t(p.second.target_max_objects) << " objects"; |
224ce89b WB |
2726 | detail.push_back(ss.str()); |
2727 | nearfull = true; | |
2728 | } | |
2729 | if (p.second.target_max_bytes && | |
2730 | (uint64_t)(st.stats.sum.num_bytes - | |
2731 | st.stats.sum.num_bytes_hit_set_archive) > | |
2732 | p.second.target_max_bytes * (ratio / 1000000.0)) { | |
2733 | ostringstream ss; | |
2734 | ss << "cache pool '" << name | |
1adf2230 AA |
2735 | << "' with " << byte_u_t(st.stats.sum.num_bytes) |
2736 | << " at/near target max " | |
2737 | << byte_u_t(p.second.target_max_bytes); | |
224ce89b WB |
2738 | detail.push_back(ss.str()); |
2739 | nearfull = true; | |
2740 | } | |
2741 | if (nearfull) { | |
2742 | ++num_pools; | |
2743 | } | |
2744 | } | |
2745 | if (!detail.empty()) { | |
2746 | ostringstream ss; | |
2747 | ss << num_pools << " cache pools at or near target size"; | |
9f95a23c TL |
2748 | auto& d = checks->add("CACHE_POOL_NEAR_FULL", HEALTH_WARN, ss.str(), |
2749 | num_pools); | |
224ce89b WB |
2750 | d.detail.swap(detail); |
2751 | } | |
2752 | } | |
2753 | ||
2754 | // TOO_FEW_PGS | |
3efd9988 FG |
2755 | unsigned num_in = osdmap.get_num_in_osds(); |
2756 | auto sum_pg_up = std::max(static_cast<size_t>(pg_sum.up), pg_stat.size()); | |
2757 | const auto min_pg_per_osd = | |
11fdf7f2 | 2758 | cct->_conf.get_val<uint64_t>("mon_pg_warn_min_per_osd"); |
3efd9988 FG |
2759 | if (num_in && min_pg_per_osd > 0 && osdmap.get_pools().size() > 0) { |
2760 | auto per = sum_pg_up / num_in; | |
2761 | if (per < min_pg_per_osd && per) { | |
224ce89b WB |
2762 | ostringstream ss; |
2763 | ss << "too few PGs per OSD (" << per | |
3efd9988 | 2764 | << " < min " << min_pg_per_osd << ")"; |
9f95a23c TL |
2765 | checks->add("TOO_FEW_PGS", HEALTH_WARN, ss.str(), |
2766 | min_pg_per_osd - per); | |
224ce89b WB |
2767 | } |
2768 | } | |
2769 | ||
2770 | // TOO_MANY_PGS | |
11fdf7f2 | 2771 | auto max_pg_per_osd = cct->_conf.get_val<uint64_t>("mon_max_pg_per_osd"); |
3efd9988 FG |
2772 | if (num_in && max_pg_per_osd > 0) { |
2773 | auto per = sum_pg_up / num_in; | |
2774 | if (per > max_pg_per_osd) { | |
224ce89b WB |
2775 | ostringstream ss; |
2776 | ss << "too many PGs per OSD (" << per | |
3efd9988 | 2777 | << " > max " << max_pg_per_osd << ")"; |
9f95a23c TL |
2778 | checks->add("TOO_MANY_PGS", HEALTH_WARN, ss.str(), |
2779 | per - max_pg_per_osd); | |
224ce89b WB |
2780 | } |
2781 | } | |
2782 | ||
eafe8130 TL |
2783 | // TOO_FEW_OSDS |
2784 | auto warn_too_few_osds = cct->_conf.get_val<bool>("mon_warn_on_too_few_osds"); | |
2785 | auto osd_pool_default_size = cct->_conf.get_val<uint64_t>("osd_pool_default_size"); | |
2786 | if (warn_too_few_osds && osdmap.get_num_osds() < osd_pool_default_size) { | |
2787 | ostringstream ss; | |
2788 | ss << "OSD count " << osdmap.get_num_osds() | |
2789 | << " < osd_pool_default_size " << osd_pool_default_size; | |
9f95a23c TL |
2790 | checks->add("TOO_FEW_OSDS", HEALTH_WARN, ss.str(), |
2791 | osd_pool_default_size - osdmap.get_num_osds()); | |
eafe8130 TL |
2792 | } |
2793 | ||
2794 | // SLOW_PING_TIME | |
2795 | // Convert milliseconds to microseconds | |
2796 | auto warn_slow_ping_time = cct->_conf.get_val<double>("mon_warn_on_slow_ping_time") * 1000; | |
2797 | auto grace = cct->_conf.get_val<int64_t>("osd_heartbeat_grace"); | |
2798 | if (warn_slow_ping_time == 0) { | |
2799 | double ratio = cct->_conf.get_val<double>("mon_warn_on_slow_ping_ratio"); | |
2800 | warn_slow_ping_time = grace; | |
2801 | warn_slow_ping_time *= 1000000 * ratio; // Seconds of grace to microseconds at ratio | |
2802 | } | |
2803 | if (warn_slow_ping_time > 0) { | |
2804 | ||
// One slow heartbeat measurement between a pair of osds.
struct mon_ping_item_t {
  uint32_t pingtime;  // ping time recorded for this osd pair
  int from;           // reporting osd
  int to;             // peer osd
  bool improving;     // true when the recorded ping times are trending down

  // Strict ordering by (pingtime, from, to); `improving` is not part of
  // the key.
  bool operator<(const mon_ping_item_t& rhs) const {
    if (pingtime != rhs.pingtime)
      return pingtime < rhs.pingtime;
    if (from != rhs.from)
      return from < rhs.from;
    return to < rhs.to;
  }
};
2823 | ||
2824 | list<string> detail_back; | |
2825 | list<string> detail_front; | |
2826 | set<mon_ping_item_t> back_sorted, front_sorted; | |
2827 | for (auto i : osd_stat) { | |
2828 | for (auto j : i.second.hb_pingtime) { | |
2829 | ||
2830 | // Maybe source info is old | |
2831 | if (now.sec() - j.second.last_update > grace * 60) | |
2832 | continue; | |
2833 | ||
2834 | mon_ping_item_t back; | |
2835 | back.pingtime = std::max(j.second.back_pingtime[0], j.second.back_pingtime[1]); | |
2836 | back.pingtime = std::max(back.pingtime, j.second.back_pingtime[2]); | |
2837 | back.from = i.first; | |
2838 | back.to = j.first; | |
2839 | if (back.pingtime > warn_slow_ping_time) { | |
2840 | back.improving = (j.second.back_pingtime[0] < j.second.back_pingtime[1] | |
2841 | && j.second.back_pingtime[1] < j.second.back_pingtime[2]); | |
2842 | back_sorted.emplace(back); | |
2843 | } | |
2844 | ||
2845 | mon_ping_item_t front; | |
2846 | front.pingtime = std::max(j.second.front_pingtime[0], j.second.front_pingtime[1]); | |
2847 | front.pingtime = std::max(front.pingtime, j.second.front_pingtime[2]); | |
2848 | front.from = i.first; | |
2849 | front.to = j.first; | |
2850 | if (front.pingtime > warn_slow_ping_time) { | |
2851 | front.improving = (j.second.front_pingtime[0] < j.second.front_pingtime[1] | |
2852 | && j.second.front_pingtime[1] < j.second.back_pingtime[2]); | |
2853 | front_sorted.emplace(front); | |
2854 | } | |
2855 | } | |
2856 | } | |
2857 | int max_detail = 10; | |
2858 | for (auto &sback : boost::adaptors::reverse(back_sorted)) { | |
2859 | ostringstream ss; | |
2860 | if (max_detail == 0) { | |
2861 | ss << "Truncated long network list. Use ceph daemon mgr.# dump_osd_network for more information"; | |
2862 | detail_back.push_back(ss.str()); | |
2863 | break; | |
2864 | } | |
2865 | max_detail--; | |
9f95a23c TL |
2866 | ss << "Slow OSD heartbeats on back from osd." << sback.from |
2867 | << " [" << osd_parentage(osdmap, sback.from) << "]" | |
eafe8130 TL |
2868 | << (osdmap.is_down(sback.from) ? " (down)" : "") |
2869 | << " to osd." << sback.to | |
9f95a23c | 2870 | << " [" << osd_parentage(osdmap, sback.to) << "]" |
eafe8130 TL |
2871 | << (osdmap.is_down(sback.to) ? " (down)" : "") |
2872 | << " " << fixed_u_to_string(sback.pingtime, 3) << " msec" | |
2873 | << (sback.improving ? " possibly improving" : ""); | |
2874 | detail_back.push_back(ss.str()); | |
2875 | } | |
2876 | max_detail = 10; | |
2877 | for (auto &sfront : boost::adaptors::reverse(front_sorted)) { | |
2878 | ostringstream ss; | |
2879 | if (max_detail == 0) { | |
2880 | ss << "Truncated long network list. Use ceph daemon mgr.# dump_osd_network for more information"; | |
2881 | detail_front.push_back(ss.str()); | |
2882 | break; | |
2883 | } | |
2884 | max_detail--; | |
9f95a23c TL |
2885 | // Get crush parentage for each osd |
2886 | ss << "Slow OSD heartbeats on front from osd." << sfront.from | |
2887 | << " [" << osd_parentage(osdmap, sfront.from) << "]" | |
eafe8130 TL |
2888 | << (osdmap.is_down(sfront.from) ? " (down)" : "") |
2889 | << " to osd." << sfront.to | |
9f95a23c | 2890 | << " [" << osd_parentage(osdmap, sfront.to) << "]" |
eafe8130 TL |
2891 | << (osdmap.is_down(sfront.to) ? " (down)" : "") |
2892 | << " " << fixed_u_to_string(sfront.pingtime, 3) << " msec" | |
2893 | << (sfront.improving ? " possibly improving" : ""); | |
2894 | detail_front.push_back(ss.str()); | |
2895 | } | |
2896 | if (detail_back.size() != 0) { | |
2897 | ostringstream ss; | |
9f95a23c TL |
2898 | ss << "Slow OSD heartbeats on back (longest " |
2899 | << fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << "ms)"; | |
2900 | auto& d = checks->add("OSD_SLOW_PING_TIME_BACK", HEALTH_WARN, ss.str(), | |
2901 | back_sorted.size()); | |
eafe8130 TL |
2902 | d.detail.swap(detail_back); |
2903 | } | |
2904 | if (detail_front.size() != 0) { | |
2905 | ostringstream ss; | |
9f95a23c TL |
2906 | ss << "Slow OSD heartbeats on front (longest " |
2907 | << fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << "ms)"; | |
2908 | auto& d = checks->add("OSD_SLOW_PING_TIME_FRONT", HEALTH_WARN, ss.str(), | |
2909 | front_sorted.size()); | |
eafe8130 TL |
2910 | d.detail.swap(detail_front); |
2911 | } | |
2912 | } | |
2913 | ||
224ce89b WB |
2914 | // SMALLER_PGP_NUM |
2915 | // MANY_OBJECTS_PER_PG | |
2916 | if (!pg_stat.empty()) { | |
2917 | list<string> pgp_detail, many_detail; | |
b32b8144 | 2918 | const auto mon_pg_warn_min_objects = |
11fdf7f2 | 2919 | cct->_conf.get_val<int64_t>("mon_pg_warn_min_objects"); |
b32b8144 | 2920 | const auto mon_pg_warn_min_pool_objects = |
11fdf7f2 | 2921 | cct->_conf.get_val<int64_t>("mon_pg_warn_min_pool_objects"); |
b32b8144 | 2922 | const auto mon_pg_warn_max_object_skew = |
11fdf7f2 | 2923 | cct->_conf.get_val<double>("mon_pg_warn_max_object_skew"); |
224ce89b WB |
2924 | for (auto p = pg_pool_sum.begin(); |
2925 | p != pg_pool_sum.end(); | |
2926 | ++p) { | |
2927 | const pg_pool_t *pi = osdmap.get_pg_pool(p->first); | |
2928 | if (!pi) | |
2929 | continue; // in case osdmap changes haven't propagated to PGMap yet | |
2930 | const string& name = osdmap.get_pool_name(p->first); | |
11fdf7f2 TL |
2931 | // NOTE: we use pg_num_target and pgp_num_target for the purposes of |
2932 | // the warnings. If the cluster is failing to converge on the target | |
2933 | // values that is a separate issue! | |
2934 | if (pi->get_pg_num_target() > pi->get_pgp_num_target() && | |
224ce89b WB |
2935 | !(name.find(".DELETED") != string::npos && |
2936 | cct->_conf->mon_fake_pool_delete)) { | |
2937 | ostringstream ss; | |
2938 | ss << "pool " << name << " pg_num " | |
11fdf7f2 TL |
2939 | << pi->get_pg_num_target() |
2940 | << " > pgp_num " << pi->get_pgp_num_target(); | |
224ce89b WB |
2941 | pgp_detail.push_back(ss.str()); |
2942 | } | |
2943 | int average_objects_per_pg = pg_sum.stats.sum.num_objects / pg_stat.size(); | |
2944 | if (average_objects_per_pg > 0 && | |
b32b8144 FG |
2945 | pg_sum.stats.sum.num_objects >= mon_pg_warn_min_objects && |
2946 | p->second.stats.sum.num_objects >= mon_pg_warn_min_pool_objects) { | |
11fdf7f2 TL |
2947 | int objects_per_pg = p->second.stats.sum.num_objects / |
2948 | pi->get_pg_num_target(); | |
224ce89b | 2949 | float ratio = (float)objects_per_pg / (float)average_objects_per_pg; |
b32b8144 FG |
2950 | if (mon_pg_warn_max_object_skew > 0 && |
2951 | ratio > mon_pg_warn_max_object_skew) { | |
224ce89b WB |
2952 | ostringstream ss; |
2953 | ss << "pool " << name << " objects per pg (" | |
2954 | << objects_per_pg << ") is more than " << ratio | |
2955 | << " times cluster average (" | |
2956 | << average_objects_per_pg << ")"; | |
2957 | many_detail.push_back(ss.str()); | |
2958 | } | |
2959 | } | |
2960 | } | |
2961 | if (!pgp_detail.empty()) { | |
2962 | ostringstream ss; | |
2963 | ss << pgp_detail.size() << " pools have pg_num > pgp_num"; | |
9f95a23c TL |
2964 | auto& d = checks->add("SMALLER_PGP_NUM", HEALTH_WARN, ss.str(), |
2965 | pgp_detail.size()); | |
224ce89b WB |
2966 | d.detail.swap(pgp_detail); |
2967 | } | |
2968 | if (!many_detail.empty()) { | |
2969 | ostringstream ss; | |
2970 | ss << many_detail.size() << " pools have many more objects per pg than" | |
2971 | << " average"; | |
9f95a23c TL |
2972 | auto& d = checks->add("MANY_OBJECTS_PER_PG", HEALTH_WARN, ss.str(), |
2973 | many_detail.size()); | |
224ce89b WB |
2974 | d.detail.swap(many_detail); |
2975 | } | |
2976 | } | |
2977 | ||
2978 | // POOL_FULL | |
2979 | // POOL_NEAR_FULL | |
2980 | { | |
11fdf7f2 TL |
2981 | float warn_threshold = (float)g_conf().get_val<int64_t>("mon_pool_quota_warn_threshold")/100; |
2982 | float crit_threshold = (float)g_conf().get_val<int64_t>("mon_pool_quota_crit_threshold")/100; | |
224ce89b WB |
2983 | list<string> full_detail, nearfull_detail; |
2984 | unsigned full_pools = 0, nearfull_pools = 0; | |
2985 | for (auto it : pools) { | |
2986 | auto it2 = pg_pool_sum.find(it.first); | |
2987 | if (it2 == pg_pool_sum.end()) { | |
2988 | continue; | |
2989 | } | |
2990 | const pool_stat_t *pstat = &it2->second; | |
2991 | const object_stat_sum_t& sum = pstat->stats.sum; | |
2992 | const string& pool_name = osdmap.get_pool_name(it.first); | |
2993 | const pg_pool_t &pool = it.second; | |
2994 | bool full = false, nearfull = false; | |
2995 | if (pool.quota_max_objects > 0) { | |
2996 | stringstream ss; | |
2997 | if ((uint64_t)sum.num_objects >= pool.quota_max_objects) { | |
2998 | } else if (crit_threshold > 0 && | |
2999 | sum.num_objects >= pool.quota_max_objects*crit_threshold) { | |
3000 | ss << "pool '" << pool_name | |
3001 | << "' has " << sum.num_objects << " objects" | |
3002 | << " (max " << pool.quota_max_objects << ")"; | |
3003 | full_detail.push_back(ss.str()); | |
3004 | full = true; | |
3005 | } else if (warn_threshold > 0 && | |
3006 | sum.num_objects >= pool.quota_max_objects*warn_threshold) { | |
3007 | ss << "pool '" << pool_name | |
3008 | << "' has " << sum.num_objects << " objects" | |
3009 | << " (max " << pool.quota_max_objects << ")"; | |
3010 | nearfull_detail.push_back(ss.str()); | |
3011 | nearfull = true; | |
3012 | } | |
3013 | } | |
3014 | if (pool.quota_max_bytes > 0) { | |
3015 | stringstream ss; | |
3016 | if ((uint64_t)sum.num_bytes >= pool.quota_max_bytes) { | |
3017 | } else if (crit_threshold > 0 && | |
3018 | sum.num_bytes >= pool.quota_max_bytes*crit_threshold) { | |
3019 | ss << "pool '" << pool_name | |
1adf2230 AA |
3020 | << "' has " << byte_u_t(sum.num_bytes) |
3021 | << " (max " << byte_u_t(pool.quota_max_bytes) << ")"; | |
224ce89b WB |
3022 | full_detail.push_back(ss.str()); |
3023 | full = true; | |
3024 | } else if (warn_threshold > 0 && | |
3025 | sum.num_bytes >= pool.quota_max_bytes*warn_threshold) { | |
3026 | ss << "pool '" << pool_name | |
1adf2230 AA |
3027 | << "' has " << byte_u_t(sum.num_bytes) |
3028 | << " (max " << byte_u_t(pool.quota_max_bytes) << ")"; | |
224ce89b WB |
3029 | nearfull_detail.push_back(ss.str()); |
3030 | nearfull = true; | |
3031 | } | |
3032 | } | |
3033 | if (full) { | |
3034 | ++full_pools; | |
3035 | } | |
3036 | if (nearfull) { | |
3037 | ++nearfull_pools; | |
3038 | } | |
3039 | } | |
3040 | if (full_pools) { | |
3041 | ostringstream ss; | |
3042 | ss << full_pools << " pools full"; | |
9f95a23c | 3043 | auto& d = checks->add("POOL_FULL", HEALTH_ERR, ss.str(), full_pools); |
224ce89b WB |
3044 | d.detail.swap(full_detail); |
3045 | } | |
3046 | if (nearfull_pools) { | |
3047 | ostringstream ss; | |
11fdf7f2 | 3048 | ss << nearfull_pools << " pools nearfull"; |
9f95a23c | 3049 | auto& d = checks->add("POOL_NEAR_FULL", HEALTH_WARN, ss.str(), nearfull_pools); |
224ce89b WB |
3050 | d.detail.swap(nearfull_detail); |
3051 | } | |
3052 | } | |
3053 | ||
3054 | // OBJECT_MISPLACED | |
3055 | if (pg_sum.stats.sum.num_objects_misplaced && | |
11fdf7f2 TL |
3056 | pg_sum.stats.sum.num_object_copies > 0 && |
3057 | cct->_conf->mon_warn_on_misplaced) { | |
224ce89b WB |
3058 | double pc = (double)pg_sum.stats.sum.num_objects_misplaced / |
3059 | (double)pg_sum.stats.sum.num_object_copies * (double)100.0; | |
3060 | char b[20]; | |
3061 | snprintf(b, sizeof(b), "%.3lf", pc); | |
3062 | ostringstream ss; | |
3063 | ss << pg_sum.stats.sum.num_objects_misplaced | |
3064 | << "/" << pg_sum.stats.sum.num_object_copies << " objects misplaced (" | |
3065 | << b << "%)"; | |
9f95a23c TL |
3066 | checks->add("OBJECT_MISPLACED", HEALTH_WARN, ss.str(), |
3067 | pg_sum.stats.sum.num_objects_misplaced); | |
224ce89b WB |
3068 | } |
3069 | ||
3070 | // OBJECT_UNFOUND | |
3071 | if (pg_sum.stats.sum.num_objects_unfound && | |
3072 | pg_sum.stats.sum.num_objects) { | |
3073 | double pc = (double)pg_sum.stats.sum.num_objects_unfound / | |
3074 | (double)pg_sum.stats.sum.num_objects * (double)100.0; | |
3075 | char b[20]; | |
3076 | snprintf(b, sizeof(b), "%.3lf", pc); | |
3077 | ostringstream ss; | |
3078 | ss << pg_sum.stats.sum.num_objects_unfound | |
b5b8bbf5 | 3079 | << "/" << pg_sum.stats.sum.num_objects << " objects unfound (" << b << "%)"; |
9f95a23c TL |
3080 | auto& d = checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str(), |
3081 | pg_sum.stats.sum.num_objects_unfound); | |
c07f9fc5 FG |
3082 | |
3083 | for (auto& p : pg_stat) { | |
3084 | if (p.second.stats.sum.num_objects_unfound) { | |
3085 | ostringstream ss; | |
3086 | ss << "pg " << p.first | |
3087 | << " has " << p.second.stats.sum.num_objects_unfound | |
3088 | << " unfound objects"; | |
3089 | d.detail.push_back(ss.str()); | |
3090 | if (d.detail.size() > max) { | |
3091 | d.detail.push_back("(additional pgs left out for brevity)"); | |
3092 | break; | |
3093 | } | |
3094 | } | |
3095 | } | |
224ce89b WB |
3096 | } |
3097 | ||
3098 | // REQUEST_SLOW | |
3099 | // REQUEST_STUCK | |
11fdf7f2 | 3100 | // SLOW_OPS unifies them in mimic. |
9f95a23c | 3101 | if (osdmap.require_osd_release < ceph_release_t::mimic && |
11fdf7f2 | 3102 | cct->_conf->mon_osd_warn_op_age > 0 && |
c07f9fc5 FG |
3103 | !osd_sum.op_queue_age_hist.h.empty() && |
3104 | osd_sum.op_queue_age_hist.upper_bound() / 1000.0 > | |
224ce89b WB |
3105 | cct->_conf->mon_osd_warn_op_age) { |
3106 | list<string> warn_detail, error_detail; | |
3107 | unsigned warn = 0, error = 0; | |
3108 | float err_age = | |
3109 | cct->_conf->mon_osd_warn_op_age * cct->_conf->mon_osd_err_op_age_ratio; | |
3110 | const pow2_hist_t& h = osd_sum.op_queue_age_hist; | |
3111 | for (unsigned i = h.h.size() - 1; i > 0; --i) { | |
3112 | float ub = (float)(1 << i) / 1000.0; | |
3113 | if (ub < cct->_conf->mon_osd_warn_op_age) | |
3114 | break; | |
3115 | if (h.h[i]) { | |
3116 | ostringstream ss; | |
3117 | ss << h.h[i] << " ops are blocked > " << ub << " sec"; | |
3118 | if (ub > err_age) { | |
3119 | error += h.h[i]; | |
3120 | error_detail.push_back(ss.str()); | |
3121 | } else { | |
3122 | warn += h.h[i]; | |
3123 | warn_detail.push_back(ss.str()); | |
3124 | } | |
3125 | } | |
3126 | } | |
3127 | ||
3128 | map<float,set<int>> warn_osd_by_max; // max -> osds | |
3129 | map<float,set<int>> error_osd_by_max; // max -> osds | |
3130 | if (!warn_detail.empty() || !error_detail.empty()) { | |
3131 | for (auto& p : osd_stat) { | |
3132 | const pow2_hist_t& h = p.second.op_queue_age_hist; | |
3133 | for (unsigned i = h.h.size() - 1; i > 0; --i) { | |
3134 | float ub = (float)(1 << i) / 1000.0; | |
3135 | if (ub < cct->_conf->mon_osd_warn_op_age) | |
3136 | break; | |
3137 | if (h.h[i]) { | |
3138 | if (ub > err_age) { | |
3139 | error_osd_by_max[ub].insert(p.first); | |
3140 | } else { | |
3141 | warn_osd_by_max[ub].insert(p.first); | |
3142 | } | |
3143 | break; | |
3144 | } | |
3145 | } | |
3146 | } | |
3147 | } | |
3148 | ||
3149 | if (!warn_detail.empty()) { | |
11fdf7f2 TL |
3150 | ostringstream ss; |
3151 | ss << warn << " slow requests are blocked > " | |
3152 | << cct->_conf->mon_osd_warn_op_age << " sec"; | |
9f95a23c | 3153 | auto& d = checks->add("REQUEST_SLOW", HEALTH_WARN, ss.str(), warn); |
11fdf7f2 | 3154 | d.detail.swap(warn_detail); |
224ce89b WB |
3155 | int left = max; |
3156 | for (auto& p : warn_osd_by_max) { | |
3157 | ostringstream ss; | |
3158 | if (p.second.size() > 1) { | |
c07f9fc5 FG |
3159 | ss << "osds " << p.second |
3160 | << " have blocked requests > " << p.first << " sec"; | |
224ce89b | 3161 | } else { |
c07f9fc5 FG |
3162 | ss << "osd." << *p.second.begin() |
3163 | << " has blocked requests > " << p.first << " sec"; | |
224ce89b | 3164 | } |
11fdf7f2 | 3165 | d.detail.push_back(ss.str()); |
224ce89b WB |
3166 | if (--left == 0) { |
3167 | break; | |
3168 | } | |
3169 | } | |
3170 | } | |
3171 | if (!error_detail.empty()) { | |
11fdf7f2 TL |
3172 | ostringstream ss; |
3173 | ss << error << " stuck requests are blocked > " | |
3174 | << err_age << " sec"; | |
9f95a23c | 3175 | auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str(), error); |
11fdf7f2 | 3176 | d.detail.swap(error_detail); |
224ce89b WB |
3177 | int left = max; |
3178 | for (auto& p : error_osd_by_max) { | |
3179 | ostringstream ss; | |
3180 | if (p.second.size() > 1) { | |
c07f9fc5 FG |
3181 | ss << "osds " << p.second |
3182 | << " have stuck requests > " << p.first << " sec"; | |
224ce89b | 3183 | } else { |
c07f9fc5 FG |
3184 | ss << "osd." << *p.second.begin() |
3185 | << " has stuck requests > " << p.first << " sec"; | |
224ce89b | 3186 | } |
11fdf7f2 | 3187 | d.detail.push_back(ss.str()); |
224ce89b WB |
3188 | if (--left == 0) { |
3189 | break; | |
3190 | } | |
3191 | } | |
3192 | } | |
3193 | } | |
7c673cae | 3194 | |
11fdf7f2 TL |
3195 | // OBJECT_STORE_WARN |
3196 | if (osd_sum.os_alerts.size()) { | |
3197 | map<string, pair<size_t, list<string>>> os_alerts_sum; | |
3198 | ||
3199 | for (auto& a : osd_sum.os_alerts) { | |
3200 | int left = max; | |
3201 | string s0 = " osd."; | |
3202 | s0 += stringify(a.first); | |
3203 | for (auto& aa : a.second) { | |
3204 | string s(s0); | |
3205 | s += " "; | |
3206 | s += aa.second; | |
3207 | auto it = os_alerts_sum.find(aa.first); | |
3208 | if (it == os_alerts_sum.end()) { | |
3209 | list<string> d; | |
3210 | d.emplace_back(s); | |
3211 | os_alerts_sum.emplace(aa.first, std::make_pair(1, d)); | |
3212 | } else { | |
3213 | auto& p = it->second; | |
3214 | ++p.first; | |
3215 | p.second.emplace_back(s); | |
3216 | } | |
3217 | if (--left == 0) { | |
3218 | break; | |
3219 | } | |
3220 | } | |
3221 | } | |
3222 | ||
3223 | for (auto& asum : os_alerts_sum) { | |
9f95a23c | 3224 | string summary = stringify(asum.second.first) + " OSD(s)"; |
11fdf7f2 | 3225 | if (asum.first == "BLUEFS_SPILLOVER") { |
9f95a23c | 3226 | summary += " experiencing BlueFS spillover"; |
11fdf7f2 | 3227 | } else if (asum.first == "BLUESTORE_NO_COMPRESSION") { |
9f95a23c | 3228 | summary += " have broken BlueStore compression"; |
81eedcae | 3229 | } else if (asum.first == "BLUESTORE_LEGACY_STATFS") { |
9f95a23c | 3230 | summary += " reporting legacy (not per-pool) BlueStore stats"; |
81eedcae | 3231 | } else if (asum.first == "BLUESTORE_DISK_SIZE_MISMATCH") { |
9f95a23c TL |
3232 | summary += " have dangerous mismatch between BlueStore block device and free list sizes"; |
3233 | } else if (asum.first == "BLUESTORE_NO_PER_POOL_OMAP") { | |
3234 | summary += " reporting legacy (not per-pool) BlueStore omap usage stats"; | |
11fdf7f2 | 3235 | } |
9f95a23c | 3236 | auto& d = checks->add(asum.first, HEALTH_WARN, summary, asum.second.first); |
11fdf7f2 TL |
3237 | for (auto& s : asum.second.second) { |
3238 | d.detail.push_back(s); | |
3239 | } | |
3240 | } | |
3241 | } | |
224ce89b WB |
3242 | // PG_NOT_SCRUBBED |
3243 | // PG_NOT_DEEP_SCRUBBED | |
11fdf7f2 TL |
3244 | if (cct->_conf->mon_warn_pg_not_scrubbed_ratio || |
3245 | cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio) { | |
a8e16298 TL |
3246 | list<string> detail, deep_detail; |
3247 | int detail_max = max, deep_detail_max = max; | |
3248 | int detail_more = 0, deep_detail_more = 0; | |
3249 | int detail_total = 0, deep_detail_total = 0; | |
3250 | for (auto& p : pg_stat) { | |
3251 | int64_t pnum = p.first.pool(); | |
3252 | auto pool = osdmap.get_pg_pool(pnum); | |
3253 | if (!pool) | |
3254 | continue; | |
11fdf7f2 | 3255 | if (cct->_conf->mon_warn_pg_not_scrubbed_ratio) { |
a8e16298 TL |
3256 | double scrub_max_interval = 0; |
3257 | pool->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &scrub_max_interval); | |
3258 | if (scrub_max_interval <= 0) { | |
3259 | scrub_max_interval = cct->_conf->osd_scrub_max_interval; | |
c07f9fc5 | 3260 | } |
11fdf7f2 | 3261 | const double age = (cct->_conf->mon_warn_pg_not_scrubbed_ratio * scrub_max_interval) + |
a8e16298 TL |
3262 | scrub_max_interval; |
3263 | utime_t cutoff = now; | |
3264 | cutoff -= age; | |
3265 | if (p.second.last_scrub_stamp < cutoff) { | |
3266 | if (detail_max > 0) { | |
3267 | ostringstream ss; | |
3268 | ss << "pg " << p.first << " not scrubbed since " | |
3269 | << p.second.last_scrub_stamp; | |
3270 | detail.push_back(ss.str()); | |
3271 | --detail_max; | |
3272 | } else { | |
3273 | ++detail_more; | |
3274 | } | |
3275 | ++detail_total; | |
3276 | } | |
3277 | } | |
11fdf7f2 | 3278 | if (cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio) { |
a8e16298 TL |
3279 | double deep_scrub_interval = 0; |
3280 | pool->opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval); | |
3281 | if (deep_scrub_interval <= 0) { | |
3282 | deep_scrub_interval = cct->_conf->osd_deep_scrub_interval; | |
3283 | } | |
11fdf7f2 | 3284 | double deep_age = (cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio * deep_scrub_interval) + |
a8e16298 TL |
3285 | deep_scrub_interval; |
3286 | utime_t deep_cutoff = now; | |
3287 | deep_cutoff -= deep_age; | |
3288 | if (p.second.last_deep_scrub_stamp < deep_cutoff) { | |
3289 | if (deep_detail_max > 0) { | |
3290 | ostringstream ss; | |
3291 | ss << "pg " << p.first << " not deep-scrubbed since " | |
3292 | << p.second.last_deep_scrub_stamp; | |
3293 | deep_detail.push_back(ss.str()); | |
3294 | --deep_detail_max; | |
3295 | } else { | |
3296 | ++deep_detail_more; | |
3297 | } | |
3298 | ++deep_detail_total; | |
c07f9fc5 | 3299 | } |
224ce89b | 3300 | } |
a8e16298 TL |
3301 | } |
3302 | if (detail_total) { | |
3303 | ostringstream ss; | |
3304 | ss << detail_total << " pgs not scrubbed in time"; | |
9f95a23c | 3305 | auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str(), detail_total); |
a8e16298 | 3306 | |
c07f9fc5 | 3307 | if (!detail.empty()) { |
c07f9fc5 | 3308 | d.detail.swap(detail); |
a8e16298 TL |
3309 | |
3310 | if (detail_more) { | |
3311 | ostringstream ss; | |
3312 | ss << detail_more << " more pgs... "; | |
3313 | d.detail.push_back(ss.str()); | |
3314 | } | |
c07f9fc5 | 3315 | } |
a8e16298 TL |
3316 | } |
3317 | if (deep_detail_total) { | |
3318 | ostringstream ss; | |
3319 | ss << deep_detail_total << " pgs not deep-scrubbed in time"; | |
9f95a23c TL |
3320 | auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str(), |
3321 | deep_detail_total); | |
a8e16298 | 3322 | |
c07f9fc5 | 3323 | if (!deep_detail.empty()) { |
c07f9fc5 | 3324 | d.detail.swap(deep_detail); |
a8e16298 TL |
3325 | |
3326 | if (deep_detail_more) { | |
3327 | ostringstream ss; | |
3328 | ss << deep_detail_more << " more pgs... "; | |
3329 | d.detail.push_back(ss.str()); | |
3330 | } | |
c07f9fc5 FG |
3331 | } |
3332 | } | |
3333 | } | |
3334 | ||
3335 | // POOL_APP | |
11fdf7f2 | 3336 | if (g_conf().get_val<bool>("mon_warn_on_pool_no_app")) { |
c07f9fc5 FG |
3337 | list<string> detail; |
3338 | for (auto &it : pools) { | |
3339 | const pg_pool_t &pool = it.second; | |
3340 | const string& pool_name = osdmap.get_pool_name(it.first); | |
3341 | auto it2 = pg_pool_sum.find(it.first); | |
3342 | if (it2 == pg_pool_sum.end()) { | |
3343 | continue; | |
3344 | } | |
3345 | const pool_stat_t *pstat = &it2->second; | |
3346 | if (pstat == nullptr) { | |
3347 | continue; | |
3348 | } | |
3349 | const object_stat_sum_t& sum = pstat->stats.sum; | |
3350 | // application metadata is not encoded until luminous is minimum | |
3351 | // required release | |
11fdf7f2 TL |
3352 | if (sum.num_objects > 0 && pool.application_metadata.empty() && |
3353 | !pool.is_tier()) { | |
c07f9fc5 FG |
3354 | stringstream ss; |
3355 | ss << "application not enabled on pool '" << pool_name << "'"; | |
3356 | detail.push_back(ss.str()); | |
224ce89b WB |
3357 | } |
3358 | } | |
3359 | if (!detail.empty()) { | |
3360 | ostringstream ss; | |
9f95a23c TL |
3361 | ss << detail.size() << " pool(s) do not have an application enabled"; |
3362 | auto& d = checks->add("POOL_APP_NOT_ENABLED", HEALTH_WARN, ss.str(), | |
3363 | detail.size()); | |
c07f9fc5 FG |
3364 | stringstream tip; |
3365 | tip << "use 'ceph osd pool application enable <pool-name> " | |
3366 | << "<app-name>', where <app-name> is 'cephfs', 'rbd', 'rgw', " | |
3367 | << "or freeform for custom applications."; | |
3368 | detail.push_back(tip.str()); | |
224ce89b WB |
3369 | d.detail.swap(detail); |
3370 | } | |
31f18b77 | 3371 | } |
b32b8144 FG |
3372 | |
3373 | // PG_SLOW_SNAP_TRIMMING | |
3374 | if (!pg_stat.empty() && cct->_conf->mon_osd_snap_trim_queue_warn_on > 0) { | |
3375 | uint32_t snapthreshold = cct->_conf->mon_osd_snap_trim_queue_warn_on; | |
3376 | uint64_t snaptrimq_exceeded = 0; | |
3377 | uint32_t longest_queue = 0; | |
3378 | const pg_t* longest_q_pg = nullptr; | |
3379 | list<string> detail; | |
3380 | ||
3381 | for (auto& i: pg_stat) { | |
3382 | uint32_t current_len = i.second.snaptrimq_len; | |
3383 | if (current_len >= snapthreshold) { | |
3384 | snaptrimq_exceeded++; | |
3385 | if (longest_queue <= current_len) { | |
3386 | longest_q_pg = &i.first; | |
3387 | longest_queue = current_len; | |
3388 | } | |
3389 | if (detail.size() < max - 1) { | |
3390 | stringstream ss; | |
3391 | ss << "snap trim queue for pg " << i.first << " at " << current_len; | |
3392 | detail.push_back(ss.str()); | |
3393 | continue; | |
3394 | } | |
3395 | if (detail.size() < max) { | |
3396 | detail.push_back("...more pgs affected"); | |
3397 | continue; | |
3398 | } | |
3399 | } | |
3400 | } | |
3401 | ||
3402 | if (snaptrimq_exceeded) { | |
3403 | { | |
3404 | ostringstream ss; | |
3405 | ss << "longest queue on pg " << *longest_q_pg << " at " << longest_queue; | |
3406 | detail.push_back(ss.str()); | |
3407 | } | |
3408 | ||
3409 | stringstream ss; | |
3410 | ss << "snap trim queue for " << snaptrimq_exceeded << " pg(s) >= " << snapthreshold << " (mon_osd_snap_trim_queue_warn_on)"; | |
9f95a23c TL |
3411 | auto& d = checks->add("PG_SLOW_SNAP_TRIMMING", HEALTH_WARN, ss.str(), |
3412 | snaptrimq_exceeded); | |
b32b8144 FG |
3413 | detail.push_back("try decreasing \"osd snap trim sleep\" and/or increasing \"osd pg max concurrent snap trims\"."); |
3414 | d.detail.swap(detail); | |
3415 | } | |
3416 | } | |
31f18b77 | 3417 | } |
7c673cae | 3418 | |
9f95a23c TL |
3419 | void PGMap::print_summary(ceph::Formatter *f, ostream *out) const |
3420 | { | |
3421 | if (f) { | |
3422 | f->open_array_section("pgs_by_pool_state"); | |
3423 | for (auto& i: num_pg_by_pool_state) { | |
3424 | f->open_object_section("per_pool_pgs_by_state"); | |
3425 | f->dump_int("pool_id", i.first); | |
3426 | f->open_array_section("pg_state_counts"); | |
3427 | for (auto& j : i.second) { | |
3428 | f->open_object_section("pg_state_count"); | |
3429 | f->dump_string("state_name", pg_state_string(j.first)); | |
3430 | f->dump_int("count", j.second); | |
3431 | f->close_section(); | |
3432 | } | |
3433 | f->close_section(); | |
3434 | f->close_section(); | |
3435 | } | |
3436 | f->close_section(); | |
3437 | } | |
3438 | PGMapDigest::print_summary(f, out); | |
3439 | } | |
3440 | ||
7c673cae FG |
3441 | int process_pg_map_command( |
3442 | const string& orig_prefix, | |
11fdf7f2 | 3443 | const cmdmap_t& orig_cmdmap, |
7c673cae FG |
3444 | const PGMap& pg_map, |
3445 | const OSDMap& osdmap, | |
9f95a23c | 3446 | ceph::Formatter *f, |
7c673cae FG |
3447 | stringstream *ss, |
3448 | bufferlist *odata) | |
3449 | { | |
3450 | string prefix = orig_prefix; | |
11fdf7f2 TL |
3451 | auto cmdmap = orig_cmdmap; |
3452 | ||
3453 | string omap_stats_note = | |
3454 | "\n* NOTE: Omap statistics are gathered during deep scrub and " | |
9f95a23c | 3455 | "may be inaccurate soon afterwards depending on utilization. See " |
11fdf7f2 TL |
3456 | "http://docs.ceph.com/docs/master/dev/placement-group/#omap-statistics " |
3457 | "for further details.\n"; | |
3458 | bool omap_stats_note_required = false; | |
7c673cae FG |
3459 | |
3460 | // perhaps these would be better in the parsing, but it's weird | |
3461 | bool primary = false; | |
3462 | if (prefix == "pg dump_json") { | |
3463 | vector<string> v; | |
3464 | v.push_back(string("all")); | |
3465 | cmd_putval(g_ceph_context, cmdmap, "format", string("json")); | |
3466 | cmd_putval(g_ceph_context, cmdmap, "dumpcontents", v); | |
3467 | prefix = "pg dump"; | |
3468 | } else if (prefix == "pg dump_pools_json") { | |
3469 | vector<string> v; | |
3470 | v.push_back(string("pools")); | |
3471 | cmd_putval(g_ceph_context, cmdmap, "format", string("json")); | |
3472 | cmd_putval(g_ceph_context, cmdmap, "dumpcontents", v); | |
3473 | prefix = "pg dump"; | |
3474 | } else if (prefix == "pg ls-by-primary") { | |
3475 | primary = true; | |
3476 | prefix = "pg ls"; | |
3477 | } else if (prefix == "pg ls-by-osd") { | |
3478 | prefix = "pg ls"; | |
3479 | } else if (prefix == "pg ls-by-pool") { | |
3480 | prefix = "pg ls"; | |
3481 | string poolstr; | |
9f95a23c | 3482 | cmd_getval(cmdmap, "poolstr", poolstr); |
7c673cae FG |
3483 | int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str()); |
3484 | if (pool < 0) { | |
3485 | *ss << "pool " << poolstr << " does not exist"; | |
3486 | return -ENOENT; | |
3487 | } | |
3488 | cmd_putval(g_ceph_context, cmdmap, "pool", pool); | |
3489 | } | |
3490 | ||
7c673cae FG |
3491 | stringstream ds; |
3492 | if (prefix == "pg stat") { | |
3493 | if (f) { | |
3494 | f->open_object_section("pg_summary"); | |
3495 | pg_map.print_oneline_summary(f, NULL); | |
3496 | f->close_section(); | |
3497 | f->flush(ds); | |
3498 | } else { | |
3499 | ds << pg_map; | |
3500 | } | |
3501 | odata->append(ds); | |
3502 | return 0; | |
3503 | } | |
3504 | ||
3505 | if (prefix == "pg getmap") { | |
3506 | pg_map.encode(*odata); | |
3507 | *ss << "got pgmap version " << pg_map.version; | |
3508 | return 0; | |
3509 | } | |
3510 | ||
3511 | if (prefix == "pg dump") { | |
3512 | string val; | |
3513 | vector<string> dumpcontents; | |
3514 | set<string> what; | |
9f95a23c | 3515 | if (cmd_getval(cmdmap, "dumpcontents", dumpcontents)) { |
7c673cae FG |
3516 | copy(dumpcontents.begin(), dumpcontents.end(), |
3517 | inserter(what, what.end())); | |
3518 | } | |
3519 | if (what.empty()) | |
3520 | what.insert("all"); | |
3521 | if (f) { | |
3522 | if (what.count("all")) { | |
3523 | f->open_object_section("pg_map"); | |
3524 | pg_map.dump(f); | |
3525 | f->close_section(); | |
3526 | } else if (what.count("summary") || what.count("sum")) { | |
3527 | f->open_object_section("pg_map"); | |
3528 | pg_map.dump_basic(f); | |
3529 | f->close_section(); | |
3530 | } else { | |
3531 | if (what.count("pools")) { | |
3532 | pg_map.dump_pool_stats(f); | |
3533 | } | |
3534 | if (what.count("osds")) { | |
3535 | pg_map.dump_osd_stats(f); | |
3536 | } | |
3537 | if (what.count("pgs")) { | |
3538 | pg_map.dump_pg_stats(f, false); | |
3539 | } | |
3540 | if (what.count("pgs_brief")) { | |
3541 | pg_map.dump_pg_stats(f, true); | |
3542 | } | |
3543 | if (what.count("delta")) { | |
3544 | f->open_object_section("delta"); | |
3545 | pg_map.dump_delta(f); | |
3546 | f->close_section(); | |
3547 | } | |
3548 | } | |
3549 | f->flush(*odata); | |
3550 | } else { | |
3551 | if (what.count("all")) { | |
3552 | pg_map.dump(ds); | |
11fdf7f2 | 3553 | omap_stats_note_required = true; |
7c673cae FG |
3554 | } else if (what.count("summary") || what.count("sum")) { |
3555 | pg_map.dump_basic(ds); | |
3556 | pg_map.dump_pg_sum_stats(ds, true); | |
3557 | pg_map.dump_osd_sum_stats(ds); | |
11fdf7f2 | 3558 | omap_stats_note_required = true; |
7c673cae FG |
3559 | } else { |
3560 | if (what.count("pgs_brief")) { | |
3561 | pg_map.dump_pg_stats(ds, true); | |
3562 | } | |
3563 | bool header = true; | |
3564 | if (what.count("pgs")) { | |
3565 | pg_map.dump_pg_stats(ds, false); | |
3566 | header = false; | |
11fdf7f2 | 3567 | omap_stats_note_required = true; |
7c673cae FG |
3568 | } |
3569 | if (what.count("pools")) { | |
3570 | pg_map.dump_pool_stats(ds, header); | |
11fdf7f2 | 3571 | omap_stats_note_required = true; |
7c673cae FG |
3572 | } |
3573 | if (what.count("osds")) { | |
3574 | pg_map.dump_osd_stats(ds); | |
3575 | } | |
3576 | } | |
3577 | odata->append(ds); | |
11fdf7f2 TL |
3578 | if (omap_stats_note_required) { |
3579 | odata->append(omap_stats_note); | |
3580 | } | |
7c673cae FG |
3581 | } |
3582 | *ss << "dumped " << what; | |
3583 | return 0; | |
3584 | } | |
3585 | ||
3586 | if (prefix == "pg ls") { | |
3587 | int64_t osd = -1; | |
3588 | int64_t pool = -1; | |
3589 | vector<string>states; | |
3590 | set<pg_t> pgs; | |
9f95a23c TL |
3591 | cmd_getval(cmdmap, "pool", pool); |
3592 | cmd_getval(cmdmap, "osd", osd); | |
3593 | cmd_getval(cmdmap, "states", states); | |
7c673cae FG |
3594 | if (pool >= 0 && !osdmap.have_pg_pool(pool)) { |
3595 | *ss << "pool " << pool << " does not exist"; | |
3596 | return -ENOENT; | |
3597 | } | |
3598 | if (osd >= 0 && !osdmap.is_up(osd)) { | |
3599 | *ss << "osd " << osd << " is not up"; | |
3600 | return -EAGAIN; | |
3601 | } | |
3602 | if (states.empty()) | |
3603 | states.push_back("all"); | |
3604 | ||
11fdf7f2 | 3605 | uint64_t state = 0; |
7c673cae FG |
3606 | |
3607 | while (!states.empty()) { | |
3608 | string state_str = states.back(); | |
3609 | ||
3610 | if (state_str == "all") { | |
3611 | state = -1; | |
3612 | break; | |
3613 | } else { | |
3efd9988 FG |
3614 | auto filter = pg_string_state(state_str); |
3615 | if (!filter) { | |
c07f9fc5 FG |
3616 | *ss << "'" << state_str << "' is not a valid pg state," |
3617 | << " available choices: " << pg_state_string(0xFFFFFFFF); | |
3618 | return -EINVAL; | |
3619 | } | |
3efd9988 | 3620 | state |= *filter; |
7c673cae FG |
3621 | } |
3622 | ||
3623 | states.pop_back(); | |
3624 | } | |
3625 | ||
3626 | pg_map.get_filtered_pg_stats(state, pool, osd, primary, pgs); | |
3627 | ||
3628 | if (f && !pgs.empty()) { | |
3629 | pg_map.dump_filtered_pg_stats(f, pgs); | |
3630 | f->flush(*odata); | |
3631 | } else if (!pgs.empty()) { | |
3632 | pg_map.dump_filtered_pg_stats(ds, pgs); | |
3633 | odata->append(ds); | |
11fdf7f2 | 3634 | odata->append(omap_stats_note); |
7c673cae FG |
3635 | } |
3636 | return 0; | |
3637 | } | |
3638 | ||
3639 | if (prefix == "pg dump_stuck") { | |
3640 | vector<string> stuckop_vec; | |
9f95a23c | 3641 | cmd_getval(cmdmap, "stuckops", stuckop_vec); |
7c673cae FG |
3642 | if (stuckop_vec.empty()) |
3643 | stuckop_vec.push_back("unclean"); | |
3644 | int64_t threshold; | |
9f95a23c | 3645 | cmd_getval(cmdmap, "threshold", threshold, |
11fdf7f2 | 3646 | g_conf().get_val<int64_t>("mon_pg_stuck_threshold")); |
7c673cae | 3647 | |
11fdf7f2 | 3648 | if (pg_map.dump_stuck_pg_stats(ds, f, (int)threshold, stuckop_vec) < 0) { |
7c673cae | 3649 | *ss << "failed"; |
11fdf7f2 | 3650 | } else { |
7c673cae | 3651 | *ss << "ok"; |
11fdf7f2 TL |
3652 | } |
3653 | odata->append(ds); | |
7c673cae FG |
3654 | return 0; |
3655 | } | |
3656 | ||
3657 | if (prefix == "pg debug") { | |
3658 | string debugop; | |
9f95a23c | 3659 | cmd_getval(cmdmap, "debugop", debugop, |
7c673cae FG |
3660 | string("unfound_objects_exist")); |
3661 | if (debugop == "unfound_objects_exist") { | |
3662 | bool unfound_objects_exist = false; | |
3663 | for (const auto& p : pg_map.pg_stat) { | |
3664 | if (p.second.stats.sum.num_objects_unfound > 0) { | |
3665 | unfound_objects_exist = true; | |
3666 | break; | |
3667 | } | |
3668 | } | |
3669 | if (unfound_objects_exist) | |
3670 | ds << "TRUE"; | |
3671 | else | |
3672 | ds << "FALSE"; | |
3673 | odata->append(ds); | |
3674 | return 0; | |
3675 | } | |
3676 | if (debugop == "degraded_pgs_exist") { | |
3677 | bool degraded_pgs_exist = false; | |
3678 | for (const auto& p : pg_map.pg_stat) { | |
3679 | if (p.second.stats.sum.num_objects_degraded > 0) { | |
3680 | degraded_pgs_exist = true; | |
3681 | break; | |
3682 | } | |
3683 | } | |
3684 | if (degraded_pgs_exist) | |
3685 | ds << "TRUE"; | |
3686 | else | |
3687 | ds << "FALSE"; | |
3688 | odata->append(ds); | |
3689 | return 0; | |
3690 | } | |
3691 | } | |
3692 | ||
3693 | if (prefix == "osd perf") { | |
3694 | if (f) { | |
3695 | f->open_object_section("osdstats"); | |
3696 | pg_map.dump_osd_perf_stats(f); | |
3697 | f->close_section(); | |
3698 | f->flush(ds); | |
3699 | } else { | |
3700 | pg_map.print_osd_perf_stats(&ds); | |
3701 | } | |
3702 | odata->append(ds); | |
3703 | return 0; | |
3704 | } | |
3705 | ||
3706 | if (prefix == "osd blocked-by") { | |
3707 | if (f) { | |
3708 | f->open_object_section("osd_blocked_by"); | |
3709 | pg_map.dump_osd_blocked_by_stats(f); | |
3710 | f->close_section(); | |
3711 | f->flush(ds); | |
3712 | } else { | |
3713 | pg_map.print_osd_blocked_by_stats(&ds); | |
3714 | } | |
3715 | odata->append(ds); | |
3716 | return 0; | |
3717 | } | |
3718 | ||
7c673cae FG |
3719 | return -EOPNOTSUPP; |
3720 | } | |
3721 | ||
31f18b77 FG |
3722 | void PGMapUpdater::check_osd_map( |
3723 | CephContext *cct, | |
3724 | const OSDMap& osdmap, | |
3725 | const PGMap& pgmap, | |
3726 | PGMap::Incremental *pending_inc) | |
3727 | { | |
3728 | for (auto& p : pgmap.osd_stat) { | |
3729 | if (!osdmap.exists(p.first)) { | |
3730 | // remove osd_stat | |
3731 | pending_inc->rm_stat(p.first); | |
3732 | } else if (osdmap.is_out(p.first)) { | |
3733 | // zero osd_stat | |
11fdf7f2 TL |
3734 | if (p.second.statfs.total != 0) { |
3735 | pending_inc->stat_osd_out(p.first); | |
31f18b77 FG |
3736 | } |
3737 | } else if (!osdmap.is_up(p.first)) { | |
3738 | // zero the op_queue_age_hist | |
3739 | if (!p.second.op_queue_age_hist.empty()) { | |
11fdf7f2 | 3740 | pending_inc->stat_osd_down_up(p.first, pgmap); |
31f18b77 FG |
3741 | } |
3742 | } | |
3743 | } | |
3744 | ||
3745 | // deleted pgs (pools)? | |
3746 | for (auto& p : pgmap.pg_pool_sum) { | |
3747 | if (!osdmap.have_pg_pool(p.first)) { | |
3748 | ldout(cct, 10) << __func__ << " pool " << p.first << " gone, removing pgs" | |
3749 | << dendl; | |
3750 | for (auto& q : pgmap.pg_stat) { | |
11fdf7f2 | 3751 | if (q.first.pool() == p.first) { |
31f18b77 FG |
3752 | pending_inc->pg_remove.insert(q.first); |
3753 | } | |
3754 | } | |
3755 | auto q = pending_inc->pg_stat_updates.begin(); | |
3756 | while (q != pending_inc->pg_stat_updates.end()) { | |
11fdf7f2 | 3757 | if (q->first.pool() == p.first) { |
31f18b77 FG |
3758 | q = pending_inc->pg_stat_updates.erase(q); |
3759 | } else { | |
3760 | ++q; | |
3761 | } | |
3762 | } | |
3763 | } | |
3764 | } | |
3765 | ||
11fdf7f2 TL |
3766 | // new (split or new pool) or merged pgs? |
3767 | map<int64_t,unsigned> new_pg_num; | |
31f18b77 FG |
3768 | for (auto& p : osdmap.get_pools()) { |
3769 | int64_t poolid = p.first; | |
3770 | const pg_pool_t& pi = p.second; | |
3771 | auto q = pgmap.num_pg_by_pool.find(poolid); | |
3772 | unsigned my_pg_num = 0; | |
3773 | if (q != pgmap.num_pg_by_pool.end()) | |
3774 | my_pg_num = q->second; | |
3775 | unsigned pg_num = pi.get_pg_num(); | |
11fdf7f2 TL |
3776 | new_pg_num[poolid] = pg_num; |
3777 | if (my_pg_num < pg_num) { | |
224ce89b | 3778 | ldout(cct,10) << __func__ << " pool " << poolid << " pg_num " << pg_num |
11fdf7f2 | 3779 | << " > my pg_num " << my_pg_num << dendl; |
31f18b77 FG |
3780 | for (unsigned ps = my_pg_num; ps < pg_num; ++ps) { |
3781 | pg_t pgid(ps, poolid); | |
3782 | if (pending_inc->pg_stat_updates.count(pgid) == 0) { | |
224ce89b | 3783 | ldout(cct,20) << __func__ << " adding " << pgid << dendl; |
31f18b77 FG |
3784 | pg_stat_t &stats = pending_inc->pg_stat_updates[pgid]; |
3785 | stats.last_fresh = osdmap.get_modified(); | |
3786 | stats.last_active = osdmap.get_modified(); | |
3787 | stats.last_change = osdmap.get_modified(); | |
3788 | stats.last_peered = osdmap.get_modified(); | |
3789 | stats.last_clean = osdmap.get_modified(); | |
3790 | stats.last_unstale = osdmap.get_modified(); | |
3791 | stats.last_undegraded = osdmap.get_modified(); | |
3792 | stats.last_fullsized = osdmap.get_modified(); | |
3793 | stats.last_scrub_stamp = osdmap.get_modified(); | |
3794 | stats.last_deep_scrub_stamp = osdmap.get_modified(); | |
3795 | stats.last_clean_scrub_stamp = osdmap.get_modified(); | |
3796 | } | |
3797 | } | |
11fdf7f2 TL |
3798 | } else if (my_pg_num > pg_num) { |
3799 | ldout(cct,10) << __func__ << " pool " << poolid << " pg_num " << pg_num | |
3800 | << " < my pg_num " << my_pg_num << dendl; | |
3801 | for (unsigned i = pg_num; i < my_pg_num; ++i) { | |
3802 | pg_t pgid(i, poolid); | |
3803 | ldout(cct,20) << __func__ << " removing merged " << pgid << dendl; | |
3804 | if (pgmap.pg_stat.count(pgid)) { | |
3805 | pending_inc->pg_remove.insert(pgid); | |
3806 | } | |
3807 | pending_inc->pg_stat_updates.erase(pgid); | |
7c673cae | 3808 | } |
7c673cae FG |
3809 | } |
3810 | } | |
11fdf7f2 TL |
3811 | auto i = pending_inc->pg_stat_updates.begin(); |
3812 | while (i != pending_inc->pg_stat_updates.end()) { | |
3813 | auto j = new_pg_num.find(i->first.pool()); | |
3814 | if (j == new_pg_num.end() || | |
3815 | i->first.ps() >= j->second) { | |
3816 | ldout(cct,20) << __func__ << " removing pending update to old " | |
3817 | << i->first << dendl; | |
3818 | i = pending_inc->pg_stat_updates.erase(i); | |
3819 | } else { | |
3820 | ++i; | |
7c673cae FG |
3821 | } |
3822 | } | |
7c673cae FG |
3823 | } |
3824 | ||
3825 | static void _try_mark_pg_stale( | |
3826 | const OSDMap& osdmap, | |
3827 | pg_t pgid, | |
3828 | const pg_stat_t& cur, | |
3829 | PGMap::Incremental *pending_inc) | |
3830 | { | |
3831 | if ((cur.state & PG_STATE_STALE) == 0 && | |
3832 | cur.acting_primary != -1 && | |
3833 | osdmap.is_down(cur.acting_primary)) { | |
3834 | pg_stat_t *newstat; | |
3835 | auto q = pending_inc->pg_stat_updates.find(pgid); | |
3836 | if (q != pending_inc->pg_stat_updates.end()) { | |
3837 | if ((q->second.acting_primary == cur.acting_primary) || | |
3838 | ((q->second.state & PG_STATE_STALE) == 0 && | |
3839 | q->second.acting_primary != -1 && | |
3840 | osdmap.is_down(q->second.acting_primary))) { | |
3841 | newstat = &q->second; | |
3842 | } else { | |
3843 | // pending update is no longer down or already stale | |
3844 | return; | |
3845 | } | |
3846 | } else { | |
3847 | newstat = &pending_inc->pg_stat_updates[pgid]; | |
3848 | *newstat = cur; | |
3849 | } | |
3850 | dout(10) << __func__ << " marking pg " << pgid | |
3851 | << " stale (acting_primary " << newstat->acting_primary | |
3852 | << ")" << dendl; | |
3853 | newstat->state |= PG_STATE_STALE; | |
3854 | newstat->last_unstale = ceph_clock_now(); | |
3855 | } | |
3856 | } | |
3857 | ||
3858 | void PGMapUpdater::check_down_pgs( | |
3859 | const OSDMap &osdmap, | |
3860 | const PGMap &pg_map, | |
3861 | bool check_all, | |
3862 | const set<int>& need_check_down_pg_osds, | |
3863 | PGMap::Incremental *pending_inc) | |
3864 | { | |
3865 | // if a large number of osds changed state, just iterate over the whole | |
3866 | // pg map. | |
3867 | if (need_check_down_pg_osds.size() > (unsigned)osdmap.get_num_osds() * | |
11fdf7f2 | 3868 | g_conf().get_val<double>("mon_pg_check_down_all_threshold")) { |
7c673cae FG |
3869 | check_all = true; |
3870 | } | |
3871 | ||
3872 | if (check_all) { | |
3873 | for (const auto& p : pg_map.pg_stat) { | |
3874 | _try_mark_pg_stale(osdmap, p.first, p.second, pending_inc); | |
3875 | } | |
3876 | } else { | |
3877 | for (auto osd : need_check_down_pg_osds) { | |
3878 | if (osdmap.is_down(osd)) { | |
3879 | auto p = pg_map.pg_by_osd.find(osd); | |
3880 | if (p == pg_map.pg_by_osd.end()) { | |
3881 | continue; | |
3882 | } | |
3883 | for (auto pgid : p->second) { | |
3884 | const pg_stat_t &stat = pg_map.pg_stat.at(pgid); | |
11fdf7f2 | 3885 | ceph_assert(stat.acting_primary == osd); |
7c673cae FG |
3886 | _try_mark_pg_stale(osdmap, pgid, stat, pending_inc); |
3887 | } | |
3888 | } | |
3889 | } | |
3890 | } | |
3891 | } | |
3892 | ||
3893 | int reweight::by_utilization( | |
3894 | const OSDMap &osdmap, | |
3895 | const PGMap &pgm, | |
3896 | int oload, | |
3897 | double max_changef, | |
3898 | int max_osds, | |
3899 | bool by_pg, const set<int64_t> *pools, | |
3900 | bool no_increasing, | |
3901 | mempool::osdmap::map<int32_t, uint32_t>* new_weights, | |
3902 | std::stringstream *ss, | |
3903 | std::string *out_str, | |
9f95a23c | 3904 | ceph::Formatter *f) |
7c673cae FG |
3905 | { |
3906 | if (oload <= 100) { | |
3907 | *ss << "You must give a percentage higher than 100. " | |
3908 | "The reweighting threshold will be calculated as <average-utilization> " | |
3909 | "times <input-percentage>. For example, an argument of 200 would " | |
3910 | "reweight OSDs which are twice as utilized as the average OSD.\n"; | |
3911 | return -EINVAL; | |
3912 | } | |
3913 | ||
3914 | vector<int> pgs_by_osd(osdmap.get_max_osd()); | |
3915 | ||
3916 | // Avoid putting a small number (or 0) in the denominator when calculating | |
3917 | // average_util | |
3918 | double average_util; | |
3919 | if (by_pg) { | |
3920 | // by pg mapping | |
3921 | double weight_sum = 0.0; // sum up the crush weights | |
3922 | unsigned num_pg_copies = 0; | |
3923 | int num_osds = 0; | |
3924 | for (const auto& pg : pgm.pg_stat) { | |
3925 | if (pools && pools->count(pg.first.pool()) == 0) | |
3926 | continue; | |
3927 | for (const auto acting : pg.second.acting) { | |
b5b8bbf5 FG |
3928 | if (!osdmap.exists(acting)) { |
3929 | continue; | |
3930 | } | |
7c673cae FG |
3931 | if (acting >= (int)pgs_by_osd.size()) |
3932 | pgs_by_osd.resize(acting); | |
3933 | if (pgs_by_osd[acting] == 0) { | |
3934 | if (osdmap.crush->get_item_weightf(acting) <= 0) { | |
3935 | //skip if we currently can not identify item | |
3936 | continue; | |
3937 | } | |
3938 | weight_sum += osdmap.crush->get_item_weightf(acting); | |
3939 | ++num_osds; | |
3940 | } | |
3941 | ++pgs_by_osd[acting]; | |
3942 | ++num_pg_copies; | |
3943 | } | |
3944 | } | |
3945 | ||
11fdf7f2 | 3946 | if (!num_osds || (num_pg_copies / num_osds < g_conf()->mon_reweight_min_pgs_per_osd)) { |
7c673cae FG |
3947 | *ss << "Refusing to reweight: we only have " << num_pg_copies |
3948 | << " PGs across " << num_osds << " osds!\n"; | |
3949 | return -EDOM; | |
3950 | } | |
3951 | ||
3952 | average_util = (double)num_pg_copies / weight_sum; | |
3953 | } else { | |
3954 | // by osd utilization | |
11fdf7f2 TL |
3955 | int num_osd = std::max<size_t>(1, pgm.osd_stat.size()); |
3956 | if ((uint64_t)pgm.osd_sum.statfs.total / num_osd | |
3957 | < g_conf()->mon_reweight_min_bytes_per_osd) { | |
3958 | *ss << "Refusing to reweight: we only have " << pgm.osd_sum.statfs.kb() | |
7c673cae FG |
3959 | << " kb across all osds!\n"; |
3960 | return -EDOM; | |
3961 | } | |
11fdf7f2 TL |
3962 | if ((uint64_t)pgm.osd_sum.statfs.get_used_raw() / num_osd |
3963 | < g_conf()->mon_reweight_min_bytes_per_osd) { | |
3964 | *ss << "Refusing to reweight: we only have " | |
3965 | << pgm.osd_sum.statfs.kb_used_raw() | |
7c673cae FG |
3966 | << " kb used across all osds!\n"; |
3967 | return -EDOM; | |
3968 | } | |
3969 | ||
11fdf7f2 TL |
3970 | average_util = (double)pgm.osd_sum.statfs.get_used_raw() / |
3971 | (double)pgm.osd_sum.statfs.total; | |
7c673cae FG |
3972 | } |
3973 | ||
3974 | // adjust down only if we are above the threshold | |
3975 | const double overload_util = average_util * (double)oload / 100.0; | |
3976 | ||
3977 | // but aggressively adjust weights up whenever possible. | |
3978 | const double underload_util = average_util; | |
3979 | ||
3980 | const unsigned max_change = (unsigned)(max_changef * (double)0x10000); | |
3981 | ||
3982 | ostringstream oss; | |
3983 | if (f) { | |
3984 | f->open_object_section("reweight_by_utilization"); | |
3985 | f->dump_int("overload_min", oload); | |
3986 | f->dump_float("max_change", max_changef); | |
3987 | f->dump_int("max_change_osds", max_osds); | |
3988 | f->dump_float("average_utilization", average_util); | |
3989 | f->dump_float("overload_utilization", overload_util); | |
3990 | } else { | |
3991 | oss << "oload " << oload << "\n"; | |
3992 | oss << "max_change " << max_changef << "\n"; | |
3993 | oss << "max_change_osds " << max_osds << "\n"; | |
3994 | oss.precision(4); | |
3995 | oss << "average_utilization " << std::fixed << average_util << "\n"; | |
3996 | oss << "overload_utilization " << overload_util << "\n"; | |
3997 | } | |
3998 | int num_changed = 0; | |
3999 | ||
4000 | // precompute util for each OSD | |
4001 | std::vector<std::pair<int, float> > util_by_osd; | |
4002 | for (const auto& p : pgm.osd_stat) { | |
4003 | std::pair<int, float> osd_util; | |
4004 | osd_util.first = p.first; | |
4005 | if (by_pg) { | |
4006 | if (p.first >= (int)pgs_by_osd.size() || | |
4007 | pgs_by_osd[p.first] == 0) { | |
4008 | // skip if this OSD does not contain any pg | |
4009 | // belonging to the specified pool(s). | |
4010 | continue; | |
4011 | } | |
4012 | ||
4013 | if (osdmap.crush->get_item_weightf(p.first) <= 0) { | |
4014 | // skip if we are unable to locate item. | |
4015 | continue; | |
4016 | } | |
4017 | ||
11fdf7f2 TL |
4018 | osd_util.second = |
4019 | pgs_by_osd[p.first] / osdmap.crush->get_item_weightf(p.first); | |
7c673cae | 4020 | } else { |
11fdf7f2 TL |
4021 | osd_util.second = |
4022 | (double)p.second.statfs.get_used_raw() / (double)p.second.statfs.total; | |
7c673cae FG |
4023 | } |
4024 | util_by_osd.push_back(osd_util); | |
4025 | } | |
4026 | ||
4027 | // sort by absolute deviation from the mean utilization, | |
4028 | // in descending order. | |
4029 | std::sort(util_by_osd.begin(), util_by_osd.end(), | |
4030 | [average_util](std::pair<int, float> l, std::pair<int, float> r) { | |
4031 | return abs(l.second - average_util) > abs(r.second - average_util); | |
4032 | } | |
4033 | ); | |
4034 | ||
4035 | if (f) | |
4036 | f->open_array_section("reweights"); | |
4037 | ||
4038 | for (const auto& p : util_by_osd) { | |
4039 | unsigned weight = osdmap.get_weight(p.first); | |
4040 | if (weight == 0) { | |
4041 | // skip if OSD is currently out | |
4042 | continue; | |
4043 | } | |
4044 | float util = p.second; | |
4045 | ||
4046 | if (util >= overload_util) { | |
4047 | // Assign a lower weight to overloaded OSDs. The current weight | |
4048 | // is a factor to take into account the original weights, | |
4049 | // to represent e.g. differing storage capacities | |
4050 | unsigned new_weight = (unsigned)((average_util / util) * (float)weight); | |
4051 | if (weight > max_change) | |
11fdf7f2 | 4052 | new_weight = std::max(new_weight, weight - max_change); |
7c673cae FG |
4053 | new_weights->insert({p.first, new_weight}); |
4054 | if (f) { | |
4055 | f->open_object_section("osd"); | |
4056 | f->dump_int("osd", p.first); | |
4057 | f->dump_float("weight", (float)weight / (float)0x10000); | |
4058 | f->dump_float("new_weight", (float)new_weight / (float)0x10000); | |
4059 | f->close_section(); | |
4060 | } else { | |
4061 | oss << "osd." << p.first << " weight " | |
4062 | << (float)weight / (float)0x10000 << " -> " | |
4063 | << (float)new_weight / (float)0x10000 << "\n"; | |
4064 | } | |
4065 | if (++num_changed >= max_osds) | |
4066 | break; | |
4067 | } | |
4068 | if (!no_increasing && util <= underload_util) { | |
4069 | // assign a higher weight.. if we can. | |
4070 | unsigned new_weight = (unsigned)((average_util / util) * (float)weight); | |
11fdf7f2 | 4071 | new_weight = std::min(new_weight, weight + max_change); |
7c673cae FG |
4072 | if (new_weight > 0x10000) |
4073 | new_weight = 0x10000; | |
4074 | if (new_weight > weight) { | |
4075 | new_weights->insert({p.first, new_weight}); | |
4076 | oss << "osd." << p.first << " weight " | |
4077 | << (float)weight / (float)0x10000 << " -> " | |
4078 | << (float)new_weight / (float)0x10000 << "\n"; | |
4079 | if (++num_changed >= max_osds) | |
4080 | break; | |
4081 | } | |
4082 | } | |
4083 | } | |
4084 | if (f) { | |
4085 | f->close_section(); | |
4086 | } | |
4087 | ||
4088 | OSDMap newmap; | |
4089 | newmap.deepish_copy_from(osdmap); | |
4090 | OSDMap::Incremental newinc; | |
4091 | newinc.fsid = newmap.get_fsid(); | |
4092 | newinc.epoch = newmap.get_epoch() + 1; | |
4093 | newinc.new_weight = *new_weights; | |
4094 | newmap.apply_incremental(newinc); | |
4095 | ||
4096 | osdmap.summarize_mapping_stats(&newmap, pools, out_str, f); | |
4097 | ||
4098 | if (f) { | |
4099 | f->close_section(); | |
4100 | } else { | |
4101 | *out_str += "\n"; | |
4102 | *out_str += oss.str(); | |
4103 | } | |
4104 | return num_changed; | |
4105 | } |