]> git.proxmox.com Git - ceph.git/blob - ceph/src/mon/PGMap.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / mon / PGMap.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 /*
16 * Placement Group Map. Placement Groups are logical sets of objects
17 * that are replicated by the same set of devices. pgid=(r,hash(o)&m)
18 * where & is a bit-wise AND and m=2^k-1
19 */
20
21 #ifndef CEPH_PGMAP_H
22 #define CEPH_PGMAP_H
23
24 #include "include/health.h"
25 #include "common/debug.h"
26 #include "common/TextTable.h"
27 #include "osd/osd_types.h"
28 #include "include/mempool.h"
29 #include "mon/health_check.h"
30 #include <sstream>
31
32 namespace ceph { class Formatter; }
33
34 class PGMapDigest {
35 public:
36 MEMPOOL_CLASS_HELPERS();
37 virtual ~PGMapDigest() {}
38
39 mempool::pgmap::vector<uint64_t> osd_last_seq;
40
41 mutable std::map<int, int64_t> avail_space_by_rule;
42
43 // aggregate state, populated by PGMap child
44 int64_t num_pg = 0, num_osd = 0;
45 int64_t num_pg_active = 0;
46 int64_t num_pg_unknown = 0;
47 mempool::pgmap::unordered_map<int32_t,pool_stat_t> pg_pool_sum;
48 mempool::pgmap::map<int64_t,int64_t> num_pg_by_pool;
49 pool_stat_t pg_sum;
50 osd_stat_t osd_sum;
51 mempool::pgmap::map<std::string,osd_stat_t> osd_sum_by_class;
52 mempool::pgmap::unordered_map<uint64_t,int32_t> num_pg_by_state;
53 struct pg_count {
54 int32_t acting = 0;
55 int32_t up_not_acting = 0;
56 int32_t primary = 0;
57 void encode(ceph::buffer::list& bl) const {
58 using ceph::encode;
59 encode(acting, bl);
60 encode(up_not_acting, bl);
61 encode(primary, bl);
62 }
63 void decode(ceph::buffer::list::const_iterator& p) {
64 using ceph::decode;
65 decode(acting, p);
66 decode(up_not_acting, p);
67 decode(primary, p);
68 }
69 };
70 mempool::pgmap::unordered_map<int32_t,pg_count> num_pg_by_osd;
71
72 mempool::pgmap::map<int64_t,interval_set<snapid_t>> purged_snaps;
73
74 bool use_per_pool_stats() const {
75 return osd_sum.num_osds == osd_sum.num_per_pool_osds;
76 }
77 bool use_per_pool_omap_stats() const {
78 return osd_sum.num_osds == osd_sum.num_per_pool_omap_osds;
79 }
80
81 // recent deltas, and summation
82 /**
83 * keep track of last deltas for each pool, calculated using
84 * @p pg_pool_sum as baseline.
85 */
86 mempool::pgmap::unordered_map<int64_t, mempool::pgmap::list<std::pair<pool_stat_t, utime_t> > > per_pool_sum_deltas;
87 /**
88 * keep track of per-pool timestamp deltas, according to last update on
89 * each pool.
90 */
91 mempool::pgmap::unordered_map<int64_t, utime_t> per_pool_sum_deltas_stamps;
92 /**
93 * keep track of sum deltas, per-pool, taking into account any previous
94 * deltas existing in @p per_pool_sum_deltas. The utime_t as second member
95 * of the pair is the timestamp referring to the last update (i.e., the first
96 * member of the pair) for a given pool.
97 */
98 mempool::pgmap::unordered_map<int64_t, std::pair<pool_stat_t,utime_t> > per_pool_sum_delta;
99
100 pool_stat_t pg_sum_delta;
101 utime_t stamp_delta;
102
103 void get_recovery_stats(
104 double *misplaced_ratio,
105 double *degraded_ratio,
106 double *inactive_ratio,
107 double *unknown_pgs_ratio) const;
108
109 void print_summary(ceph::Formatter *f, std::ostream *out) const;
110 void print_oneline_summary(ceph::Formatter *f, std::ostream *out) const;
111
112 void recovery_summary(ceph::Formatter *f, std::list<std::string> *psl,
113 const pool_stat_t& pool_sum) const;
114 void overall_recovery_summary(ceph::Formatter *f, std::list<std::string> *psl) const;
115 void pool_recovery_summary(ceph::Formatter *f, std::list<std::string> *psl,
116 uint64_t poolid) const;
117 void recovery_rate_summary(ceph::Formatter *f, std::ostream *out,
118 const pool_stat_t& delta_sum,
119 utime_t delta_stamp) const;
120 void overall_recovery_rate_summary(ceph::Formatter *f, std::ostream *out) const;
121 void pool_recovery_rate_summary(ceph::Formatter *f, std::ostream *out,
122 uint64_t poolid) const;
123 /**
124 * Obtain a formatted/plain output for client I/O, source from stats for a
125 * given @p delta_sum pool over a given @p delta_stamp period of time.
126 */
127 void client_io_rate_summary(ceph::Formatter *f, std::ostream *out,
128 const pool_stat_t& delta_sum,
129 utime_t delta_stamp) const;
130 /**
131 * Obtain a formatted/plain output for the overall client I/O, which is
132 * calculated resorting to @p pg_sum_delta and @p stamp_delta.
133 */
134 void overall_client_io_rate_summary(ceph::Formatter *f, std::ostream *out) const;
135 /**
136 * Obtain a formatted/plain output for client I/O over a given pool
137 * with id @p pool_id. We will then obtain pool-specific data
138 * from @p per_pool_sum_delta.
139 */
140 void pool_client_io_rate_summary(ceph::Formatter *f, std::ostream *out,
141 uint64_t poolid) const;
142 /**
143 * Obtain a formatted/plain output for cache tier IO, source from stats for a
144 * given @p delta_sum pool over a given @p delta_stamp period of time.
145 */
146 void cache_io_rate_summary(ceph::Formatter *f, std::ostream *out,
147 const pool_stat_t& delta_sum,
148 utime_t delta_stamp) const;
149 /**
150 * Obtain a formatted/plain output for the overall cache tier IO, which is
151 * calculated resorting to @p pg_sum_delta and @p stamp_delta.
152 */
153 void overall_cache_io_rate_summary(ceph::Formatter *f, std::ostream *out) const;
154 /**
155 * Obtain a formatted/plain output for cache tier IO over a given pool
156 * with id @p pool_id. We will then obtain pool-specific data
157 * from @p per_pool_sum_delta.
158 */
159 void pool_cache_io_rate_summary(ceph::Formatter *f, std::ostream *out,
160 uint64_t poolid) const;
161
162 /**
163 * Return the number of additional bytes that can be stored in this
164 * pool before the first OSD fills up, accounting for PG overhead.
165 */
166 int64_t get_pool_free_space(const OSDMap &osd_map, int64_t poolid) const;
167
168
169 /**
170 * Dump pool usage and io ops/bytes, used by "ceph df" command
171 */
172 virtual void dump_pool_stats_full(const OSDMap &osd_map, std::stringstream *ss,
173 ceph::Formatter *f, bool verbose) const;
174 void dump_cluster_stats(std::stringstream *ss, ceph::Formatter *f, bool verbose) const;
175 static void dump_object_stat_sum(TextTable &tbl, ceph::Formatter *f,
176 const pool_stat_t &pool_stat,
177 uint64_t avail,
178 float raw_used_rate,
179 bool verbose,
180 bool per_pool,
181 bool per_pool_omap,
182 const pg_pool_t *pool);
183
184 size_t get_num_pg_by_osd(int osd) const {
185 auto p = num_pg_by_osd.find(osd);
186 if (p == num_pg_by_osd.end())
187 return 0;
188 else
189 return p->second.acting;
190 }
191 int get_num_primary_pg_by_osd(int osd) const {
192 auto p = num_pg_by_osd.find(osd);
193 if (p == num_pg_by_osd.end())
194 return 0;
195 else
196 return p->second.primary;
197 }
198
199 ceph_statfs get_statfs(OSDMap &osdmap,
200 boost::optional<int64_t> data_pool) const;
201
202 int64_t get_rule_avail(int ruleno) const {
203 auto i = avail_space_by_rule.find(ruleno);
204 if (i != avail_space_by_rule.end())
205 return avail_space_by_rule[ruleno];
206 else
207 return 0;
208 }
209
210 // kill me post-mimic or -nautilus
211 bool definitely_converted_snapsets() const {
212 // false negative is okay; false positive is not!
213 return
214 num_pg &&
215 num_pg_unknown == 0 &&
216 pg_sum.stats.sum.num_legacy_snapsets == 0;
217 }
218
219 uint64_t get_last_osd_stat_seq(int osd) {
220 if (osd < (int)osd_last_seq.size())
221 return osd_last_seq[osd];
222 return 0;
223 }
224
225 void encode(ceph::buffer::list& bl, uint64_t features) const;
226 void decode(ceph::buffer::list::const_iterator& p);
227 void dump(ceph::Formatter *f) const;
228 static void generate_test_instances(std::list<PGMapDigest*>& ls);
229 };
230 WRITE_CLASS_ENCODER(PGMapDigest::pg_count);
231 WRITE_CLASS_ENCODER_FEATURES(PGMapDigest);
232
233 class PGMap : public PGMapDigest {
234 public:
235 MEMPOOL_CLASS_HELPERS();
236
237 // the map
238 version_t version;
239 epoch_t last_osdmap_epoch; // last osdmap epoch i applied to the pgmap
240 epoch_t last_pg_scan; // osdmap epoch
241 mempool::pgmap::unordered_map<int32_t,osd_stat_t> osd_stat;
242 mempool::pgmap::unordered_map<pg_t,pg_stat_t> pg_stat;
243
244 typedef mempool::pgmap::map<
245 std::pair<int64_t, int>, // <pool, osd>
246 store_statfs_t>
247 per_osd_pool_statfs_t;
248
249 per_osd_pool_statfs_t pool_statfs;
250
251 class Incremental {
252 public:
253 MEMPOOL_CLASS_HELPERS();
254 version_t version;
255 mempool::pgmap::map<pg_t,pg_stat_t> pg_stat_updates;
256 epoch_t osdmap_epoch;
257 epoch_t pg_scan; // osdmap epoch
258 mempool::pgmap::set<pg_t> pg_remove;
259 utime_t stamp;
260 per_osd_pool_statfs_t pool_statfs_updates;
261
262 private:
263 mempool::pgmap::map<int32_t,osd_stat_t> osd_stat_updates;
264 mempool::pgmap::set<int32_t> osd_stat_rm;
265 public:
266
267 const mempool::pgmap::map<int32_t, osd_stat_t> &get_osd_stat_updates() const {
268 return osd_stat_updates;
269 }
270 const mempool::pgmap::set<int32_t> &get_osd_stat_rm() const {
271 return osd_stat_rm;
272 }
273 template<typename OsdStat>
274 void update_stat(int32_t osd, OsdStat&& stat) {
275 osd_stat_updates[osd] = std::forward<OsdStat>(stat);
276 }
277 void stat_osd_out(int32_t osd) {
278 osd_stat_updates[osd] = osd_stat_t();
279 }
280 void stat_osd_down_up(int32_t osd, const PGMap& pg_map) {
281 // 0 the op_queue_age_hist for this osd
282 auto p = osd_stat_updates.find(osd);
283 if (p != osd_stat_updates.end()) {
284 p->second.op_queue_age_hist.clear();
285 return;
286 }
287 auto q = pg_map.osd_stat.find(osd);
288 if (q != pg_map.osd_stat.end()) {
289 osd_stat_t& t = osd_stat_updates[osd] = q->second;
290 t.op_queue_age_hist.clear();
291 }
292 }
293 void rm_stat(int32_t osd) {
294 osd_stat_rm.insert(osd);
295 osd_stat_updates.erase(osd);
296 }
297 void dump(ceph::Formatter *f) const;
298 static void generate_test_instances(std::list<Incremental*>& o);
299
300 Incremental() : version(0), osdmap_epoch(0), pg_scan(0) {}
301 };
302
303
304 // aggregate stats (soft state), generated by calc_stats()
305 mempool::pgmap::unordered_map<int,std::set<pg_t> > pg_by_osd;
306 mempool::pgmap::unordered_map<int,int> blocked_by_sum;
307 mempool::pgmap::list<std::pair<pool_stat_t, utime_t> > pg_sum_deltas;
308 mempool::pgmap::unordered_map<int64_t,mempool::pgmap::unordered_map<uint64_t,int32_t>> num_pg_by_pool_state;
309
310 utime_t stamp;
311
312 void update_pool_deltas(
313 CephContext *cct,
314 const utime_t ts,
315 const mempool::pgmap::unordered_map<int32_t, pool_stat_t>& pg_pool_sum_old);
316 void clear_delta();
317
318 void deleted_pool(int64_t pool) {
319 for (auto i = pool_statfs.begin(); i != pool_statfs.end();) {
320 if (i->first.first == pool) {
321 i = pool_statfs.erase(i);
322 } else {
323 ++i;
324 }
325 }
326
327 pg_pool_sum.erase(pool);
328 num_pg_by_pool_state.erase(pool);
329 num_pg_by_pool.erase(pool);
330 per_pool_sum_deltas.erase(pool);
331 per_pool_sum_deltas_stamps.erase(pool);
332 per_pool_sum_delta.erase(pool);
333 }
334
335 private:
336 void update_delta(
337 CephContext *cct,
338 const utime_t ts,
339 const pool_stat_t& old_pool_sum,
340 utime_t *last_ts,
341 const pool_stat_t& current_pool_sum,
342 pool_stat_t *result_pool_delta,
343 utime_t *result_ts_delta,
344 mempool::pgmap::list<std::pair<pool_stat_t,utime_t> > *delta_avg_list);
345
346 void update_one_pool_delta(CephContext *cct,
347 const utime_t ts,
348 const int64_t pool,
349 const pool_stat_t& old_pool_sum);
350
351 public:
352
353 mempool::pgmap::set<pg_t> creating_pgs;
354 mempool::pgmap::map<int,std::map<epoch_t,std::set<pg_t> > > creating_pgs_by_osd_epoch;
355
356 // Bits that use to be enum StuckPG
357 static const int STUCK_INACTIVE = (1<<0);
358 static const int STUCK_UNCLEAN = (1<<1);
359 static const int STUCK_UNDERSIZED = (1<<2);
360 static const int STUCK_DEGRADED = (1<<3);
361 static const int STUCK_STALE = (1<<4);
362
363 PGMap()
364 : version(0),
365 last_osdmap_epoch(0), last_pg_scan(0)
366 {}
367
368 version_t get_version() const {
369 return version;
370 }
371 void set_version(version_t v) {
372 version = v;
373 }
374 epoch_t get_last_osdmap_epoch() const {
375 return last_osdmap_epoch;
376 }
377 void set_last_osdmap_epoch(epoch_t e) {
378 last_osdmap_epoch = e;
379 }
380 epoch_t get_last_pg_scan() const {
381 return last_pg_scan;
382 }
383 void set_last_pg_scan(epoch_t e) {
384 last_pg_scan = e;
385 }
386 utime_t get_stamp() const {
387 return stamp;
388 }
389 void set_stamp(utime_t s) {
390 stamp = s;
391 }
392
393 pool_stat_t get_pg_pool_sum_stat(int64_t pool) const {
394 auto p = pg_pool_sum.find(pool);
395 if (p != pg_pool_sum.end())
396 return p->second;
397 return pool_stat_t();
398 }
399
400 osd_stat_t get_osd_sum(const std::set<int>& osds) const {
401 if (osds.empty()) // all
402 return osd_sum;
403 osd_stat_t sum;
404 for (auto i : osds) {
405 auto os = get_osd_stat(i);
406 if (os)
407 sum.add(*os);
408 }
409 return sum;
410 }
411
412 const osd_stat_t *get_osd_stat(int osd) const {
413 auto i = osd_stat.find(osd);
414 if (i == osd_stat.end()) {
415 return nullptr;
416 }
417 return &i->second;
418 }
419
420
421 void apply_incremental(CephContext *cct, const Incremental& inc);
422 void calc_stats();
423 void stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
424 bool sameosds=false);
425 bool stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,
426 bool sameosds=false);
427 void calc_purged_snaps();
428 void calc_osd_sum_by_class(const OSDMap& osdmap);
429 void stat_osd_add(int osd, const osd_stat_t &s);
430 void stat_osd_sub(int osd, const osd_stat_t &s);
431
432 void encode(ceph::buffer::list &bl, uint64_t features=-1) const;
433 void decode(ceph::buffer::list::const_iterator &bl);
434
435 /// encode subset of our data to a PGMapDigest
436 void encode_digest(const OSDMap& osdmap,
437 ceph::buffer::list& bl, uint64_t features);
438
439 int64_t get_rule_avail(const OSDMap& osdmap, int ruleno) const;
440 void get_rules_avail(const OSDMap& osdmap,
441 std::map<int,int64_t> *avail_map) const;
442 void dump(ceph::Formatter *f, bool with_net = true) const;
443 void dump_basic(ceph::Formatter *f) const;
444 void dump_pg_stats(ceph::Formatter *f, bool brief) const;
445 void dump_pool_stats(ceph::Formatter *f) const;
446 void dump_osd_stats(ceph::Formatter *f, bool with_net = true) const;
447 void dump_osd_ping_times(ceph::Formatter *f) const;
448 void dump_delta(ceph::Formatter *f) const;
449 void dump_filtered_pg_stats(ceph::Formatter *f, std::set<pg_t>& pgs) const;
450 void dump_pool_stats_full(const OSDMap &osd_map, std::stringstream *ss,
451 ceph::Formatter *f, bool verbose) const override {
452 get_rules_avail(osd_map, &avail_space_by_rule);
453 PGMapDigest::dump_pool_stats_full(osd_map, ss, f, verbose);
454 }
455
456 /*
457 * Dump client io rate, recovery io rate, cache io rate and recovery information.
458 * this function is used by "ceph osd pool stats" command
459 */
460 void dump_pool_stats_and_io_rate(int64_t poolid, const OSDMap &osd_map, ceph::Formatter *f,
461 std::stringstream *ss) const;
462
463 void dump_pg_stats_plain(
464 std::ostream& ss,
465 const mempool::pgmap::unordered_map<pg_t, pg_stat_t>& pg_stats,
466 bool brief) const;
467 void get_stuck_stats(
468 int types, const utime_t cutoff,
469 mempool::pgmap::unordered_map<pg_t, pg_stat_t>& stuck_pgs) const;
470 bool get_stuck_counts(const utime_t cutoff, std::map<std::string, int>& note) const;
471 void dump_stuck(ceph::Formatter *f, int types, utime_t cutoff) const;
472 void dump_stuck_plain(std::ostream& ss, int types, utime_t cutoff) const;
473 int dump_stuck_pg_stats(std::stringstream &ds,
474 ceph::Formatter *f,
475 int threshold,
476 std::vector<std::string>& args) const;
477 void dump(std::ostream& ss) const;
478 void dump_basic(std::ostream& ss) const;
479 void dump_pg_stats(std::ostream& ss, bool brief) const;
480 void dump_pg_sum_stats(std::ostream& ss, bool header) const;
481 void dump_pool_stats(std::ostream& ss, bool header) const;
482 void dump_osd_stats(std::ostream& ss) const;
483 void dump_osd_sum_stats(std::ostream& ss) const;
484 void dump_filtered_pg_stats(std::ostream& ss, std::set<pg_t>& pgs) const;
485
486 void dump_osd_perf_stats(ceph::Formatter *f) const;
487 void print_osd_perf_stats(std::ostream *ss) const;
488
489 void dump_osd_blocked_by_stats(ceph::Formatter *f) const;
490 void print_osd_blocked_by_stats(std::ostream *ss) const;
491
492 void get_filtered_pg_stats(uint64_t state, int64_t poolid, int64_t osdid,
493 bool primary, std::set<pg_t>& pgs) const;
494
495 set<std::string> osd_parentage(const OSDMap& osdmap, int id) const;
496 void get_health_checks(
497 CephContext *cct,
498 const OSDMap& osdmap,
499 health_check_map_t *checks) const;
500 void print_summary(ceph::Formatter *f, ostream *out) const;
501
502 static void generate_test_instances(std::list<PGMap*>& o);
503 };
504 WRITE_CLASS_ENCODER_FEATURES(PGMap)
505
506 inline std::ostream& operator<<(std::ostream& out, const PGMapDigest& m) {
507 m.print_oneline_summary(NULL, &out);
508 return out;
509 }
510
511 int process_pg_map_command(
512 const std::string& prefix,
513 const cmdmap_t& cmdmap,
514 const PGMap& pg_map,
515 const OSDMap& osdmap,
516 ceph::Formatter *f,
517 std::stringstream *ss,
518 ceph::buffer::list *odata);
519
520 class PGMapUpdater
521 {
522 public:
523 static void check_osd_map(
524 CephContext *cct,
525 const OSDMap &osdmap,
526 const PGMap& pg_map,
527 PGMap::Incremental *pending_inc);
528
529 // mark pg's state stale if its acting primary osd is down
530 static void check_down_pgs(
531 const OSDMap &osd_map,
532 const PGMap &pg_map,
533 bool check_all,
534 const std::set<int>& need_check_down_pg_osds,
535 PGMap::Incremental *pending_inc);
536 };
537
538 namespace reweight {
539 /* Assign a lower weight to overloaded OSDs.
540 *
541 * The osds that will get a lower weight are those with with a utilization
542 * percentage 'oload' percent greater than the average utilization.
543 */
544 int by_utilization(const OSDMap &osd_map,
545 const PGMap &pg_map,
546 int oload,
547 double max_changef,
548 int max_osds,
549 bool by_pg, const std::set<int64_t> *pools,
550 bool no_increasing,
551 mempool::osdmap::map<int32_t, uint32_t>* new_weights,
552 std::stringstream *ss,
553 std::string *out_str,
554 ceph::Formatter *f);
555 }
556
557 #endif