]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/OSDMap.h
update sources to 12.2.2
[ceph.git] / ceph / src / osd / OSDMap.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18
19#ifndef CEPH_OSDMAP_H
20#define CEPH_OSDMAP_H
21
31f18b77
FG
22#include "include/cpp-btree/btree_map.h"
23
7c673cae
FG
24/*
25 * describe properties of the OSD cluster.
26 * disks, disk groups, total # osds,
27 *
28 */
29#include "include/types.h"
30#include "osd_types.h"
31
32//#include "include/ceph_features.h"
33#include "crush/CrushWrapper.h"
34#include <vector>
35#include <list>
36#include <set>
37#include <map>
38#include "include/memory.h"
39using namespace std;
40
41// forward declaration
42class CephContext;
43class CrushWrapper;
224ce89b 44class health_check_map_t;
7c673cae
FG
45
/**
 * Compare two indexable containers element by element.
 *
 * The containers may have different element types as long as the elements
 * are comparable with operator==.  Arguments are taken by const reference so
 * that neither container is copied, and elements are compared by value
 * rather than by raw bytes, so differing element widths are handled
 * correctly.
 *
 * @param a first container (needs size() and operator[])
 * @param b second container (needs size() and operator[])
 * @return true when both have the same length and all elements compare equal
 */
template<typename A, typename B>
bool vectors_equal(const A& a, const B& b)
{
  if (a.size() != b.size())
    return false;
  for (decltype(a.size()) i = 0; i < a.size(); ++i) {
    if (!(a[i] == b[i]))
      return false;
  }
  return true;
}
56
57
58/*
59 * we track up to two intervals during which the osd was alive and
60 * healthy. the most recent is [up_from,up_thru), where up_thru is
61 * the last epoch the osd is known to have _started_. i.e., a lower
62 * bound on the actual osd death. down_at (if it is > up_from) is an
63 * upper bound on the actual osd death.
64 *
65 * the second is the last_clean interval [first,last]. in that case,
66 * the last interval is the last epoch known to have been either
67 * _finished_, or during which the osd cleanly shut down. when
68 * possible, we push this forward to the epoch the osd was eventually
69 * marked down.
70 *
71 * the lost_at is used to allow build_prior to proceed without waiting
72 * for an osd to recover. In certain cases, progress may be blocked
73 * because an osd is down that may contain updates (i.e., a pg may have
74 * gone rw during an interval). If the osd can't be brought online, we
75 * can force things to proceed knowing that we _might_ be losing some
76 * acked writes. If the osd comes back to life later, that's fine too,
77 * but those writes will still be lost (the divergent objects will be
78 * thrown out).
79 */
80struct osd_info_t {
81 epoch_t last_clean_begin; // last interval that ended with a clean osd shutdown
82 epoch_t last_clean_end;
83 epoch_t up_from; // epoch osd marked up
84 epoch_t up_thru; // lower bound on actual osd death (if > up_from)
85 epoch_t down_at; // upper bound on actual osd death (if > up_from)
86 epoch_t lost_at; // last epoch we decided data was "lost"
87
88 osd_info_t() : last_clean_begin(0), last_clean_end(0),
89 up_from(0), up_thru(0), down_at(0), lost_at(0) {}
90
91 void dump(Formatter *f) const;
92 void encode(bufferlist& bl) const;
93 void decode(bufferlist::iterator& bl);
94 static void generate_test_instances(list<osd_info_t*>& o);
95};
96WRITE_CLASS_ENCODER(osd_info_t)
97
98ostream& operator<<(ostream& out, const osd_info_t& info);
99
100struct osd_xinfo_t {
101 utime_t down_stamp; ///< timestamp when we were last marked down
102 float laggy_probability; ///< encoded as __u32: 0 = definitely not laggy, 0xffffffff definitely laggy
103 __u32 laggy_interval; ///< average interval between being marked laggy and recovering
104 uint64_t features; ///< features supported by this osd we should know about
105 __u32 old_weight; ///< weight prior to being auto marked out
106
107 osd_xinfo_t() : laggy_probability(0), laggy_interval(0),
108 features(0), old_weight(0) {}
109
110 void dump(Formatter *f) const;
111 void encode(bufferlist& bl) const;
112 void decode(bufferlist::iterator& bl);
113 static void generate_test_instances(list<osd_xinfo_t*>& o);
114};
115WRITE_CLASS_ENCODER(osd_xinfo_t)
116
117ostream& operator<<(ostream& out, const osd_xinfo_t& xi);
118
119
31f18b77
FG
120struct PGTempMap {
121#if 1
122 bufferlist data;
123 typedef btree::btree_map<pg_t,int32_t*> map_t;
124 map_t map;
125
126 void encode(bufferlist& bl) const {
127 uint32_t n = map.size();
128 ::encode(n, bl);
129 for (auto &p : map) {
130 ::encode(p.first, bl);
131 bl.append((char*)p.second, (*p.second + 1) * sizeof(int32_t));
132 }
133 }
134 void decode(bufferlist::iterator& p) {
135 data.clear();
136 map.clear();
137 uint32_t n;
138 ::decode(n, p);
139 if (!n)
140 return;
141 bufferlist::iterator pstart = p;
142 size_t start_off = pstart.get_off();
143 vector<pair<pg_t,size_t>> offsets;
144 offsets.resize(n);
145 for (unsigned i=0; i<n; ++i) {
146 pg_t pgid;
147 ::decode(pgid, p);
148 offsets[i].first = pgid;
149 offsets[i].second = p.get_off() - start_off;
150 uint32_t vn;
151 ::decode(vn, p);
152 p.advance(vn * sizeof(int32_t));
153 }
154 size_t len = p.get_off() - start_off;
155 pstart.copy(len, data);
156 if (data.get_num_buffers() > 1) {
157 data.rebuild();
158 }
159 //map.reserve(n);
160 char *start = data.c_str();
161 for (auto i : offsets) {
162 map.insert(map.end(), make_pair(i.first, (int32_t*)(start + i.second)));
163 }
164 }
165 void rebuild() {
166 bufferlist bl;
167 encode(bl);
168 auto p = bl.begin();
169 decode(p);
170 }
171 friend bool operator==(const PGTempMap& l, const PGTempMap& r) {
172 return
173 l.map.size() == r.map.size() &&
174 l.data.contents_equal(r.data);
175 }
176
177 class iterator {
178 map_t::const_iterator it;
179 map_t::const_iterator end;
180 pair<pg_t,vector<int32_t>> current;
181 void init_current() {
182 if (it != end) {
183 current.first = it->first;
184 assert(it->second);
185 current.second.resize(*it->second);
186 int32_t *p = it->second + 1;
187 for (int n = 0; n < *it->second; ++n, ++p) {
188 current.second[n] = *p;
189 }
190 }
191 }
192 public:
193 iterator(map_t::const_iterator p,
194 map_t::const_iterator e)
195 : it(p), end(e) {
196 init_current();
197 }
198
199 const pair<pg_t,vector<int32_t>>& operator*() const {
200 return current;
201 }
202 const pair<pg_t,vector<int32_t>>* operator->() const {
203 return &current;
204 }
205 friend bool operator==(const iterator& l, const iterator& r) {
206 return l.it == r.it;
207 }
208 friend bool operator!=(const iterator& l, const iterator& r) {
209 return l.it != r.it;
210 }
211 iterator& operator++() {
212 ++it;
213 if (it != end)
214 init_current();
215 return *this;
216 }
217 iterator operator++(int) {
218 iterator r = *this;
219 ++it;
220 if (it != end)
221 init_current();
222 return r;
223 }
224 };
225 iterator begin() const {
226 return iterator(map.begin(), map.end());
227 }
228 iterator end() const {
229 return iterator(map.end(), map.end());
230 }
231 iterator find(pg_t pgid) const {
232 return iterator(map.find(pgid), map.end());
233 }
234 size_t size() const {
235 return map.size();
236 }
237 size_t count(pg_t pgid) const {
238 return map.count(pgid);
239 }
240 void erase(pg_t pgid) {
241 map.erase(pgid);
242 }
243 void clear() {
244 map.clear();
245 data.clear();
246 }
247 void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) {
248 size_t need = sizeof(int32_t) * (1 + v.size());
249 if (need < data.get_append_buffer_unused_tail_length()) {
250 bufferptr z(data.get_append_buffer_unused_tail_length());
251 z.zero();
252 data.append(z.c_str(), z.length());
253 }
254 ::encode(v, data);
255 map[pgid] = (int32_t*)(data.back().end_c_str()) - (1 + v.size());
256 }
257 mempool::osdmap::vector<int32_t> get(pg_t pgid) {
258 mempool::osdmap::vector<int32_t> v;
259 int32_t *p = map[pgid];
260 size_t n = *p++;
261 v.resize(n);
262 for (size_t i = 0; i < n; ++i, ++p) {
263 v[i] = *p;
264 }
265 return v;
266 }
267#else
268 // trivial implementation
269 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > pg_temp;
270
271 void encode(bufferlist& bl) const {
272 ::encode(pg_temp, bl);
273 }
274 void decode(bufferlist::iterator& p) {
275 ::decode(pg_temp, p);
276 }
277 friend bool operator==(const PGTempMap& l, const PGTempMap& r) {
278 return
279 l.pg_temp.size() == r.pg_temp.size() &&
280 l.pg_temp == r.pg_temp;
281 }
282
283 class iterator {
284 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> >::const_iterator it;
285 public:
286 iterator(mempool::osdmap::map<pg_t,
287 mempool::osdmap::vector<int32_t> >::const_iterator p)
288 : it(p) {}
289
290 pair<pg_t,const mempool::osdmap::vector<int32_t>&> operator*() const {
291 return *it;
292 }
293 const pair<const pg_t,mempool::osdmap::vector<int32_t>>* operator->() const {
294 return &*it;
295 }
296 friend bool operator==(const iterator& l, const iterator& r) {
297 return l.it == r.it;
298 }
299 friend bool operator!=(const iterator& l, const iterator& r) {
300 return l.it != r.it;
301 }
302 iterator& operator++() {
303 ++it;
304 return *this;
305 }
306 iterator operator++(int) {
307 iterator r = *this;
308 ++it;
309 return r;
310 }
311 };
312 iterator begin() const {
313 return iterator(pg_temp.cbegin());
314 }
315 iterator end() const {
316 return iterator(pg_temp.cend());
317 }
318 iterator find(pg_t pgid) const {
319 return iterator(pg_temp.find(pgid));
320 }
321 size_t size() const {
322 return pg_temp.size();
323 }
324 size_t count(pg_t pgid) const {
325 return pg_temp.count(pgid);
326 }
327 void erase(pg_t pgid) {
328 pg_temp.erase(pgid);
329 }
330 void clear() {
331 pg_temp.clear();
332 }
333 void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) {
334 pg_temp[pgid] = v;
335 }
336 const mempool::osdmap::vector<int32_t>& get(pg_t pgid) {
337 return pg_temp.at(pgid);
338 }
339#endif
340 void dump(Formatter *f) const {
341 for (const auto &pg : *this) {
342 f->open_object_section("osds");
343 f->dump_stream("pgid") << pg.first;
344 f->open_array_section("osds");
345 for (const auto osd : pg.second)
346 f->dump_int("osd", osd);
347 f->close_section();
348 f->close_section();
349 }
350 }
351};
352WRITE_CLASS_ENCODER(PGTempMap)
353
7c673cae
FG
354/** OSDMap
355 */
356class OSDMap {
357public:
358 MEMPOOL_CLASS_HELPERS();
359
360 class Incremental {
361 public:
362 MEMPOOL_CLASS_HELPERS();
363
364 /// feature bits we were encoded with. the subsequent OSDMap
365 /// encoding should match.
366 uint64_t encode_features;
367 uuid_d fsid;
368 epoch_t epoch; // new epoch; we are a diff from epoch-1 to epoch
369 utime_t modified;
370 int64_t new_pool_max; //incremented by the OSDMonitor on each pool create
371 int32_t new_flags;
31f18b77 372 int8_t new_require_osd_release = -1;
7c673cae
FG
373
374 // full (rare)
375 bufferlist fullmap; // in lieu of below.
376 bufferlist crush;
377
378 // incremental
379 int32_t new_max_osd;
380 mempool::osdmap::map<int64_t,pg_pool_t> new_pools;
381 mempool::osdmap::map<int64_t,string> new_pool_names;
382 mempool::osdmap::set<int64_t> old_pools;
383 mempool::osdmap::map<string,map<string,string> > new_erasure_code_profiles;
384 mempool::osdmap::vector<string> old_erasure_code_profiles;
385 mempool::osdmap::map<int32_t,entity_addr_t> new_up_client;
386 mempool::osdmap::map<int32_t,entity_addr_t> new_up_cluster;
31f18b77 387 mempool::osdmap::map<int32_t,uint32_t> new_state; // XORed onto previous state.
7c673cae
FG
388 mempool::osdmap::map<int32_t,uint32_t> new_weight;
389 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > new_pg_temp; // [] to remove
390 mempool::osdmap::map<pg_t, int32_t> new_primary_temp; // [-1] to remove
391 mempool::osdmap::map<int32_t,uint32_t> new_primary_affinity;
392 mempool::osdmap::map<int32_t,epoch_t> new_up_thru;
393 mempool::osdmap::map<int32_t,pair<epoch_t,epoch_t> > new_last_clean_interval;
394 mempool::osdmap::map<int32_t,epoch_t> new_lost;
395 mempool::osdmap::map<int32_t,uuid_d> new_uuid;
396 mempool::osdmap::map<int32_t,osd_xinfo_t> new_xinfo;
397
398 mempool::osdmap::map<entity_addr_t,utime_t> new_blacklist;
399 mempool::osdmap::vector<entity_addr_t> old_blacklist;
400 mempool::osdmap::map<int32_t, entity_addr_t> new_hb_back_up;
401 mempool::osdmap::map<int32_t, entity_addr_t> new_hb_front_up;
402
403 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap;
404 mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> new_pg_upmap_items;
405 mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items;
406
407 string cluster_snapshot;
408
409 float new_nearfull_ratio = -1;
410 float new_backfillfull_ratio = -1;
411 float new_full_ratio = -1;
412
31f18b77 413 int8_t new_require_min_compat_client = -1;
7c673cae
FG
414
415 mutable bool have_crc; ///< crc values are defined
416 uint32_t full_crc; ///< crc of the resulting OSDMap
417 mutable uint32_t inc_crc; ///< crc of this incremental
418
419 int get_net_marked_out(const OSDMap *previous) const;
420 int get_net_marked_down(const OSDMap *previous) const;
421 int identify_osd(uuid_d u) const;
422
423 void encode_client_old(bufferlist& bl) const;
424 void encode_classic(bufferlist& bl, uint64_t features) const;
425 void encode(bufferlist& bl, uint64_t features=CEPH_FEATURES_ALL) const;
426 void decode_classic(bufferlist::iterator &p);
427 void decode(bufferlist::iterator &bl);
428 void dump(Formatter *f) const;
429 static void generate_test_instances(list<Incremental*>& o);
430
431 explicit Incremental(epoch_t e=0) :
432 encode_features(0),
433 epoch(e), new_pool_max(-1), new_flags(-1), new_max_osd(-1),
434 have_crc(false), full_crc(0), inc_crc(0) {
435 memset(&fsid, 0, sizeof(fsid));
436 }
437 explicit Incremental(bufferlist &bl) {
438 bufferlist::iterator p = bl.begin();
439 decode(p);
440 }
441 explicit Incremental(bufferlist::iterator &p) {
442 decode(p);
443 }
444
445 pg_pool_t *get_new_pool(int64_t pool, const pg_pool_t *orig) {
446 if (new_pools.count(pool) == 0)
447 new_pools[pool] = *orig;
448 return &new_pools[pool];
449 }
450 bool has_erasure_code_profile(const string &name) const {
451 auto i = new_erasure_code_profiles.find(name);
452 return i != new_erasure_code_profiles.end();
453 }
454 void set_erasure_code_profile(const string &name,
455 const map<string,string>& profile) {
456 new_erasure_code_profiles[name] = profile;
457 }
458
459 /// propagate updated pools' snap metadata to any of their tiers
460 int propagate_snaps_to_tiers(CephContext *cct, const OSDMap &base);
31f18b77
FG
461
462 /// filter out osds with any pending state changing
463 size_t get_pending_state_osds(vector<int> *osds) {
464 assert(osds);
465 osds->clear();
466
467 for (auto &p : new_state) {
468 osds->push_back(p.first);
469 }
470
471 return osds->size();
472 }
473
474 bool pending_osd_has_state(int osd, unsigned state) {
475 return new_state.count(osd) && (new_state[osd] & state) != 0;
476 }
477
478 void pending_osd_state_set(int osd, unsigned state) {
479 new_state[osd] |= state;
480 }
481
482 // cancel the specified pending osd state if there is any
483 // return ture on success, false otherwise.
484 bool pending_osd_state_clear(int osd, unsigned state) {
485 if (!pending_osd_has_state(osd, state)) {
486 // never has been set or already has been cancelled.
487 return false;
488 }
489
490 new_state[osd] &= ~state;
491 return true;
492 }
493
7c673cae
FG
494 };
495
496private:
497 uuid_d fsid;
498 epoch_t epoch; // what epoch of the osd cluster descriptor is this
499 utime_t created, modified; // epoch start time
500 int32_t pool_max; // the largest pool num, ever
501
502 uint32_t flags;
503
504 int num_osd; // not saved; see calc_num_osds
505 int num_up_osd; // not saved; see calc_num_osds
506 int num_in_osd; // not saved; see calc_num_osds
507
508 int32_t max_osd;
31f18b77 509 vector<uint32_t> osd_state;
7c673cae
FG
510
511 struct addrs_s {
512 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > client_addr;
513 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > cluster_addr;
514 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > hb_back_addr;
515 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > hb_front_addr;
516 entity_addr_t blank;
517 };
518 ceph::shared_ptr<addrs_s> osd_addrs;
519
520 mempool::osdmap::vector<__u32> osd_weight; // 16.16 fixed point, 0x10000 = "in", 0 = "out"
521 mempool::osdmap::vector<osd_info_t> osd_info;
31f18b77 522 ceph::shared_ptr<PGTempMap> pg_temp; // temp pg mapping (e.g. while we rebuild)
7c673cae
FG
523 ceph::shared_ptr< mempool::osdmap::map<pg_t,int32_t > > primary_temp; // temp primary mapping (e.g. while we rebuild)
524 ceph::shared_ptr< mempool::osdmap::vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline
525
526 // remap (post-CRUSH, pre-up)
527 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> pg_upmap; ///< remap pg
528 mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> pg_upmap_items; ///< remap osds in up set
529
530 mempool::osdmap::map<int64_t,pg_pool_t> pools;
531 mempool::osdmap::map<int64_t,string> pool_name;
532 mempool::osdmap::map<string,map<string,string> > erasure_code_profiles;
533 mempool::osdmap::map<string,int64_t> name_pool;
534
535 ceph::shared_ptr< mempool::osdmap::vector<uuid_d> > osd_uuid;
536 mempool::osdmap::vector<osd_xinfo_t> osd_xinfo;
537
538 mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist;
539
540 epoch_t cluster_snapshot_epoch;
541 string cluster_snapshot;
542 bool new_blacklist_entries;
543
544 float full_ratio = 0, backfillfull_ratio = 0, nearfull_ratio = 0;
545
546 /// min compat client we want to support
31f18b77 547 uint8_t require_min_compat_client = 0; // CEPH_RELEASE_*
7c673cae 548
31f18b77
FG
549public:
550 /// require osds to run at least this release
551 uint8_t require_osd_release = 0; // CEPH_RELEASE_*
552
553private:
7c673cae
FG
554 mutable uint64_t cached_up_osd_features;
555
556 mutable bool crc_defined;
557 mutable uint32_t crc;
558
559 void _calc_up_osd_features();
560
561 public:
562 bool have_crc() const { return crc_defined; }
563 uint32_t get_crc() const { return crc; }
564
565 ceph::shared_ptr<CrushWrapper> crush; // hierarchical map
31f18b77
FG
566private:
567 uint32_t crush_version = 1;
7c673cae
FG
568
569 friend class OSDMonitor;
570
571 public:
572 OSDMap() : epoch(0),
224ce89b 573 pool_max(0),
7c673cae
FG
574 flags(0),
575 num_osd(0), num_up_osd(0), num_in_osd(0),
576 max_osd(0),
577 osd_addrs(std::make_shared<addrs_s>()),
31f18b77 578 pg_temp(std::make_shared<PGTempMap>()),
7c673cae
FG
579 primary_temp(std::make_shared<mempool::osdmap::map<pg_t,int32_t>>()),
580 osd_uuid(std::make_shared<mempool::osdmap::vector<uuid_d>>()),
581 cluster_snapshot_epoch(0),
582 new_blacklist_entries(false),
583 cached_up_osd_features(0),
584 crc_defined(false), crc(0),
585 crush(std::make_shared<CrushWrapper>()) {
586 memset(&fsid, 0, sizeof(fsid));
587 }
588
589 // no copying
590private:
591 OSDMap(const OSDMap& other) = default;
592 OSDMap& operator=(const OSDMap& other) = default;
593public:
594
595 void deepish_copy_from(const OSDMap& o) {
596 *this = o;
597 primary_temp.reset(new mempool::osdmap::map<pg_t,int32_t>(*o.primary_temp));
31f18b77 598 pg_temp.reset(new PGTempMap(*o.pg_temp));
7c673cae
FG
599 osd_uuid.reset(new mempool::osdmap::vector<uuid_d>(*o.osd_uuid));
600
601 if (o.osd_primary_affinity)
602 osd_primary_affinity.reset(new mempool::osdmap::vector<__u32>(*o.osd_primary_affinity));
603
604 // NOTE: this still references shared entity_addr_t's.
605 osd_addrs.reset(new addrs_s(*o.osd_addrs));
606
607 // NOTE: we do not copy crush. note that apply_incremental will
608 // allocate a new CrushWrapper, though.
609 }
610
611 // map info
612 const uuid_d& get_fsid() const { return fsid; }
613 void set_fsid(uuid_d& f) { fsid = f; }
614
615 epoch_t get_epoch() const { return epoch; }
616 void inc_epoch() { epoch++; }
617
618 void set_epoch(epoch_t e);
619
31f18b77
FG
620 uint32_t get_crush_version() const {
621 return crush_version;
622 }
623
7c673cae
FG
624 /* stamps etc */
625 const utime_t& get_created() const { return created; }
626 const utime_t& get_modified() const { return modified; }
627
628 bool is_blacklisted(const entity_addr_t& a) const;
629 void get_blacklist(list<pair<entity_addr_t,utime_t > > *bl) const;
31f18b77 630 void get_blacklist(std::set<entity_addr_t> *bl) const;
7c673cae
FG
631
632 string get_cluster_snapshot() const {
633 if (cluster_snapshot_epoch == epoch)
634 return cluster_snapshot;
635 return string();
636 }
637
638 float get_full_ratio() const {
639 return full_ratio;
640 }
641 float get_backfillfull_ratio() const {
642 return backfillfull_ratio;
643 }
644 float get_nearfull_ratio() const {
645 return nearfull_ratio;
646 }
7c673cae 647 void get_full_osd_util(
31f18b77 648 const mempool::pgmap::unordered_map<int32_t,osd_stat_t> &osd_stat,
7c673cae
FG
649 map<int, float> *full,
650 map<int, float> *backfill,
651 map<int, float> *nearfull) const;
3efd9988
FG
652 void get_full_pools(CephContext *cct,
653 set<int64_t> *full,
654 set<int64_t> *backfillfull,
655 set<int64_t> *nearfull) const;
31f18b77
FG
656 void get_full_osd_counts(set<int> *full, set<int> *backfill,
657 set<int> *nearfull) const;
658
659
7c673cae
FG
660 /***** cluster state *****/
661 /* osds */
662 int get_max_osd() const { return max_osd; }
663 void set_max_osd(int m);
664
665 unsigned get_num_osds() const {
666 return num_osd;
667 }
668 unsigned get_num_up_osds() const {
669 return num_up_osd;
670 }
671 unsigned get_num_in_osds() const {
672 return num_in_osd;
673 }
674 /// recalculate cached values for get_num{,_up,_in}_osds
675 int calc_num_osds();
676
677 void get_all_osds(set<int32_t>& ls) const;
678 void get_up_osds(set<int32_t>& ls) const;
31f18b77 679 void get_out_osds(set<int32_t>& ls) const;
7c673cae
FG
680 unsigned get_num_pg_temp() const {
681 return pg_temp->size();
682 }
683
684 int get_flags() const { return flags; }
685 bool test_flag(int f) const { return flags & f; }
686 void set_flag(int f) { flags |= f; }
687 void clear_flag(int f) { flags &= ~f; }
688
689 static void calc_state_set(int state, set<string>& st);
690
691 int get_state(int o) const {
692 assert(o < max_osd);
693 return osd_state[o];
694 }
695 int get_state(int o, set<string>& st) const {
696 assert(o < max_osd);
697 unsigned t = osd_state[o];
698 calc_state_set(t, st);
699 return osd_state[o];
700 }
701 void set_state(int o, unsigned s) {
702 assert(o < max_osd);
703 osd_state[o] = s;
704 }
705 void set_weight(int o, unsigned w) {
706 assert(o < max_osd);
707 osd_weight[o] = w;
708 if (w)
709 osd_state[o] |= CEPH_OSD_EXISTS;
710 }
711 unsigned get_weight(int o) const {
712 assert(o < max_osd);
713 return osd_weight[o];
714 }
715 float get_weightf(int o) const {
716 return (float)get_weight(o) / (float)CEPH_OSD_IN;
717 }
718 void adjust_osd_weights(const map<int,double>& weights, Incremental& inc) const;
719
720 void set_primary_affinity(int o, int w) {
721 assert(o < max_osd);
722 if (!osd_primary_affinity)
723 osd_primary_affinity.reset(
724 new mempool::osdmap::vector<__u32>(
725 max_osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY));
726 (*osd_primary_affinity)[o] = w;
727 }
728 unsigned get_primary_affinity(int o) const {
729 assert(o < max_osd);
730 if (!osd_primary_affinity)
731 return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
732 return (*osd_primary_affinity)[o];
733 }
734 float get_primary_affinityf(int o) const {
735 return (float)get_primary_affinity(o) / (float)CEPH_OSD_MAX_PRIMARY_AFFINITY;
736 }
737
738 bool has_erasure_code_profile(const string &name) const {
739 auto i = erasure_code_profiles.find(name);
740 return i != erasure_code_profiles.end();
741 }
742 int get_erasure_code_profile_default(CephContext *cct,
743 map<string,string> &profile_map,
744 ostream *ss);
745 void set_erasure_code_profile(const string &name,
746 const map<string,string>& profile) {
747 erasure_code_profiles[name] = profile;
748 }
749 const map<string,string> &get_erasure_code_profile(
750 const string &name) const {
751 static map<string,string> empty;
752 auto i = erasure_code_profiles.find(name);
753 if (i == erasure_code_profiles.end())
754 return empty;
755 else
756 return i->second;
757 }
758 const mempool::osdmap::map<string,map<string,string> > &get_erasure_code_profiles() const {
759 return erasure_code_profiles;
760 }
761
762 bool exists(int osd) const {
763 //assert(osd >= 0);
764 return osd >= 0 && osd < max_osd && (osd_state[osd] & CEPH_OSD_EXISTS);
765 }
766
31f18b77
FG
767 bool is_destroyed(int osd) const {
768 return exists(osd) && (osd_state[osd] & CEPH_OSD_DESTROYED);
769 }
770
7c673cae
FG
771 bool is_up(int osd) const {
772 return exists(osd) && (osd_state[osd] & CEPH_OSD_UP);
773 }
774
775 bool has_been_up_since(int osd, epoch_t epoch) const {
776 return is_up(osd) && get_up_from(osd) <= epoch;
777 }
778
779 bool is_down(int osd) const {
780 return !is_up(osd);
781 }
782
783 bool is_out(int osd) const {
784 return !exists(osd) || get_weight(osd) == CEPH_OSD_OUT;
785 }
786
787 bool is_in(int osd) const {
788 return !is_out(osd);
789 }
790
31f18b77
FG
791 bool is_noup(int osd) const {
792 return exists(osd) && (osd_state[osd] & CEPH_OSD_NOUP);
793 }
794
795 bool is_nodown(int osd) const {
796 return exists(osd) && (osd_state[osd] & CEPH_OSD_NODOWN);
797 }
798
799 bool is_noin(int osd) const {
800 return exists(osd) && (osd_state[osd] & CEPH_OSD_NOIN);
801 }
802
803 bool is_noout(int osd) const {
804 return exists(osd) && (osd_state[osd] & CEPH_OSD_NOOUT);
805 }
806
807 void get_noup_osds(vector<int> *osds) const {
808 assert(osds);
809 osds->clear();
810
811 for (int i = 0; i < max_osd; i++) {
812 if (is_noup(i)) {
813 osds->push_back(i);
814 }
815 }
816 }
817
818 void get_nodown_osds(vector<int> *osds) const {
819 assert(osds);
820 osds->clear();
821
822 for (int i = 0; i < max_osd; i++) {
823 if (is_nodown(i)) {
824 osds->push_back(i);
825 }
826 }
827 }
828
829 void get_noin_osds(vector<int> *osds) const {
830 assert(osds);
831 osds->clear();
832
833 for (int i = 0; i < max_osd; i++) {
834 if (is_noin(i)) {
835 osds->push_back(i);
836 }
837 }
838 }
839
840 void get_noout_osds(vector<int> *osds) const {
841 assert(osds);
842 osds->clear();
843
844 for (int i = 0; i < max_osd; i++) {
845 if (is_noout(i)) {
846 osds->push_back(i);
847 }
848 }
849 }
850
7c673cae
FG
851 /**
852 * check if an entire crush subtree is down
853 */
854 bool subtree_is_down(int id, set<int> *down_cache) const;
855 bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
856
31f18b77
FG
857 bool subtree_type_is_down(CephContext *cct, int id, int subtree_type, set<int> *down_in_osds, set<int> *up_in_osds,
858 set<int> *subtree_up, unordered_map<int, set<int> > *subtree_type_down) const;
859
7c673cae
FG
860 int identify_osd(const entity_addr_t& addr) const;
861 int identify_osd(const uuid_d& u) const;
862 int identify_osd_on_all_channels(const entity_addr_t& addr) const;
863
864 bool have_addr(const entity_addr_t& addr) const {
865 return identify_osd(addr) >= 0;
866 }
867 int find_osd_on_ip(const entity_addr_t& ip) const;
868 const entity_addr_t &get_addr(int osd) const {
869 assert(exists(osd));
870 return osd_addrs->client_addr[osd] ? *osd_addrs->client_addr[osd] : osd_addrs->blank;
871 }
872 const entity_addr_t &get_cluster_addr(int osd) const {
873 assert(exists(osd));
874 if (!osd_addrs->cluster_addr[osd] || *osd_addrs->cluster_addr[osd] == entity_addr_t())
875 return get_addr(osd);
876 return *osd_addrs->cluster_addr[osd];
877 }
878 const entity_addr_t &get_hb_back_addr(int osd) const {
879 assert(exists(osd));
880 return osd_addrs->hb_back_addr[osd] ? *osd_addrs->hb_back_addr[osd] : osd_addrs->blank;
881 }
882 const entity_addr_t &get_hb_front_addr(int osd) const {
883 assert(exists(osd));
884 return osd_addrs->hb_front_addr[osd] ? *osd_addrs->hb_front_addr[osd] : osd_addrs->blank;
885 }
886 entity_inst_t get_most_recent_inst(int osd) const {
887 assert(exists(osd));
888 return entity_inst_t(entity_name_t::OSD(osd), get_addr(osd));
889 }
890 entity_inst_t get_inst(int osd) const {
891 assert(is_up(osd));
892 return get_most_recent_inst(osd);
893 }
894 entity_inst_t get_cluster_inst(int osd) const {
895 assert(is_up(osd));
896 return entity_inst_t(entity_name_t::OSD(osd), get_cluster_addr(osd));
897 }
898 entity_inst_t get_hb_back_inst(int osd) const {
899 assert(is_up(osd));
900 return entity_inst_t(entity_name_t::OSD(osd), get_hb_back_addr(osd));
901 }
902 entity_inst_t get_hb_front_inst(int osd) const {
903 assert(is_up(osd));
904 return entity_inst_t(entity_name_t::OSD(osd), get_hb_front_addr(osd));
905 }
906
907 const uuid_d& get_uuid(int osd) const {
908 assert(exists(osd));
909 return (*osd_uuid)[osd];
910 }
911
912 const epoch_t& get_up_from(int osd) const {
913 assert(exists(osd));
914 return osd_info[osd].up_from;
915 }
916 const epoch_t& get_up_thru(int osd) const {
917 assert(exists(osd));
918 return osd_info[osd].up_thru;
919 }
920 const epoch_t& get_down_at(int osd) const {
921 assert(exists(osd));
922 return osd_info[osd].down_at;
923 }
924 const osd_info_t& get_info(int osd) const {
925 assert(osd < max_osd);
926 return osd_info[osd];
927 }
928
929 const osd_xinfo_t& get_xinfo(int osd) const {
930 assert(osd < max_osd);
931 return osd_xinfo[osd];
932 }
933
934 int get_next_up_osd_after(int n) const {
935 if (get_max_osd() == 0)
936 return -1;
937 for (int i = n + 1; i != n; ++i) {
938 if (i >= get_max_osd())
939 i = 0;
940 if (i == n)
941 break;
942 if (is_up(i))
943 return i;
944 }
945 return -1;
946 }
947
948 int get_previous_up_osd_before(int n) const {
949 if (get_max_osd() == 0)
950 return -1;
951 for (int i = n - 1; i != n; --i) {
952 if (i < 0)
953 i = get_max_osd() - 1;
954 if (i == n)
955 break;
956 if (is_up(i))
957 return i;
958 }
959 return -1;
960 }
961
962 /**
963 * get feature bits required by the current structure
964 *
965 * @param entity_type [in] what entity type we are asking about
966 * @param mask [out] set of all possible map-related features we could set
967 * @return feature bits used by this map
968 */
969 uint64_t get_features(int entity_type, uint64_t *mask) const;
970
971 /**
972 * get oldest *client* version (firefly, hammer, etc.) that can connect given
973 * the feature bits required (according to get_features()).
974 */
31f18b77 975 uint8_t get_min_compat_client() const;
7c673cae
FG
976
977 /**
978 * get intersection of features supported by up osds
979 */
980 uint64_t get_up_osd_features() const;
981
982 int apply_incremental(const Incremental &inc);
983
984 /// try to re-use/reference addrs in oldmap from newmap
985 static void dedup(const OSDMap *oldmap, OSDMap *newmap);
986
987 static void clean_temps(CephContext *cct, const OSDMap& osdmap,
988 Incremental *pending_inc);
989
990 // serialize, unserialize
991private:
992 void encode_client_old(bufferlist& bl) const;
993 void encode_classic(bufferlist& bl, uint64_t features) const;
994 void decode_classic(bufferlist::iterator& p);
995 void post_decode();
996public:
997 void encode(bufferlist& bl, uint64_t features=CEPH_FEATURES_ALL) const;
998 void decode(bufferlist& bl);
999 void decode(bufferlist::iterator& bl);
1000
1001
1002 /**** mapping facilities ****/
1003 int map_to_pg(
1004 int64_t pool,
1005 const string& name,
1006 const string& key,
1007 const string& nspace,
1008 pg_t *pg) const;
1009 int object_locator_to_pg(const object_t& oid, const object_locator_t& loc,
1010 pg_t &pg) const;
1011 pg_t object_locator_to_pg(const object_t& oid,
1012 const object_locator_t& loc) const {
1013 pg_t pg;
1014 int ret = object_locator_to_pg(oid, loc, pg);
1015 assert(ret == 0);
1016 return pg;
1017 }
1018
1019
1020 static object_locator_t file_to_object_locator(const file_layout_t& layout) {
1021 return object_locator_t(layout.pool_id, layout.pool_ns);
1022 }
1023
1024 ceph_object_layout file_to_object_layout(object_t oid,
1025 file_layout_t& layout) const {
1026 return make_object_layout(oid, layout.pool_id, layout.pool_ns);
1027 }
1028
1029 ceph_object_layout make_object_layout(object_t oid, int pg_pool,
1030 string nspace) const;
1031
1032 int get_pg_num(int pg_pool) const
1033 {
1034 const pg_pool_t *pool = get_pg_pool(pg_pool);
1035 assert(NULL != pool);
1036 return pool->get_pg_num();
1037 }
1038
1039 bool pg_exists(pg_t pgid) const {
1040 const pg_pool_t *p = get_pg_pool(pgid.pool());
1041 return p && pgid.ps() < p->get_pg_num();
1042 }
1043
224ce89b
WB
1044 int get_pg_pool_min_size(pg_t pgid) const {
1045 if (!pg_exists(pgid)) {
1046 return -ENOENT;
1047 }
1048 const pg_pool_t *p = get_pg_pool(pgid.pool());
1049 assert(p);
1050 return p->get_min_size();
1051 }
1052
1053 int get_pg_pool_size(pg_t pgid) const {
1054 if (!pg_exists(pgid)) {
1055 return -ENOENT;
1056 }
1057 const pg_pool_t *p = get_pg_pool(pgid.pool());
1058 assert(p);
1059 return p->get_size();
1060 }
1061
7c673cae
FG
1062private:
1063 /// pg -> (raw osd list)
31f18b77 1064 void _pg_to_raw_osds(
7c673cae
FG
1065 const pg_pool_t& pool, pg_t pg,
1066 vector<int> *osds,
1067 ps_t *ppps) const;
1068 int _pick_primary(const vector<int>& osds) const;
1069 void _remove_nonexistent_osds(const pg_pool_t& pool, vector<int>& osds) const;
1070
1071 void _apply_primary_affinity(ps_t seed, const pg_pool_t& pool,
1072 vector<int> *osds, int *primary) const;
1073
1074 /// apply pg_upmap[_items] mappings
224ce89b 1075 void _apply_upmap(const pg_pool_t& pi, pg_t pg, vector<int> *raw) const;
7c673cae
FG
1076
1077 /// pg -> (up osd list)
1078 void _raw_to_up_osds(const pg_pool_t& pool, const vector<int>& raw,
1079 vector<int> *up) const;
1080
1081
1082 /**
1083 * Get the pg and primary temp, if they are specified.
1084 * @param temp_pg [out] Will be empty or contain the temp PG mapping on return
1085 * @param temp_primary [out] Will be the value in primary_temp, or a value derived
1086 * from the pg_temp (if specified), or -1 if you should use the calculated (up_)primary.
1087 */
1088 void _get_temp_osds(const pg_pool_t& pool, pg_t pg,
1089 vector<int> *temp_pg, int *temp_primary) const;
1090
1091 /**
1092 * map to up and acting. Fills in whatever fields are non-NULL.
1093 */
1094 void _pg_to_up_acting_osds(const pg_t& pg, vector<int> *up, int *up_primary,
1095 vector<int> *acting, int *acting_primary,
1096 bool raw_pg_to_pg = true) const;
1097
1098public:
1099 /***
1100 * This is suitable only for looking at raw CRUSH outputs. It skips
1101 * applying the temp and up checks and should not be used
1102 * by anybody for data mapping purposes.
1103 * raw and primary must be non-NULL
1104 */
31f18b77 1105 void pg_to_raw_osds(pg_t pg, vector<int> *raw, int *primary) const;
7c673cae 1106 /// map a pg to its acting set. @return acting set size
31f18b77 1107 void pg_to_acting_osds(const pg_t& pg, vector<int> *acting,
7c673cae
FG
1108 int *acting_primary) const {
1109 _pg_to_up_acting_osds(pg, NULL, NULL, acting, acting_primary);
7c673cae 1110 }
31f18b77 1111 void pg_to_acting_osds(pg_t pg, vector<int>& acting) const {
7c673cae
FG
1112 return pg_to_acting_osds(pg, &acting, NULL);
1113 }
1114 /**
1115 * This does not apply temp overrides and should not be used
1116 * by anybody for data mapping purposes. Specify both pointers.
1117 */
1118 void pg_to_raw_up(pg_t pg, vector<int> *up, int *primary) const;
1119 /**
1120 * map a pg to its acting set as well as its up set. You must use
1121 * the acting set for data mapping purposes, but some users will
1122 * also find the up set useful for things like deciding what to
1123 * set as pg_temp.
1124 * Each of these pointers must be non-NULL.
1125 */
1126 void pg_to_up_acting_osds(pg_t pg, vector<int> *up, int *up_primary,
1127 vector<int> *acting, int *acting_primary) const {
1128 _pg_to_up_acting_osds(pg, up, up_primary, acting, acting_primary);
1129 }
1130 void pg_to_up_acting_osds(pg_t pg, vector<int>& up, vector<int>& acting) const {
1131 int up_primary, acting_primary;
1132 pg_to_up_acting_osds(pg, &up, &up_primary, &acting, &acting_primary);
1133 }
1134 bool pg_is_ec(pg_t pg) const {
1135 auto i = pools.find(pg.pool());
1136 assert(i != pools.end());
1137 return i->second.ec_pool();
1138 }
1139 bool get_primary_shard(const pg_t& pgid, spg_t *out) const {
1140 auto i = get_pools().find(pgid.pool());
1141 if (i == get_pools().end()) {
1142 return false;
1143 }
1144 if (!i->second.ec_pool()) {
1145 *out = spg_t(pgid);
1146 return true;
1147 }
1148 int primary;
1149 vector<int> acting;
1150 pg_to_acting_osds(pgid, &acting, &primary);
1151 for (uint8_t i = 0; i < acting.size(); ++i) {
1152 if (acting[i] == primary) {
1153 *out = spg_t(pgid, shard_id_t(i));
1154 return true;
1155 }
1156 }
1157 return false;
1158 }
1159
1160 int64_t lookup_pg_pool_name(const string& name) const {
1161 auto p = name_pool.find(name);
1162 if (p == name_pool.end())
1163 return -ENOENT;
1164 return p->second;
1165 }
1166
1167 int64_t get_pool_max() const {
1168 return pool_max;
1169 }
1170 const mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() const {
1171 return pools;
1172 }
1173 mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() {
1174 return pools;
1175 }
3efd9988
FG
1176 void get_pool_ids_by_rule(int rule_id, set<int64_t> *pool_ids) const {
1177 assert(pool_ids);
1178 for (auto &p: pools) {
1179 if ((int)p.second.get_crush_rule() == rule_id) {
1180 pool_ids->insert(p.first);
1181 }
1182 }
1183 }
1184 void get_pool_ids_by_osd(CephContext *cct,
1185 int osd,
1186 set<int64_t> *pool_ids) const;
7c673cae
FG
1187 const string& get_pool_name(int64_t p) const {
1188 auto i = pool_name.find(p);
1189 assert(i != pool_name.end());
1190 return i->second;
1191 }
c07f9fc5
FG
1192 const mempool::osdmap::map<int64_t,string>& get_pool_names() const {
1193 return pool_name;
1194 }
7c673cae
FG
1195 bool have_pg_pool(int64_t p) const {
1196 return pools.count(p);
1197 }
1198 const pg_pool_t* get_pg_pool(int64_t p) const {
1199 auto i = pools.find(p);
1200 if (i != pools.end())
1201 return &i->second;
1202 return NULL;
1203 }
1204 unsigned get_pg_size(pg_t pg) const {
1205 auto p = pools.find(pg.pool());
1206 assert(p != pools.end());
1207 return p->second.get_size();
1208 }
1209 int get_pg_type(pg_t pg) const {
1210 auto p = pools.find(pg.pool());
1211 assert(p != pools.end());
1212 return p->second.get_type();
1213 }
1214
1215
1216 pg_t raw_pg_to_pg(pg_t pg) const {
1217 auto p = pools.find(pg.pool());
1218 assert(p != pools.end());
1219 return p->second.raw_pg_to_pg(pg);
1220 }
1221
1222 // pg -> acting primary osd
1223 int get_pg_acting_primary(pg_t pg) const {
1224 int primary = -1;
1225 _pg_to_up_acting_osds(pg, nullptr, nullptr, nullptr, &primary);
1226 return primary;
1227 }
1228
1229 /*
1230 * check whether an spg_t maps to a particular osd
1231 */
1232 bool is_up_acting_osd_shard(spg_t pg, int osd) const {
1233 vector<int> up, acting;
1234 _pg_to_up_acting_osds(pg.pgid, &up, NULL, &acting, NULL, false);
1235 if (pg.shard == shard_id_t::NO_SHARD) {
1236 if (calc_pg_role(osd, acting, acting.size()) >= 0 ||
1237 calc_pg_role(osd, up, up.size()) >= 0)
1238 return true;
1239 } else {
1240 if (pg.shard < (int)acting.size() && acting[pg.shard] == osd)
1241 return true;
1242 if (pg.shard < (int)up.size() && up[pg.shard] == osd)
1243 return true;
1244 }
1245 return false;
1246 }
1247
1248
1249 /* what replica # is a given osd? 0 primary, -1 for none. */
1250 static int calc_pg_rank(int osd, const vector<int>& acting, int nrep=0);
1251 static int calc_pg_role(int osd, const vector<int>& acting, int nrep=0);
1252 static bool primary_changed(
1253 int oldprimary,
1254 const vector<int> &oldacting,
1255 int newprimary,
1256 const vector<int> &newacting);
1257
1258 /* rank is -1 (stray), 0 (primary), 1,2,3,... (replica) */
1259 int get_pg_acting_rank(pg_t pg, int osd) const {
1260 vector<int> group;
31f18b77
FG
1261 pg_to_acting_osds(pg, group);
1262 return calc_pg_rank(osd, group, group.size());
7c673cae
FG
1263 }
1264 /* role is -1 (stray), 0 (primary), 1 (replica) */
1265 int get_pg_acting_role(const pg_t& pg, int osd) const {
1266 vector<int> group;
31f18b77
FG
1267 pg_to_acting_osds(pg, group);
1268 return calc_pg_role(osd, group, group.size());
7c673cae
FG
1269 }
1270
1271 bool osd_is_valid_op_target(pg_t pg, int osd) const {
1272 int primary;
1273 vector<int> group;
31f18b77 1274 pg_to_acting_osds(pg, &group, &primary);
7c673cae
FG
1275 if (osd == primary)
1276 return true;
1277 if (pg_is_ec(pg))
1278 return false;
1279
31f18b77 1280 return calc_pg_role(osd, group, group.size()) >= 0;
7c673cae
FG
1281 }
1282
1283 int clean_pg_upmaps(
1284 CephContext *cct,
1285 Incremental *pending_inc);
1286
1287 bool try_pg_upmap(
1288 CephContext *cct,
1289 pg_t pg, ///< pg to potentially remap
1290 const set<int>& overfull, ///< osds we'd want to evacuate
1291 const vector<int>& underfull, ///< osds to move to, in order of preference
1292 vector<int> *orig,
1293 vector<int> *out); ///< resulting alternative mapping
1294
1295 int calc_pg_upmaps(
1296 CephContext *cct,
1297 float max_deviation, ///< max deviation from target (value < 1.0)
1298 int max_iterations, ///< max iterations to run
1299 const set<int64_t>& pools, ///< [optional] restrict to pool
1300 Incremental *pending_inc
1301 );
1302
31f18b77
FG
1303 int get_osds_by_bucket_name(const string &name, set<int> *osds) const;
1304
7c673cae
FG
1305 /*
1306 * handy helpers to build simple maps...
1307 */
1308 /**
1309 * Build an OSD map suitable for basic usage. If **num_osd** is >= 0
1310 * it will be initialized with the specified number of OSDs in a
1311 * single host. If **num_osd** is < 0 the layout of the OSD map will
1312 * be built by reading the content of the configuration file.
1313 *
1314 * @param cct [in] in core ceph context
1315 * @param e [in] initial epoch
1316 * @param fsid [in] id of the cluster
1317 * @param num_osd [in] number of OSDs if >= 0 or read from conf if < 0
1318 * @return **0** on success, negative errno on error.
1319 */
224ce89b
WB
1320private:
1321 int build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid,
1322 int num_osd, int pg_bits, int pgp_bits,
1323 bool default_pool);
1324public:
7c673cae 1325 int build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
224ce89b
WB
1326 int num_osd) {
1327 return build_simple_optioned(cct, e, fsid, num_osd, 0, 0, false);
1328 }
1329 int build_simple_with_pool(CephContext *cct, epoch_t e, uuid_d &fsid,
1330 int num_osd, int pg_bits, int pgp_bits) {
1331 return build_simple_optioned(cct, e, fsid, num_osd,
1332 pg_bits, pgp_bits, true);
1333 }
7c673cae
FG
1334 static int _build_crush_types(CrushWrapper& crush);
1335 static int build_simple_crush_map(CephContext *cct, CrushWrapper& crush,
1336 int num_osd, ostream *ss);
1337 static int build_simple_crush_map_from_conf(CephContext *cct,
1338 CrushWrapper& crush,
1339 ostream *ss);
31f18b77
FG
1340 static int build_simple_crush_rules(
1341 CephContext *cct, CrushWrapper& crush,
1342 const string& root,
1343 ostream *ss);
7c673cae 1344
3efd9988
FG
1345 bool crush_rule_in_use(int rule_id) const;
1346
1347 int validate_crush_rules(CrushWrapper *crush, ostream *ss) const;
7c673cae
FG
1348
1349 void clear_temp() {
1350 pg_temp->clear();
1351 primary_temp->clear();
1352 }
1353
1354private:
1355 void print_osd_line(int cur, ostream *out, Formatter *f) const;
1356public:
1357 void print(ostream& out) const;
1358 void print_pools(ostream& out) const;
224ce89b 1359 void print_summary(Formatter *f, ostream& out, const string& prefix) const;
7c673cae 1360 void print_oneline_summary(ostream& out) const;
31f18b77
FG
1361
// single-bit filter flags
enum {
  DUMP_IN        = 1 << 0,  // only 'in' osds
  DUMP_OUT       = 1 << 1,  // only 'out' osds
  DUMP_UP        = 1 << 2,  // only 'up' osds
  DUMP_DOWN      = 1 << 3,  // only 'down' osds
  DUMP_DESTROYED = 1 << 4,  // only 'destroyed' osds
};
1369 void print_tree(Formatter *f, ostream *out, unsigned dump_flags=0) const;
7c673cae
FG
1370
1371 int summarize_mapping_stats(
1372 OSDMap *newmap,
1373 const set<int64_t> *pools,
1374 std::string *out,
1375 Formatter *f) const;
1376
1377 string get_flag_string() const;
1378 static string get_flag_string(unsigned flags);
1379 static void dump_erasure_code_profiles(
1380 const mempool::osdmap::map<string,map<string,string> > &profiles,
1381 Formatter *f);
1382 void dump(Formatter *f) const;
1383 static void generate_test_instances(list<OSDMap*>& o);
1384 bool check_new_blacklist_entries() const { return new_blacklist_entries; }
224ce89b
WB
1385
1386 void check_health(health_check_map_t *checks) const;
35e4c445
FG
1387
1388 int parse_osd_id_list(const vector<string>& ls,
1389 set<int> *out,
1390 ostream *ss) const;
7c673cae
FG
1391};
1392WRITE_CLASS_ENCODER_FEATURES(OSDMap)
1393WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental)
1394
1395typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
1396
1397inline ostream& operator<<(ostream& out, const OSDMap& m) {
1398 m.print_oneline_summary(out);
1399 return out;
1400}
1401
31f18b77
FG
1402class PGStatService;
1403
1404void print_osd_utilization(const OSDMap& osdmap,
1405 const PGStatService *pgstat,
1406 ostream& out,
1407 Formatter *f,
1408 bool tree);
7c673cae
FG
1409
1410#endif