]> git.proxmox.com Git - ceph.git/blame - ceph/src/osd/OSDMap.h
update sources to 12.2.8
[ceph.git] / ceph / src / osd / OSDMap.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
8 *
9 * Author: Loic Dachary <loic@dachary.org>
10 *
11 * This is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License version 2.1, as published by the Free Software
14 * Foundation. See file COPYING.
15 *
16 */
17
18
19#ifndef CEPH_OSDMAP_H
20#define CEPH_OSDMAP_H
21
22/*
23 * describe properties of the OSD cluster.
24 * disks, disk groups, total # osds,
25 *
26 */
#include "include/types.h"
#include "osd_types.h"

//#include "include/ceph_features.h"
#include "crush/CrushWrapper.h"
#include <algorithm>
#include <vector>
#include <list>
#include <set>
#include <map>
#include "include/memory.h"
#include "include/btree_map.h"
7c673cae
FG
38using namespace std;
39
40// forward declaration
41class CephContext;
42class CrushWrapper;
224ce89b 43class health_check_map_t;
7c673cae
FG
44
// Element-wise equality of two containers whose types (and element types)
// may differ.
//
// C++11 has no four-iterator std::equal, so the lengths are compared first
// and the three-iterator overload is used for the elements.  Elements are
// compared with operator==, so containers with differently sized element
// types (e.g. int32_t vs int64_t) compare by value rather than by raw bytes.
// Both arguments are taken by const reference; nothing is copied.
template<typename A, typename B>
bool vectors_equal(const A& a, const B& b)
{
  return a.size() == b.size() &&
    std::equal(a.begin(), a.end(), b.begin());
}
55
56
57/*
58 * we track up to two intervals during which the osd was alive and
59 * healthy. the most recent is [up_from,up_thru), where up_thru is
60 * the last epoch the osd is known to have _started_. i.e., a lower
61 * bound on the actual osd death. down_at (if it is > up_from) is an
62 * upper bound on the actual osd death.
63 *
64 * the second is the last_clean interval [first,last]. in that case,
65 * the last interval is the last epoch known to have been either
66 * _finished_, or during which the osd cleanly shut down. when
67 * possible, we push this forward to the epoch the osd was eventually
68 * marked down.
69 *
70 * the lost_at is used to allow build_prior to proceed without waiting
71 * for an osd to recover. In certain cases, progress may be blocked
72 * because an osd is down that may contain updates (i.e., a pg may have
73 * gone rw during an interval). If the osd can't be brought online, we
74 * can force things to proceed knowing that we _might_ be losing some
 * acked writes.  If the osd comes back to life later, that's fine too,
76 * but those writes will still be lost (the divergent objects will be
77 * thrown out).
78 */
79struct osd_info_t {
80 epoch_t last_clean_begin; // last interval that ended with a clean osd shutdown
81 epoch_t last_clean_end;
82 epoch_t up_from; // epoch osd marked up
83 epoch_t up_thru; // lower bound on actual osd death (if > up_from)
84 epoch_t down_at; // upper bound on actual osd death (if > up_from)
85 epoch_t lost_at; // last epoch we decided data was "lost"
86
87 osd_info_t() : last_clean_begin(0), last_clean_end(0),
88 up_from(0), up_thru(0), down_at(0), lost_at(0) {}
89
90 void dump(Formatter *f) const;
91 void encode(bufferlist& bl) const;
92 void decode(bufferlist::iterator& bl);
93 static void generate_test_instances(list<osd_info_t*>& o);
94};
95WRITE_CLASS_ENCODER(osd_info_t)
96
97ostream& operator<<(ostream& out, const osd_info_t& info);
98
99struct osd_xinfo_t {
100 utime_t down_stamp; ///< timestamp when we were last marked down
101 float laggy_probability; ///< encoded as __u32: 0 = definitely not laggy, 0xffffffff definitely laggy
102 __u32 laggy_interval; ///< average interval between being marked laggy and recovering
103 uint64_t features; ///< features supported by this osd we should know about
104 __u32 old_weight; ///< weight prior to being auto marked out
105
106 osd_xinfo_t() : laggy_probability(0), laggy_interval(0),
107 features(0), old_weight(0) {}
108
109 void dump(Formatter *f) const;
110 void encode(bufferlist& bl) const;
111 void decode(bufferlist::iterator& bl);
112 static void generate_test_instances(list<osd_xinfo_t*>& o);
113};
114WRITE_CLASS_ENCODER(osd_xinfo_t)
115
116ostream& operator<<(ostream& out, const osd_xinfo_t& xi);
117
118
31f18b77
FG
119struct PGTempMap {
120#if 1
121 bufferlist data;
122 typedef btree::btree_map<pg_t,int32_t*> map_t;
123 map_t map;
124
125 void encode(bufferlist& bl) const {
126 uint32_t n = map.size();
127 ::encode(n, bl);
128 for (auto &p : map) {
129 ::encode(p.first, bl);
130 bl.append((char*)p.second, (*p.second + 1) * sizeof(int32_t));
131 }
132 }
133 void decode(bufferlist::iterator& p) {
134 data.clear();
135 map.clear();
136 uint32_t n;
137 ::decode(n, p);
138 if (!n)
139 return;
140 bufferlist::iterator pstart = p;
141 size_t start_off = pstart.get_off();
142 vector<pair<pg_t,size_t>> offsets;
143 offsets.resize(n);
144 for (unsigned i=0; i<n; ++i) {
145 pg_t pgid;
146 ::decode(pgid, p);
147 offsets[i].first = pgid;
148 offsets[i].second = p.get_off() - start_off;
149 uint32_t vn;
150 ::decode(vn, p);
151 p.advance(vn * sizeof(int32_t));
152 }
153 size_t len = p.get_off() - start_off;
154 pstart.copy(len, data);
155 if (data.get_num_buffers() > 1) {
156 data.rebuild();
157 }
158 //map.reserve(n);
159 char *start = data.c_str();
160 for (auto i : offsets) {
161 map.insert(map.end(), make_pair(i.first, (int32_t*)(start + i.second)));
162 }
163 }
164 void rebuild() {
165 bufferlist bl;
166 encode(bl);
167 auto p = bl.begin();
168 decode(p);
169 }
170 friend bool operator==(const PGTempMap& l, const PGTempMap& r) {
171 return
172 l.map.size() == r.map.size() &&
173 l.data.contents_equal(r.data);
174 }
175
176 class iterator {
177 map_t::const_iterator it;
178 map_t::const_iterator end;
179 pair<pg_t,vector<int32_t>> current;
180 void init_current() {
181 if (it != end) {
182 current.first = it->first;
183 assert(it->second);
184 current.second.resize(*it->second);
185 int32_t *p = it->second + 1;
186 for (int n = 0; n < *it->second; ++n, ++p) {
187 current.second[n] = *p;
188 }
189 }
190 }
191 public:
192 iterator(map_t::const_iterator p,
193 map_t::const_iterator e)
194 : it(p), end(e) {
195 init_current();
196 }
197
198 const pair<pg_t,vector<int32_t>>& operator*() const {
199 return current;
200 }
201 const pair<pg_t,vector<int32_t>>* operator->() const {
202 return &current;
203 }
204 friend bool operator==(const iterator& l, const iterator& r) {
205 return l.it == r.it;
206 }
207 friend bool operator!=(const iterator& l, const iterator& r) {
208 return l.it != r.it;
209 }
210 iterator& operator++() {
211 ++it;
212 if (it != end)
213 init_current();
214 return *this;
215 }
216 iterator operator++(int) {
217 iterator r = *this;
218 ++it;
219 if (it != end)
220 init_current();
221 return r;
222 }
223 };
224 iterator begin() const {
225 return iterator(map.begin(), map.end());
226 }
227 iterator end() const {
228 return iterator(map.end(), map.end());
229 }
230 iterator find(pg_t pgid) const {
231 return iterator(map.find(pgid), map.end());
232 }
233 size_t size() const {
234 return map.size();
235 }
236 size_t count(pg_t pgid) const {
237 return map.count(pgid);
238 }
239 void erase(pg_t pgid) {
240 map.erase(pgid);
241 }
242 void clear() {
243 map.clear();
244 data.clear();
245 }
246 void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) {
247 size_t need = sizeof(int32_t) * (1 + v.size());
248 if (need < data.get_append_buffer_unused_tail_length()) {
249 bufferptr z(data.get_append_buffer_unused_tail_length());
250 z.zero();
251 data.append(z.c_str(), z.length());
252 }
253 ::encode(v, data);
254 map[pgid] = (int32_t*)(data.back().end_c_str()) - (1 + v.size());
255 }
256 mempool::osdmap::vector<int32_t> get(pg_t pgid) {
257 mempool::osdmap::vector<int32_t> v;
258 int32_t *p = map[pgid];
259 size_t n = *p++;
260 v.resize(n);
261 for (size_t i = 0; i < n; ++i, ++p) {
262 v[i] = *p;
263 }
264 return v;
265 }
266#else
267 // trivial implementation
268 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > pg_temp;
269
270 void encode(bufferlist& bl) const {
271 ::encode(pg_temp, bl);
272 }
273 void decode(bufferlist::iterator& p) {
274 ::decode(pg_temp, p);
275 }
276 friend bool operator==(const PGTempMap& l, const PGTempMap& r) {
277 return
278 l.pg_temp.size() == r.pg_temp.size() &&
279 l.pg_temp == r.pg_temp;
280 }
281
282 class iterator {
283 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> >::const_iterator it;
284 public:
285 iterator(mempool::osdmap::map<pg_t,
286 mempool::osdmap::vector<int32_t> >::const_iterator p)
287 : it(p) {}
288
289 pair<pg_t,const mempool::osdmap::vector<int32_t>&> operator*() const {
290 return *it;
291 }
292 const pair<const pg_t,mempool::osdmap::vector<int32_t>>* operator->() const {
293 return &*it;
294 }
295 friend bool operator==(const iterator& l, const iterator& r) {
296 return l.it == r.it;
297 }
298 friend bool operator!=(const iterator& l, const iterator& r) {
299 return l.it != r.it;
300 }
301 iterator& operator++() {
302 ++it;
303 return *this;
304 }
305 iterator operator++(int) {
306 iterator r = *this;
307 ++it;
308 return r;
309 }
310 };
311 iterator begin() const {
312 return iterator(pg_temp.cbegin());
313 }
314 iterator end() const {
315 return iterator(pg_temp.cend());
316 }
317 iterator find(pg_t pgid) const {
318 return iterator(pg_temp.find(pgid));
319 }
320 size_t size() const {
321 return pg_temp.size();
322 }
323 size_t count(pg_t pgid) const {
324 return pg_temp.count(pgid);
325 }
326 void erase(pg_t pgid) {
327 pg_temp.erase(pgid);
328 }
329 void clear() {
330 pg_temp.clear();
331 }
332 void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) {
333 pg_temp[pgid] = v;
334 }
335 const mempool::osdmap::vector<int32_t>& get(pg_t pgid) {
336 return pg_temp.at(pgid);
337 }
338#endif
339 void dump(Formatter *f) const {
340 for (const auto &pg : *this) {
341 f->open_object_section("osds");
342 f->dump_stream("pgid") << pg.first;
343 f->open_array_section("osds");
344 for (const auto osd : pg.second)
345 f->dump_int("osd", osd);
346 f->close_section();
347 f->close_section();
348 }
349 }
350};
351WRITE_CLASS_ENCODER(PGTempMap)
352
7c673cae
FG
353/** OSDMap
354 */
355class OSDMap {
356public:
357 MEMPOOL_CLASS_HELPERS();
358
359 class Incremental {
360 public:
361 MEMPOOL_CLASS_HELPERS();
362
363 /// feature bits we were encoded with. the subsequent OSDMap
364 /// encoding should match.
365 uint64_t encode_features;
366 uuid_d fsid;
367 epoch_t epoch; // new epoch; we are a diff from epoch-1 to epoch
368 utime_t modified;
369 int64_t new_pool_max; //incremented by the OSDMonitor on each pool create
370 int32_t new_flags;
31f18b77 371 int8_t new_require_osd_release = -1;
7c673cae
FG
372
373 // full (rare)
374 bufferlist fullmap; // in lieu of below.
375 bufferlist crush;
376
377 // incremental
378 int32_t new_max_osd;
379 mempool::osdmap::map<int64_t,pg_pool_t> new_pools;
380 mempool::osdmap::map<int64_t,string> new_pool_names;
381 mempool::osdmap::set<int64_t> old_pools;
382 mempool::osdmap::map<string,map<string,string> > new_erasure_code_profiles;
383 mempool::osdmap::vector<string> old_erasure_code_profiles;
384 mempool::osdmap::map<int32_t,entity_addr_t> new_up_client;
385 mempool::osdmap::map<int32_t,entity_addr_t> new_up_cluster;
31f18b77 386 mempool::osdmap::map<int32_t,uint32_t> new_state; // XORed onto previous state.
7c673cae
FG
387 mempool::osdmap::map<int32_t,uint32_t> new_weight;
388 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > new_pg_temp; // [] to remove
389 mempool::osdmap::map<pg_t, int32_t> new_primary_temp; // [-1] to remove
390 mempool::osdmap::map<int32_t,uint32_t> new_primary_affinity;
391 mempool::osdmap::map<int32_t,epoch_t> new_up_thru;
392 mempool::osdmap::map<int32_t,pair<epoch_t,epoch_t> > new_last_clean_interval;
393 mempool::osdmap::map<int32_t,epoch_t> new_lost;
394 mempool::osdmap::map<int32_t,uuid_d> new_uuid;
395 mempool::osdmap::map<int32_t,osd_xinfo_t> new_xinfo;
396
397 mempool::osdmap::map<entity_addr_t,utime_t> new_blacklist;
398 mempool::osdmap::vector<entity_addr_t> old_blacklist;
399 mempool::osdmap::map<int32_t, entity_addr_t> new_hb_back_up;
400 mempool::osdmap::map<int32_t, entity_addr_t> new_hb_front_up;
401
402 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap;
403 mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> new_pg_upmap_items;
404 mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items;
405
406 string cluster_snapshot;
407
408 float new_nearfull_ratio = -1;
409 float new_backfillfull_ratio = -1;
410 float new_full_ratio = -1;
411
31f18b77 412 int8_t new_require_min_compat_client = -1;
7c673cae
FG
413
414 mutable bool have_crc; ///< crc values are defined
415 uint32_t full_crc; ///< crc of the resulting OSDMap
416 mutable uint32_t inc_crc; ///< crc of this incremental
417
418 int get_net_marked_out(const OSDMap *previous) const;
419 int get_net_marked_down(const OSDMap *previous) const;
420 int identify_osd(uuid_d u) const;
421
422 void encode_client_old(bufferlist& bl) const;
423 void encode_classic(bufferlist& bl, uint64_t features) const;
424 void encode(bufferlist& bl, uint64_t features=CEPH_FEATURES_ALL) const;
425 void decode_classic(bufferlist::iterator &p);
426 void decode(bufferlist::iterator &bl);
427 void dump(Formatter *f) const;
428 static void generate_test_instances(list<Incremental*>& o);
429
430 explicit Incremental(epoch_t e=0) :
431 encode_features(0),
432 epoch(e), new_pool_max(-1), new_flags(-1), new_max_osd(-1),
433 have_crc(false), full_crc(0), inc_crc(0) {
7c673cae
FG
434 }
435 explicit Incremental(bufferlist &bl) {
436 bufferlist::iterator p = bl.begin();
437 decode(p);
438 }
439 explicit Incremental(bufferlist::iterator &p) {
440 decode(p);
441 }
442
443 pg_pool_t *get_new_pool(int64_t pool, const pg_pool_t *orig) {
444 if (new_pools.count(pool) == 0)
445 new_pools[pool] = *orig;
446 return &new_pools[pool];
447 }
448 bool has_erasure_code_profile(const string &name) const {
449 auto i = new_erasure_code_profiles.find(name);
450 return i != new_erasure_code_profiles.end();
451 }
452 void set_erasure_code_profile(const string &name,
453 const map<string,string>& profile) {
454 new_erasure_code_profiles[name] = profile;
455 }
456
457 /// propage update pools' snap metadata to any of their tiers
458 int propagate_snaps_to_tiers(CephContext *cct, const OSDMap &base);
31f18b77
FG
459
460 /// filter out osds with any pending state changing
461 size_t get_pending_state_osds(vector<int> *osds) {
462 assert(osds);
463 osds->clear();
464
465 for (auto &p : new_state) {
466 osds->push_back(p.first);
467 }
468
469 return osds->size();
470 }
471
472 bool pending_osd_has_state(int osd, unsigned state) {
473 return new_state.count(osd) && (new_state[osd] & state) != 0;
474 }
475
476 void pending_osd_state_set(int osd, unsigned state) {
477 new_state[osd] |= state;
478 }
479
480 // cancel the specified pending osd state if there is any
481 // return ture on success, false otherwise.
482 bool pending_osd_state_clear(int osd, unsigned state) {
483 if (!pending_osd_has_state(osd, state)) {
484 // never has been set or already has been cancelled.
485 return false;
486 }
487
488 new_state[osd] &= ~state;
489 return true;
490 }
491
7c673cae
FG
492 };
493
494private:
495 uuid_d fsid;
496 epoch_t epoch; // what epoch of the osd cluster descriptor is this
497 utime_t created, modified; // epoch start time
498 int32_t pool_max; // the largest pool num, ever
499
500 uint32_t flags;
501
502 int num_osd; // not saved; see calc_num_osds
503 int num_up_osd; // not saved; see calc_num_osds
504 int num_in_osd; // not saved; see calc_num_osds
505
506 int32_t max_osd;
31f18b77 507 vector<uint32_t> osd_state;
7c673cae 508
28e407b8
AA
509 // These features affect OSDMap[::Incremental] encoding, or the
510 // encoding of some type embedded therein (CrushWrapper, something
511 // from osd_types, etc.).
512 static constexpr uint64_t SIGNIFICANT_FEATURES =
513 CEPH_FEATUREMASK_PGID64 |
514 CEPH_FEATUREMASK_PGPOOL3 |
515 CEPH_FEATUREMASK_OSDENC |
516 CEPH_FEATUREMASK_OSDMAP_ENC |
517 CEPH_FEATUREMASK_OSD_POOLRESEND |
518 CEPH_FEATUREMASK_NEW_OSDOP_ENCODING |
519 CEPH_FEATUREMASK_MSG_ADDR2 |
520 CEPH_FEATUREMASK_CRUSH_TUNABLES5 |
521 CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS |
522 CEPH_FEATUREMASK_SERVER_LUMINOUS ;
7c673cae
FG
523 struct addrs_s {
524 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > client_addr;
525 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > cluster_addr;
526 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > hb_back_addr;
527 mempool::osdmap::vector<ceph::shared_ptr<entity_addr_t> > hb_front_addr;
528 entity_addr_t blank;
529 };
530 ceph::shared_ptr<addrs_s> osd_addrs;
531
532 mempool::osdmap::vector<__u32> osd_weight; // 16.16 fixed point, 0x10000 = "in", 0 = "out"
533 mempool::osdmap::vector<osd_info_t> osd_info;
31f18b77 534 ceph::shared_ptr<PGTempMap> pg_temp; // temp pg mapping (e.g. while we rebuild)
7c673cae
FG
535 ceph::shared_ptr< mempool::osdmap::map<pg_t,int32_t > > primary_temp; // temp primary mapping (e.g. while we rebuild)
536 ceph::shared_ptr< mempool::osdmap::vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline
537
538 // remap (post-CRUSH, pre-up)
539 mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> pg_upmap; ///< remap pg
540 mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> pg_upmap_items; ///< remap osds in up set
541
542 mempool::osdmap::map<int64_t,pg_pool_t> pools;
543 mempool::osdmap::map<int64_t,string> pool_name;
544 mempool::osdmap::map<string,map<string,string> > erasure_code_profiles;
545 mempool::osdmap::map<string,int64_t> name_pool;
546
547 ceph::shared_ptr< mempool::osdmap::vector<uuid_d> > osd_uuid;
548 mempool::osdmap::vector<osd_xinfo_t> osd_xinfo;
549
550 mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist;
551
552 epoch_t cluster_snapshot_epoch;
553 string cluster_snapshot;
554 bool new_blacklist_entries;
555
556 float full_ratio = 0, backfillfull_ratio = 0, nearfull_ratio = 0;
557
558 /// min compat client we want to support
31f18b77 559 uint8_t require_min_compat_client = 0; // CEPH_RELEASE_*
7c673cae 560
31f18b77
FG
561public:
562 /// require osds to run at least this release
563 uint8_t require_osd_release = 0; // CEPH_RELEASE_*
564
565private:
7c673cae
FG
566 mutable uint64_t cached_up_osd_features;
567
568 mutable bool crc_defined;
569 mutable uint32_t crc;
570
571 void _calc_up_osd_features();
572
573 public:
574 bool have_crc() const { return crc_defined; }
575 uint32_t get_crc() const { return crc; }
576
577 ceph::shared_ptr<CrushWrapper> crush; // hierarchical map
31f18b77
FG
578private:
579 uint32_t crush_version = 1;
7c673cae
FG
580
581 friend class OSDMonitor;
582
583 public:
584 OSDMap() : epoch(0),
224ce89b 585 pool_max(0),
7c673cae
FG
586 flags(0),
587 num_osd(0), num_up_osd(0), num_in_osd(0),
588 max_osd(0),
589 osd_addrs(std::make_shared<addrs_s>()),
31f18b77 590 pg_temp(std::make_shared<PGTempMap>()),
7c673cae
FG
591 primary_temp(std::make_shared<mempool::osdmap::map<pg_t,int32_t>>()),
592 osd_uuid(std::make_shared<mempool::osdmap::vector<uuid_d>>()),
593 cluster_snapshot_epoch(0),
594 new_blacklist_entries(false),
595 cached_up_osd_features(0),
596 crc_defined(false), crc(0),
597 crush(std::make_shared<CrushWrapper>()) {
7c673cae
FG
598 }
599
600 // no copying
601private:
602 OSDMap(const OSDMap& other) = default;
603 OSDMap& operator=(const OSDMap& other) = default;
604public:
605
28e407b8
AA
606 /// return feature mask subset that is relevant to OSDMap encoding
607 static uint64_t get_significant_features(uint64_t features) {
608 return SIGNIFICANT_FEATURES & features;
609 }
610
611 uint64_t get_encoding_features() const;
612
7c673cae
FG
613 void deepish_copy_from(const OSDMap& o) {
614 *this = o;
615 primary_temp.reset(new mempool::osdmap::map<pg_t,int32_t>(*o.primary_temp));
31f18b77 616 pg_temp.reset(new PGTempMap(*o.pg_temp));
7c673cae
FG
617 osd_uuid.reset(new mempool::osdmap::vector<uuid_d>(*o.osd_uuid));
618
619 if (o.osd_primary_affinity)
620 osd_primary_affinity.reset(new mempool::osdmap::vector<__u32>(*o.osd_primary_affinity));
621
622 // NOTE: this still references shared entity_addr_t's.
623 osd_addrs.reset(new addrs_s(*o.osd_addrs));
624
625 // NOTE: we do not copy crush. note that apply_incremental will
626 // allocate a new CrushWrapper, though.
627 }
628
629 // map info
630 const uuid_d& get_fsid() const { return fsid; }
631 void set_fsid(uuid_d& f) { fsid = f; }
632
633 epoch_t get_epoch() const { return epoch; }
634 void inc_epoch() { epoch++; }
635
636 void set_epoch(epoch_t e);
637
31f18b77
FG
638 uint32_t get_crush_version() const {
639 return crush_version;
640 }
641
7c673cae
FG
642 /* stamps etc */
643 const utime_t& get_created() const { return created; }
644 const utime_t& get_modified() const { return modified; }
645
646 bool is_blacklisted(const entity_addr_t& a) const;
647 void get_blacklist(list<pair<entity_addr_t,utime_t > > *bl) const;
31f18b77 648 void get_blacklist(std::set<entity_addr_t> *bl) const;
7c673cae
FG
649
650 string get_cluster_snapshot() const {
651 if (cluster_snapshot_epoch == epoch)
652 return cluster_snapshot;
653 return string();
654 }
655
656 float get_full_ratio() const {
657 return full_ratio;
658 }
659 float get_backfillfull_ratio() const {
660 return backfillfull_ratio;
661 }
662 float get_nearfull_ratio() const {
663 return nearfull_ratio;
664 }
7c673cae 665 void get_full_osd_util(
31f18b77 666 const mempool::pgmap::unordered_map<int32_t,osd_stat_t> &osd_stat,
7c673cae
FG
667 map<int, float> *full,
668 map<int, float> *backfill,
669 map<int, float> *nearfull) const;
3efd9988
FG
670 void get_full_pools(CephContext *cct,
671 set<int64_t> *full,
672 set<int64_t> *backfillfull,
673 set<int64_t> *nearfull) const;
31f18b77
FG
674 void get_full_osd_counts(set<int> *full, set<int> *backfill,
675 set<int> *nearfull) const;
676
677
7c673cae
FG
678 /***** cluster state *****/
679 /* osds */
680 int get_max_osd() const { return max_osd; }
681 void set_max_osd(int m);
682
683 unsigned get_num_osds() const {
684 return num_osd;
685 }
686 unsigned get_num_up_osds() const {
687 return num_up_osd;
688 }
689 unsigned get_num_in_osds() const {
690 return num_in_osd;
691 }
692 /// recalculate cached values for get_num{,_up,_in}_osds
693 int calc_num_osds();
694
695 void get_all_osds(set<int32_t>& ls) const;
696 void get_up_osds(set<int32_t>& ls) const;
31f18b77 697 void get_out_osds(set<int32_t>& ls) const;
7c673cae
FG
698 unsigned get_num_pg_temp() const {
699 return pg_temp->size();
700 }
701
702 int get_flags() const { return flags; }
703 bool test_flag(int f) const { return flags & f; }
704 void set_flag(int f) { flags |= f; }
705 void clear_flag(int f) { flags &= ~f; }
706
707 static void calc_state_set(int state, set<string>& st);
708
709 int get_state(int o) const {
710 assert(o < max_osd);
711 return osd_state[o];
712 }
713 int get_state(int o, set<string>& st) const {
714 assert(o < max_osd);
715 unsigned t = osd_state[o];
716 calc_state_set(t, st);
717 return osd_state[o];
718 }
719 void set_state(int o, unsigned s) {
720 assert(o < max_osd);
721 osd_state[o] = s;
722 }
723 void set_weight(int o, unsigned w) {
724 assert(o < max_osd);
725 osd_weight[o] = w;
726 if (w)
727 osd_state[o] |= CEPH_OSD_EXISTS;
728 }
729 unsigned get_weight(int o) const {
730 assert(o < max_osd);
731 return osd_weight[o];
732 }
733 float get_weightf(int o) const {
734 return (float)get_weight(o) / (float)CEPH_OSD_IN;
735 }
736 void adjust_osd_weights(const map<int,double>& weights, Incremental& inc) const;
737
738 void set_primary_affinity(int o, int w) {
739 assert(o < max_osd);
740 if (!osd_primary_affinity)
741 osd_primary_affinity.reset(
742 new mempool::osdmap::vector<__u32>(
743 max_osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY));
744 (*osd_primary_affinity)[o] = w;
745 }
746 unsigned get_primary_affinity(int o) const {
747 assert(o < max_osd);
748 if (!osd_primary_affinity)
749 return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
750 return (*osd_primary_affinity)[o];
751 }
752 float get_primary_affinityf(int o) const {
753 return (float)get_primary_affinity(o) / (float)CEPH_OSD_MAX_PRIMARY_AFFINITY;
754 }
755
756 bool has_erasure_code_profile(const string &name) const {
757 auto i = erasure_code_profiles.find(name);
758 return i != erasure_code_profiles.end();
759 }
760 int get_erasure_code_profile_default(CephContext *cct,
761 map<string,string> &profile_map,
762 ostream *ss);
763 void set_erasure_code_profile(const string &name,
764 const map<string,string>& profile) {
765 erasure_code_profiles[name] = profile;
766 }
767 const map<string,string> &get_erasure_code_profile(
768 const string &name) const {
769 static map<string,string> empty;
770 auto i = erasure_code_profiles.find(name);
771 if (i == erasure_code_profiles.end())
772 return empty;
773 else
774 return i->second;
775 }
776 const mempool::osdmap::map<string,map<string,string> > &get_erasure_code_profiles() const {
777 return erasure_code_profiles;
778 }
779
780 bool exists(int osd) const {
781 //assert(osd >= 0);
782 return osd >= 0 && osd < max_osd && (osd_state[osd] & CEPH_OSD_EXISTS);
783 }
784
31f18b77
FG
785 bool is_destroyed(int osd) const {
786 return exists(osd) && (osd_state[osd] & CEPH_OSD_DESTROYED);
787 }
788
7c673cae
FG
789 bool is_up(int osd) const {
790 return exists(osd) && (osd_state[osd] & CEPH_OSD_UP);
791 }
792
793 bool has_been_up_since(int osd, epoch_t epoch) const {
794 return is_up(osd) && get_up_from(osd) <= epoch;
795 }
796
797 bool is_down(int osd) const {
798 return !is_up(osd);
799 }
800
801 bool is_out(int osd) const {
802 return !exists(osd) || get_weight(osd) == CEPH_OSD_OUT;
803 }
804
805 bool is_in(int osd) const {
806 return !is_out(osd);
807 }
808
31f18b77
FG
809 bool is_noup(int osd) const {
810 return exists(osd) && (osd_state[osd] & CEPH_OSD_NOUP);
811 }
812
813 bool is_nodown(int osd) const {
814 return exists(osd) && (osd_state[osd] & CEPH_OSD_NODOWN);
815 }
816
817 bool is_noin(int osd) const {
818 return exists(osd) && (osd_state[osd] & CEPH_OSD_NOIN);
819 }
820
821 bool is_noout(int osd) const {
822 return exists(osd) && (osd_state[osd] & CEPH_OSD_NOOUT);
823 }
824
825 void get_noup_osds(vector<int> *osds) const {
826 assert(osds);
827 osds->clear();
828
829 for (int i = 0; i < max_osd; i++) {
830 if (is_noup(i)) {
831 osds->push_back(i);
832 }
833 }
834 }
835
836 void get_nodown_osds(vector<int> *osds) const {
837 assert(osds);
838 osds->clear();
839
840 for (int i = 0; i < max_osd; i++) {
841 if (is_nodown(i)) {
842 osds->push_back(i);
843 }
844 }
845 }
846
847 void get_noin_osds(vector<int> *osds) const {
848 assert(osds);
849 osds->clear();
850
851 for (int i = 0; i < max_osd; i++) {
852 if (is_noin(i)) {
853 osds->push_back(i);
854 }
855 }
856 }
857
858 void get_noout_osds(vector<int> *osds) const {
859 assert(osds);
860 osds->clear();
861
862 for (int i = 0; i < max_osd; i++) {
863 if (is_noout(i)) {
864 osds->push_back(i);
865 }
866 }
867 }
868
7c673cae
FG
869 /**
870 * check if an entire crush subtree is down
871 */
872 bool subtree_is_down(int id, set<int> *down_cache) const;
873 bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
874
31f18b77
FG
875 bool subtree_type_is_down(CephContext *cct, int id, int subtree_type, set<int> *down_in_osds, set<int> *up_in_osds,
876 set<int> *subtree_up, unordered_map<int, set<int> > *subtree_type_down) const;
877
7c673cae
FG
878 int identify_osd(const entity_addr_t& addr) const;
879 int identify_osd(const uuid_d& u) const;
880 int identify_osd_on_all_channels(const entity_addr_t& addr) const;
881
882 bool have_addr(const entity_addr_t& addr) const {
883 return identify_osd(addr) >= 0;
884 }
885 int find_osd_on_ip(const entity_addr_t& ip) const;
886 const entity_addr_t &get_addr(int osd) const {
887 assert(exists(osd));
888 return osd_addrs->client_addr[osd] ? *osd_addrs->client_addr[osd] : osd_addrs->blank;
889 }
890 const entity_addr_t &get_cluster_addr(int osd) const {
891 assert(exists(osd));
892 if (!osd_addrs->cluster_addr[osd] || *osd_addrs->cluster_addr[osd] == entity_addr_t())
893 return get_addr(osd);
894 return *osd_addrs->cluster_addr[osd];
895 }
896 const entity_addr_t &get_hb_back_addr(int osd) const {
897 assert(exists(osd));
898 return osd_addrs->hb_back_addr[osd] ? *osd_addrs->hb_back_addr[osd] : osd_addrs->blank;
899 }
900 const entity_addr_t &get_hb_front_addr(int osd) const {
901 assert(exists(osd));
902 return osd_addrs->hb_front_addr[osd] ? *osd_addrs->hb_front_addr[osd] : osd_addrs->blank;
903 }
904 entity_inst_t get_most_recent_inst(int osd) const {
905 assert(exists(osd));
906 return entity_inst_t(entity_name_t::OSD(osd), get_addr(osd));
907 }
908 entity_inst_t get_inst(int osd) const {
909 assert(is_up(osd));
910 return get_most_recent_inst(osd);
911 }
912 entity_inst_t get_cluster_inst(int osd) const {
913 assert(is_up(osd));
914 return entity_inst_t(entity_name_t::OSD(osd), get_cluster_addr(osd));
915 }
916 entity_inst_t get_hb_back_inst(int osd) const {
917 assert(is_up(osd));
918 return entity_inst_t(entity_name_t::OSD(osd), get_hb_back_addr(osd));
919 }
920 entity_inst_t get_hb_front_inst(int osd) const {
921 assert(is_up(osd));
922 return entity_inst_t(entity_name_t::OSD(osd), get_hb_front_addr(osd));
923 }
924
925 const uuid_d& get_uuid(int osd) const {
926 assert(exists(osd));
927 return (*osd_uuid)[osd];
928 }
929
930 const epoch_t& get_up_from(int osd) const {
931 assert(exists(osd));
932 return osd_info[osd].up_from;
933 }
934 const epoch_t& get_up_thru(int osd) const {
935 assert(exists(osd));
936 return osd_info[osd].up_thru;
937 }
938 const epoch_t& get_down_at(int osd) const {
939 assert(exists(osd));
940 return osd_info[osd].down_at;
941 }
942 const osd_info_t& get_info(int osd) const {
943 assert(osd < max_osd);
944 return osd_info[osd];
945 }
946
947 const osd_xinfo_t& get_xinfo(int osd) const {
948 assert(osd < max_osd);
949 return osd_xinfo[osd];
950 }
951
952 int get_next_up_osd_after(int n) const {
953 if (get_max_osd() == 0)
954 return -1;
955 for (int i = n + 1; i != n; ++i) {
956 if (i >= get_max_osd())
957 i = 0;
958 if (i == n)
959 break;
960 if (is_up(i))
961 return i;
962 }
963 return -1;
964 }
965
966 int get_previous_up_osd_before(int n) const {
967 if (get_max_osd() == 0)
968 return -1;
969 for (int i = n - 1; i != n; --i) {
970 if (i < 0)
971 i = get_max_osd() - 1;
972 if (i == n)
973 break;
974 if (is_up(i))
975 return i;
976 }
977 return -1;
978 }
979
980 /**
981 * get feature bits required by the current structure
982 *
983 * @param entity_type [in] what entity type we are asking about
984 * @param mask [out] set of all possible map-related features we could set
985 * @return feature bits used by this map
986 */
987 uint64_t get_features(int entity_type, uint64_t *mask) const;
988
989 /**
990 * get oldest *client* version (firefly, hammer, etc.) that can connect given
991 * the feature bits required (according to get_features()).
992 */
31f18b77 993 uint8_t get_min_compat_client() const;
7c673cae
FG
994
995 /**
996 * get intersection of features supported by up osds
997 */
998 uint64_t get_up_osd_features() const;
999
94b18763
FG
1000 void maybe_remove_pg_upmaps(CephContext *cct,
1001 const OSDMap& osdmap,
1002 Incremental *pending_inc);
1003
7c673cae
FG
1004 int apply_incremental(const Incremental &inc);
1005
1006 /// try to re-use/reference addrs in oldmap from newmap
1007 static void dedup(const OSDMap *oldmap, OSDMap *newmap);
1008
1009 static void clean_temps(CephContext *cct, const OSDMap& osdmap,
1010 Incremental *pending_inc);
1011
1012 // serialize, unserialize
1013private:
1014 void encode_client_old(bufferlist& bl) const;
1015 void encode_classic(bufferlist& bl, uint64_t features) const;
1016 void decode_classic(bufferlist::iterator& p);
1017 void post_decode();
1018public:
1019 void encode(bufferlist& bl, uint64_t features=CEPH_FEATURES_ALL) const;
1020 void decode(bufferlist& bl);
1021 void decode(bufferlist::iterator& bl);
1022
1023
1024 /**** mapping facilities ****/
1025 int map_to_pg(
1026 int64_t pool,
1027 const string& name,
1028 const string& key,
1029 const string& nspace,
1030 pg_t *pg) const;
1031 int object_locator_to_pg(const object_t& oid, const object_locator_t& loc,
1032 pg_t &pg) const;
1033 pg_t object_locator_to_pg(const object_t& oid,
1034 const object_locator_t& loc) const {
1035 pg_t pg;
1036 int ret = object_locator_to_pg(oid, loc, pg);
1037 assert(ret == 0);
1038 return pg;
1039 }
1040
1041
1042 static object_locator_t file_to_object_locator(const file_layout_t& layout) {
1043 return object_locator_t(layout.pool_id, layout.pool_ns);
1044 }
1045
1046 ceph_object_layout file_to_object_layout(object_t oid,
1047 file_layout_t& layout) const {
1048 return make_object_layout(oid, layout.pool_id, layout.pool_ns);
1049 }
1050
1051 ceph_object_layout make_object_layout(object_t oid, int pg_pool,
1052 string nspace) const;
1053
1054 int get_pg_num(int pg_pool) const
1055 {
1056 const pg_pool_t *pool = get_pg_pool(pg_pool);
1057 assert(NULL != pool);
1058 return pool->get_pg_num();
1059 }
1060
1061 bool pg_exists(pg_t pgid) const {
1062 const pg_pool_t *p = get_pg_pool(pgid.pool());
1063 return p && pgid.ps() < p->get_pg_num();
1064 }
1065
224ce89b
WB
1066 int get_pg_pool_min_size(pg_t pgid) const {
1067 if (!pg_exists(pgid)) {
1068 return -ENOENT;
1069 }
1070 const pg_pool_t *p = get_pg_pool(pgid.pool());
1071 assert(p);
1072 return p->get_min_size();
1073 }
1074
1075 int get_pg_pool_size(pg_t pgid) const {
1076 if (!pg_exists(pgid)) {
1077 return -ENOENT;
1078 }
1079 const pg_pool_t *p = get_pg_pool(pgid.pool());
1080 assert(p);
1081 return p->get_size();
1082 }
1083
94b18763
FG
1084 int get_pg_pool_crush_rule(pg_t pgid) const {
1085 if (!pg_exists(pgid)) {
1086 return -ENOENT;
1087 }
1088 const pg_pool_t *p = get_pg_pool(pgid.pool());
1089 assert(p);
1090 return p->get_crush_rule();
1091 }
1092
7c673cae
FG
1093private:
1094 /// pg -> (raw osd list)
31f18b77 1095 void _pg_to_raw_osds(
7c673cae
FG
1096 const pg_pool_t& pool, pg_t pg,
1097 vector<int> *osds,
1098 ps_t *ppps) const;
1099 int _pick_primary(const vector<int>& osds) const;
1100 void _remove_nonexistent_osds(const pg_pool_t& pool, vector<int>& osds) const;
1101
1102 void _apply_primary_affinity(ps_t seed, const pg_pool_t& pool,
1103 vector<int> *osds, int *primary) const;
1104
1105 /// apply pg_upmap[_items] mappings
224ce89b 1106 void _apply_upmap(const pg_pool_t& pi, pg_t pg, vector<int> *raw) const;
7c673cae
FG
1107
1108 /// pg -> (up osd list)
1109 void _raw_to_up_osds(const pg_pool_t& pool, const vector<int>& raw,
1110 vector<int> *up) const;
1111
1112
1113 /**
1114 * Get the pg and primary temp, if they are specified.
1115 * @param temp_pg [out] Will be empty or contain the temp PG mapping on return
1116 * @param temp_primary [out] Will be the value in primary_temp, or a value derived
1117 * from the pg_temp (if specified), or -1 if you should use the calculated (up_)primary.
1118 */
1119 void _get_temp_osds(const pg_pool_t& pool, pg_t pg,
1120 vector<int> *temp_pg, int *temp_primary) const;
1121
1122 /**
1123 * map to up and acting. Fills in whatever fields are non-NULL.
1124 */
1125 void _pg_to_up_acting_osds(const pg_t& pg, vector<int> *up, int *up_primary,
1126 vector<int> *acting, int *acting_primary,
1127 bool raw_pg_to_pg = true) const;
1128
1129public:
1130 /***
1131 * This is suitable only for looking at raw CRUSH outputs. It skips
1132 * applying the temp and up checks and should not be used
1133 * by anybody for data mapping purposes.
1134 * raw and primary must be non-NULL
1135 */
31f18b77 1136 void pg_to_raw_osds(pg_t pg, vector<int> *raw, int *primary) const;
7c673cae 1137 /// map a pg to its acting set. @return acting set size
31f18b77 1138 void pg_to_acting_osds(const pg_t& pg, vector<int> *acting,
7c673cae
FG
1139 int *acting_primary) const {
1140 _pg_to_up_acting_osds(pg, NULL, NULL, acting, acting_primary);
7c673cae 1141 }
31f18b77 1142 void pg_to_acting_osds(pg_t pg, vector<int>& acting) const {
7c673cae
FG
1143 return pg_to_acting_osds(pg, &acting, NULL);
1144 }
1145 /**
1146 * This does not apply temp overrides and should not be used
1147 * by anybody for data mapping purposes. Specify both pointers.
1148 */
1149 void pg_to_raw_up(pg_t pg, vector<int> *up, int *primary) const;
1150 /**
1151 * map a pg to its acting set as well as its up set. You must use
1152 * the acting set for data mapping purposes, but some users will
1153 * also find the up set useful for things like deciding what to
1154 * set as pg_temp.
1155 * Each of these pointers must be non-NULL.
1156 */
1157 void pg_to_up_acting_osds(pg_t pg, vector<int> *up, int *up_primary,
1158 vector<int> *acting, int *acting_primary) const {
1159 _pg_to_up_acting_osds(pg, up, up_primary, acting, acting_primary);
1160 }
1161 void pg_to_up_acting_osds(pg_t pg, vector<int>& up, vector<int>& acting) const {
1162 int up_primary, acting_primary;
1163 pg_to_up_acting_osds(pg, &up, &up_primary, &acting, &acting_primary);
1164 }
1165 bool pg_is_ec(pg_t pg) const {
1166 auto i = pools.find(pg.pool());
1167 assert(i != pools.end());
1168 return i->second.ec_pool();
1169 }
1170 bool get_primary_shard(const pg_t& pgid, spg_t *out) const {
1171 auto i = get_pools().find(pgid.pool());
1172 if (i == get_pools().end()) {
1173 return false;
1174 }
1175 if (!i->second.ec_pool()) {
1176 *out = spg_t(pgid);
1177 return true;
1178 }
1179 int primary;
1180 vector<int> acting;
1181 pg_to_acting_osds(pgid, &acting, &primary);
1182 for (uint8_t i = 0; i < acting.size(); ++i) {
1183 if (acting[i] == primary) {
1184 *out = spg_t(pgid, shard_id_t(i));
1185 return true;
1186 }
1187 }
1188 return false;
1189 }
1190
1191 int64_t lookup_pg_pool_name(const string& name) const {
1192 auto p = name_pool.find(name);
1193 if (p == name_pool.end())
1194 return -ENOENT;
1195 return p->second;
1196 }
1197
1198 int64_t get_pool_max() const {
1199 return pool_max;
1200 }
1201 const mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() const {
1202 return pools;
1203 }
1204 mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() {
1205 return pools;
1206 }
3efd9988
FG
1207 void get_pool_ids_by_rule(int rule_id, set<int64_t> *pool_ids) const {
1208 assert(pool_ids);
1209 for (auto &p: pools) {
1210 if ((int)p.second.get_crush_rule() == rule_id) {
1211 pool_ids->insert(p.first);
1212 }
1213 }
1214 }
1215 void get_pool_ids_by_osd(CephContext *cct,
1216 int osd,
1217 set<int64_t> *pool_ids) const;
7c673cae
FG
1218 const string& get_pool_name(int64_t p) const {
1219 auto i = pool_name.find(p);
1220 assert(i != pool_name.end());
1221 return i->second;
1222 }
c07f9fc5
FG
1223 const mempool::osdmap::map<int64_t,string>& get_pool_names() const {
1224 return pool_name;
1225 }
7c673cae
FG
1226 bool have_pg_pool(int64_t p) const {
1227 return pools.count(p);
1228 }
1229 const pg_pool_t* get_pg_pool(int64_t p) const {
1230 auto i = pools.find(p);
1231 if (i != pools.end())
1232 return &i->second;
1233 return NULL;
1234 }
1235 unsigned get_pg_size(pg_t pg) const {
1236 auto p = pools.find(pg.pool());
1237 assert(p != pools.end());
1238 return p->second.get_size();
1239 }
1240 int get_pg_type(pg_t pg) const {
1241 auto p = pools.find(pg.pool());
1242 assert(p != pools.end());
1243 return p->second.get_type();
1244 }
1245
1246
1247 pg_t raw_pg_to_pg(pg_t pg) const {
1248 auto p = pools.find(pg.pool());
1249 assert(p != pools.end());
1250 return p->second.raw_pg_to_pg(pg);
1251 }
1252
1253 // pg -> acting primary osd
1254 int get_pg_acting_primary(pg_t pg) const {
1255 int primary = -1;
1256 _pg_to_up_acting_osds(pg, nullptr, nullptr, nullptr, &primary);
1257 return primary;
1258 }
1259
1260 /*
1261 * check whether an spg_t maps to a particular osd
1262 */
1263 bool is_up_acting_osd_shard(spg_t pg, int osd) const {
1264 vector<int> up, acting;
1265 _pg_to_up_acting_osds(pg.pgid, &up, NULL, &acting, NULL, false);
1266 if (pg.shard == shard_id_t::NO_SHARD) {
1267 if (calc_pg_role(osd, acting, acting.size()) >= 0 ||
1268 calc_pg_role(osd, up, up.size()) >= 0)
1269 return true;
1270 } else {
1271 if (pg.shard < (int)acting.size() && acting[pg.shard] == osd)
1272 return true;
1273 if (pg.shard < (int)up.size() && up[pg.shard] == osd)
1274 return true;
1275 }
1276 return false;
1277 }
1278
1279
1280 /* what replica # is a given osd? 0 primary, -1 for none. */
1281 static int calc_pg_rank(int osd, const vector<int>& acting, int nrep=0);
1282 static int calc_pg_role(int osd, const vector<int>& acting, int nrep=0);
1283 static bool primary_changed(
1284 int oldprimary,
1285 const vector<int> &oldacting,
1286 int newprimary,
1287 const vector<int> &newacting);
1288
1289 /* rank is -1 (stray), 0 (primary), 1,2,3,... (replica) */
1290 int get_pg_acting_rank(pg_t pg, int osd) const {
1291 vector<int> group;
31f18b77
FG
1292 pg_to_acting_osds(pg, group);
1293 return calc_pg_rank(osd, group, group.size());
7c673cae
FG
1294 }
1295 /* role is -1 (stray), 0 (primary), 1 (replica) */
1296 int get_pg_acting_role(const pg_t& pg, int osd) const {
1297 vector<int> group;
31f18b77
FG
1298 pg_to_acting_osds(pg, group);
1299 return calc_pg_role(osd, group, group.size());
7c673cae
FG
1300 }
1301
1302 bool osd_is_valid_op_target(pg_t pg, int osd) const {
1303 int primary;
1304 vector<int> group;
31f18b77 1305 pg_to_acting_osds(pg, &group, &primary);
7c673cae
FG
1306 if (osd == primary)
1307 return true;
1308 if (pg_is_ec(pg))
1309 return false;
1310
31f18b77 1311 return calc_pg_role(osd, group, group.size()) >= 0;
7c673cae
FG
1312 }
1313
1314 int clean_pg_upmaps(
1315 CephContext *cct,
1316 Incremental *pending_inc);
1317
1318 bool try_pg_upmap(
1319 CephContext *cct,
1320 pg_t pg, ///< pg to potentially remap
1321 const set<int>& overfull, ///< osds we'd want to evacuate
1322 const vector<int>& underfull, ///< osds to move to, in order of preference
1323 vector<int> *orig,
1324 vector<int> *out); ///< resulting alternative mapping
1325
1326 int calc_pg_upmaps(
1327 CephContext *cct,
1328 float max_deviation, ///< max deviation from target (value < 1.0)
1329 int max_iterations, ///< max iterations to run
1330 const set<int64_t>& pools, ///< [optional] restrict to pool
1331 Incremental *pending_inc
1332 );
1333
31f18b77
FG
1334 int get_osds_by_bucket_name(const string &name, set<int> *osds) const;
1335
7c673cae
FG
1336 /*
1337 * handy helpers to build simple maps...
1338 */
1339 /**
1340 * Build an OSD map suitable for basic usage. If **num_osd** is >= 0
1341 * it will be initialized with the specified number of OSDs in a
1342 * single host. If **num_osd** is < 0 the layout of the OSD map will
1343 * be built by reading the content of the configuration file.
1344 *
1345 * @param cct [in] in core ceph context
1346 * @param e [in] initial epoch
1347 * @param fsid [in] id of the cluster
1348 * @param num_osd [in] number of OSDs if >= 0 or read from conf if < 0
1349 * @return **0** on success, negative errno on error.
1350 */
224ce89b
WB
1351private:
1352 int build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid,
1353 int num_osd, int pg_bits, int pgp_bits,
1354 bool default_pool);
1355public:
7c673cae 1356 int build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
224ce89b
WB
1357 int num_osd) {
1358 return build_simple_optioned(cct, e, fsid, num_osd, 0, 0, false);
1359 }
1360 int build_simple_with_pool(CephContext *cct, epoch_t e, uuid_d &fsid,
1361 int num_osd, int pg_bits, int pgp_bits) {
1362 return build_simple_optioned(cct, e, fsid, num_osd,
1363 pg_bits, pgp_bits, true);
1364 }
7c673cae
FG
1365 static int _build_crush_types(CrushWrapper& crush);
1366 static int build_simple_crush_map(CephContext *cct, CrushWrapper& crush,
1367 int num_osd, ostream *ss);
1368 static int build_simple_crush_map_from_conf(CephContext *cct,
1369 CrushWrapper& crush,
1370 ostream *ss);
31f18b77
FG
1371 static int build_simple_crush_rules(
1372 CephContext *cct, CrushWrapper& crush,
1373 const string& root,
1374 ostream *ss);
7c673cae 1375
3efd9988
FG
1376 bool crush_rule_in_use(int rule_id) const;
1377
1378 int validate_crush_rules(CrushWrapper *crush, ostream *ss) const;
7c673cae
FG
1379
1380 void clear_temp() {
1381 pg_temp->clear();
1382 primary_temp->clear();
1383 }
1384
1385private:
1386 void print_osd_line(int cur, ostream *out, Formatter *f) const;
1387public:
1388 void print(ostream& out) const;
1389 void print_pools(ostream& out) const;
224ce89b 1390 void print_summary(Formatter *f, ostream& out, const string& prefix) const;
7c673cae 1391 void print_oneline_summary(ostream& out) const;
31f18b77
FG
1392
/// filter bits; each one occupies its own bit so they can be OR-ed together
enum {
  DUMP_IN        = 1 << 0,  // only 'in' osds
  DUMP_OUT       = 1 << 1,  // only 'out' osds
  DUMP_UP        = 1 << 2,  // only 'up' osds
  DUMP_DOWN      = 1 << 3,  // only 'down' osds
  DUMP_DESTROYED = 1 << 4,  // only 'destroyed' osds
};
1400 void print_tree(Formatter *f, ostream *out, unsigned dump_flags=0) const;
7c673cae
FG
1401
1402 int summarize_mapping_stats(
1403 OSDMap *newmap,
1404 const set<int64_t> *pools,
1405 std::string *out,
1406 Formatter *f) const;
1407
1408 string get_flag_string() const;
1409 static string get_flag_string(unsigned flags);
1410 static void dump_erasure_code_profiles(
1411 const mempool::osdmap::map<string,map<string,string> > &profiles,
1412 Formatter *f);
1413 void dump(Formatter *f) const;
1414 static void generate_test_instances(list<OSDMap*>& o);
1415 bool check_new_blacklist_entries() const { return new_blacklist_entries; }
224ce89b
WB
1416
1417 void check_health(health_check_map_t *checks) const;
35e4c445
FG
1418
1419 int parse_osd_id_list(const vector<string>& ls,
1420 set<int> *out,
1421 ostream *ss) const;
7c673cae
FG
1422};
1423WRITE_CLASS_ENCODER_FEATURES(OSDMap)
1424WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental)
1425
1426typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
1427
1428inline ostream& operator<<(ostream& out, const OSDMap& m) {
1429 m.print_oneline_summary(out);
1430 return out;
1431}
1432
31f18b77
FG
1433class PGStatService;
1434
1435void print_osd_utilization(const OSDMap& osdmap,
1436 const PGStatService *pgstat,
1437 ostream& out,
1438 Formatter *f,
1439 bool tree);
7c673cae
FG
1440
1441#endif