]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> | |
8 | * | |
9 | * Author: Loic Dachary <loic@dachary.org> | |
10 | * | |
11 | * This is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU Lesser General Public | |
13 | * License version 2.1, as published by the Free Software | |
14 | * Foundation. See file COPYING. | |
15 | * | |
16 | */ | |
17 | ||
18 | ||
19 | #ifndef CEPH_OSDMAP_H | |
20 | #define CEPH_OSDMAP_H | |
21 | ||
22 | /* | |
23 | * describe properties of the OSD cluster. | |
24 | * disks, disk groups, total # osds, | |
25 | * | |
26 | */ | |
7c673cae FG |
27 | #include <vector> |
28 | #include <list> | |
29 | #include <set> | |
30 | #include <map> | |
11fdf7f2 | 31 | #include <memory> |
9f95a23c TL |
32 | |
33 | #include <boost/smart_ptr/local_shared_ptr.hpp> | |
94b18763 | 34 | #include "include/btree_map.h" |
9f95a23c TL |
35 | #include "include/common_fwd.h" |
36 | #include "include/types.h" | |
37 | #include "common/ceph_releases.h" | |
38 | #include "osd_types.h" | |
39 | ||
40 | //#include "include/ceph_features.h" | |
41 | #include "crush/CrushWrapper.h" | |
7c673cae FG |
42 | |
43 | // forward declaration | |
7c673cae | 44 | class CrushWrapper; |
224ce89b | 45 | class health_check_map_t; |
7c673cae | 46 | |
7c673cae FG |
47 | /* |
48 | * we track up to two intervals during which the osd was alive and | |
49 | * healthy. the most recent is [up_from,up_thru), where up_thru is | |
50 | * the last epoch the osd is known to have _started_. i.e., a lower | |
51 | * bound on the actual osd death. down_at (if it is > up_from) is an | |
52 | * upper bound on the actual osd death. | |
53 | * | |
54 | * the second is the last_clean interval [first,last]. in that case, | |
55 | * the last interval is the last epoch known to have been either | |
56 | * _finished_, or during which the osd cleanly shut down. when | |
57 | * possible, we push this forward to the epoch the osd was eventually | |
58 | * marked down. | |
59 | * | |
60 | * the lost_at is used to allow build_prior to proceed without waiting | |
61 | * for an osd to recover. In certain cases, progress may be blocked | |
62 | * because an osd is down that may contain updates (i.e., a pg may have | |
63 | * gone rw during an interval). If the osd can't be brought online, we | |
64 | * can force things to proceed knowing that we _might_ be losing some | |
65 | * acked writes. If the osd comes back to life later, that's fine too, | |
66 | * but those writes will still be lost (the divergent objects will be | |
67 | * thrown out). | |
68 | */ | |
69 | struct osd_info_t { | |
70 | epoch_t last_clean_begin; // last interval that ended with a clean osd shutdown | |
71 | epoch_t last_clean_end; | |
72 | epoch_t up_from; // epoch osd marked up | |
73 | epoch_t up_thru; // lower bound on actual osd death (if > up_from) | |
74 | epoch_t down_at; // upper bound on actual osd death (if > up_from) | |
75 | epoch_t lost_at; // last epoch we decided data was "lost" | |
76 | ||
77 | osd_info_t() : last_clean_begin(0), last_clean_end(0), | |
78 | up_from(0), up_thru(0), down_at(0), lost_at(0) {} | |
79 | ||
9f95a23c TL |
80 | void dump(ceph::Formatter *f) const; |
81 | void encode(ceph::buffer::list& bl) const; | |
82 | void decode(ceph::buffer::list::const_iterator& bl); | |
83 | static void generate_test_instances(std::list<osd_info_t*>& o); | |
7c673cae FG |
84 | }; |
85 | WRITE_CLASS_ENCODER(osd_info_t) | |
86 | ||
9f95a23c | 87 | std::ostream& operator<<(std::ostream& out, const osd_info_t& info); |
7c673cae FG |
88 | |
89 | struct osd_xinfo_t { | |
90 | utime_t down_stamp; ///< timestamp when we were last marked down | |
91 | float laggy_probability; ///< encoded as __u32: 0 = definitely not laggy, 0xffffffff definitely laggy | |
92 | __u32 laggy_interval; ///< average interval between being marked laggy and recovering | |
93 | uint64_t features; ///< features supported by this osd we should know about | |
94 | __u32 old_weight; ///< weight prior to being auto marked out | |
9f95a23c TL |
95 | utime_t last_purged_snaps_scrub; ///< last scrub of purged_snaps |
96 | epoch_t dead_epoch = 0; ///< last epoch we were confirmed dead (not just down) | |
7c673cae FG |
97 | |
98 | osd_xinfo_t() : laggy_probability(0), laggy_interval(0), | |
99 | features(0), old_weight(0) {} | |
100 | ||
9f95a23c TL |
101 | void dump(ceph::Formatter *f) const; |
102 | void encode(ceph::buffer::list& bl, uint64_t features) const; | |
103 | void decode(ceph::buffer::list::const_iterator& bl); | |
104 | static void generate_test_instances(std::list<osd_xinfo_t*>& o); | |
7c673cae | 105 | }; |
9f95a23c | 106 | WRITE_CLASS_ENCODER_FEATURES(osd_xinfo_t) |
7c673cae | 107 | |
9f95a23c | 108 | std::ostream& operator<<(std::ostream& out, const osd_xinfo_t& xi); |
7c673cae FG |
109 | |
110 | ||
31f18b77 FG |
111 | struct PGTempMap { |
112 | #if 1 | |
9f95a23c | 113 | ceph::buffer::list data; |
eafe8130 | 114 | typedef btree::btree_map<pg_t,ceph_le32*> map_t; |
31f18b77 FG |
115 | map_t map; |
116 | ||
9f95a23c | 117 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 | 118 | using ceph::encode; |
31f18b77 | 119 | uint32_t n = map.size(); |
11fdf7f2 | 120 | encode(n, bl); |
31f18b77 | 121 | for (auto &p : map) { |
11fdf7f2 | 122 | encode(p.first, bl); |
eafe8130 | 123 | bl.append((char*)p.second, (*p.second + 1) * sizeof(ceph_le32)); |
31f18b77 FG |
124 | } |
125 | } | |
9f95a23c | 126 | void decode(ceph::buffer::list::const_iterator& p) { |
11fdf7f2 | 127 | using ceph::decode; |
31f18b77 FG |
128 | data.clear(); |
129 | map.clear(); | |
130 | uint32_t n; | |
11fdf7f2 | 131 | decode(n, p); |
31f18b77 FG |
132 | if (!n) |
133 | return; | |
11fdf7f2 | 134 | auto pstart = p; |
31f18b77 | 135 | size_t start_off = pstart.get_off(); |
9f95a23c | 136 | std::vector<std::pair<pg_t,size_t>> offsets; |
31f18b77 FG |
137 | offsets.resize(n); |
138 | for (unsigned i=0; i<n; ++i) { | |
139 | pg_t pgid; | |
11fdf7f2 | 140 | decode(pgid, p); |
31f18b77 FG |
141 | offsets[i].first = pgid; |
142 | offsets[i].second = p.get_off() - start_off; | |
143 | uint32_t vn; | |
11fdf7f2 | 144 | decode(vn, p); |
9f95a23c | 145 | p += vn * sizeof(int32_t); |
31f18b77 FG |
146 | } |
147 | size_t len = p.get_off() - start_off; | |
148 | pstart.copy(len, data); | |
149 | if (data.get_num_buffers() > 1) { | |
150 | data.rebuild(); | |
151 | } | |
152 | //map.reserve(n); | |
153 | char *start = data.c_str(); | |
154 | for (auto i : offsets) { | |
9f95a23c | 155 | map.insert(map.end(), std::make_pair(i.first, (ceph_le32*)(start + i.second))); |
31f18b77 FG |
156 | } |
157 | } | |
158 | void rebuild() { | |
9f95a23c | 159 | ceph::buffer::list bl; |
31f18b77 | 160 | encode(bl); |
11fdf7f2 | 161 | auto p = std::cbegin(bl); |
31f18b77 FG |
162 | decode(p); |
163 | } | |
164 | friend bool operator==(const PGTempMap& l, const PGTempMap& r) { | |
165 | return | |
166 | l.map.size() == r.map.size() && | |
167 | l.data.contents_equal(r.data); | |
168 | } | |
169 | ||
170 | class iterator { | |
171 | map_t::const_iterator it; | |
172 | map_t::const_iterator end; | |
9f95a23c | 173 | std::pair<pg_t,std::vector<int32_t>> current; |
31f18b77 FG |
174 | void init_current() { |
175 | if (it != end) { | |
176 | current.first = it->first; | |
11fdf7f2 | 177 | ceph_assert(it->second); |
31f18b77 | 178 | current.second.resize(*it->second); |
eafe8130 TL |
179 | ceph_le32 *p = it->second + 1; |
180 | for (uint32_t n = 0; n < *it->second; ++n, ++p) { | |
31f18b77 FG |
181 | current.second[n] = *p; |
182 | } | |
183 | } | |
184 | } | |
185 | public: | |
186 | iterator(map_t::const_iterator p, | |
187 | map_t::const_iterator e) | |
188 | : it(p), end(e) { | |
189 | init_current(); | |
190 | } | |
191 | ||
9f95a23c | 192 | const std::pair<pg_t,std::vector<int32_t>>& operator*() const { |
31f18b77 FG |
193 | return current; |
194 | } | |
9f95a23c | 195 | const std::pair<pg_t,std::vector<int32_t>>* operator->() const { |
31f18b77 FG |
196 | return ¤t; |
197 | } | |
198 | friend bool operator==(const iterator& l, const iterator& r) { | |
199 | return l.it == r.it; | |
200 | } | |
201 | friend bool operator!=(const iterator& l, const iterator& r) { | |
202 | return l.it != r.it; | |
203 | } | |
204 | iterator& operator++() { | |
205 | ++it; | |
206 | if (it != end) | |
207 | init_current(); | |
208 | return *this; | |
209 | } | |
210 | iterator operator++(int) { | |
211 | iterator r = *this; | |
212 | ++it; | |
213 | if (it != end) | |
214 | init_current(); | |
215 | return r; | |
216 | } | |
217 | }; | |
218 | iterator begin() const { | |
219 | return iterator(map.begin(), map.end()); | |
220 | } | |
221 | iterator end() const { | |
222 | return iterator(map.end(), map.end()); | |
223 | } | |
224 | iterator find(pg_t pgid) const { | |
225 | return iterator(map.find(pgid), map.end()); | |
226 | } | |
227 | size_t size() const { | |
228 | return map.size(); | |
229 | } | |
230 | size_t count(pg_t pgid) const { | |
231 | return map.count(pgid); | |
232 | } | |
233 | void erase(pg_t pgid) { | |
234 | map.erase(pgid); | |
235 | } | |
236 | void clear() { | |
237 | map.clear(); | |
238 | data.clear(); | |
239 | } | |
240 | void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) { | |
11fdf7f2 | 241 | using ceph::encode; |
eafe8130 | 242 | size_t need = sizeof(ceph_le32) * (1 + v.size()); |
31f18b77 | 243 | if (need < data.get_append_buffer_unused_tail_length()) { |
9f95a23c | 244 | ceph::buffer::ptr z(data.get_append_buffer_unused_tail_length()); |
31f18b77 FG |
245 | z.zero(); |
246 | data.append(z.c_str(), z.length()); | |
247 | } | |
11fdf7f2 | 248 | encode(v, data); |
eafe8130 | 249 | map[pgid] = (ceph_le32*)(data.back().end_c_str()) - (1 + v.size()); |
31f18b77 FG |
250 | } |
251 | mempool::osdmap::vector<int32_t> get(pg_t pgid) { | |
252 | mempool::osdmap::vector<int32_t> v; | |
eafe8130 | 253 | ceph_le32 *p = map[pgid]; |
31f18b77 FG |
254 | size_t n = *p++; |
255 | v.resize(n); | |
256 | for (size_t i = 0; i < n; ++i, ++p) { | |
257 | v[i] = *p; | |
258 | } | |
259 | return v; | |
260 | } | |
261 | #else | |
262 | // trivial implementation | |
263 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > pg_temp; | |
264 | ||
9f95a23c | 265 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 | 266 | encode(pg_temp, bl); |
31f18b77 | 267 | } |
9f95a23c | 268 | void decode(ceph::buffer::list::const_iterator& p) { |
11fdf7f2 | 269 | decode(pg_temp, p); |
31f18b77 FG |
270 | } |
271 | friend bool operator==(const PGTempMap& l, const PGTempMap& r) { | |
272 | return | |
273 | l.pg_temp.size() == r.pg_temp.size() && | |
274 | l.pg_temp == r.pg_temp; | |
275 | } | |
276 | ||
277 | class iterator { | |
278 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> >::const_iterator it; | |
279 | public: | |
280 | iterator(mempool::osdmap::map<pg_t, | |
281 | mempool::osdmap::vector<int32_t> >::const_iterator p) | |
282 | : it(p) {} | |
283 | ||
9f95a23c | 284 | std::pair<pg_t,const mempool::osdmap::vector<int32_t>&> operator*() const { |
31f18b77 FG |
285 | return *it; |
286 | } | |
9f95a23c | 287 | const std::pair<const pg_t,mempool::osdmap::vector<int32_t>>* operator->() const { |
31f18b77 FG |
288 | return &*it; |
289 | } | |
290 | friend bool operator==(const iterator& l, const iterator& r) { | |
291 | return l.it == r.it; | |
292 | } | |
293 | friend bool operator!=(const iterator& l, const iterator& r) { | |
294 | return l.it != r.it; | |
295 | } | |
296 | iterator& operator++() { | |
297 | ++it; | |
298 | return *this; | |
299 | } | |
300 | iterator operator++(int) { | |
301 | iterator r = *this; | |
302 | ++it; | |
303 | return r; | |
304 | } | |
305 | }; | |
306 | iterator begin() const { | |
307 | return iterator(pg_temp.cbegin()); | |
308 | } | |
309 | iterator end() const { | |
310 | return iterator(pg_temp.cend()); | |
311 | } | |
312 | iterator find(pg_t pgid) const { | |
313 | return iterator(pg_temp.find(pgid)); | |
314 | } | |
315 | size_t size() const { | |
316 | return pg_temp.size(); | |
317 | } | |
318 | size_t count(pg_t pgid) const { | |
319 | return pg_temp.count(pgid); | |
320 | } | |
321 | void erase(pg_t pgid) { | |
322 | pg_temp.erase(pgid); | |
323 | } | |
324 | void clear() { | |
325 | pg_temp.clear(); | |
326 | } | |
327 | void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) { | |
328 | pg_temp[pgid] = v; | |
329 | } | |
330 | const mempool::osdmap::vector<int32_t>& get(pg_t pgid) { | |
331 | return pg_temp.at(pgid); | |
332 | } | |
333 | #endif | |
9f95a23c | 334 | void dump(ceph::Formatter *f) const { |
31f18b77 FG |
335 | for (const auto &pg : *this) { |
336 | f->open_object_section("osds"); | |
337 | f->dump_stream("pgid") << pg.first; | |
338 | f->open_array_section("osds"); | |
339 | for (const auto osd : pg.second) | |
340 | f->dump_int("osd", osd); | |
341 | f->close_section(); | |
342 | f->close_section(); | |
343 | } | |
344 | } | |
345 | }; | |
346 | WRITE_CLASS_ENCODER(PGTempMap) | |
347 | ||
7c673cae FG |
348 | /** OSDMap |
349 | */ | |
350 | class OSDMap { | |
351 | public: | |
352 | MEMPOOL_CLASS_HELPERS(); | |
353 | ||
354 | class Incremental { | |
355 | public: | |
356 | MEMPOOL_CLASS_HELPERS(); | |
357 | ||
358 | /// feature bits we were encoded with. the subsequent OSDMap | |
359 | /// encoding should match. | |
360 | uint64_t encode_features; | |
361 | uuid_d fsid; | |
362 | epoch_t epoch; // new epoch; we are a diff from epoch-1 to epoch | |
363 | utime_t modified; | |
364 | int64_t new_pool_max; //incremented by the OSDMonitor on each pool create | |
365 | int32_t new_flags; | |
9f95a23c | 366 | ceph_release_t new_require_osd_release{0xff}; |
7c673cae FG |
367 | |
368 | // full (rare) | |
9f95a23c TL |
369 | ceph::buffer::list fullmap; // in lieu of below. |
370 | ceph::buffer::list crush; | |
7c673cae FG |
371 | |
372 | // incremental | |
373 | int32_t new_max_osd; | |
374 | mempool::osdmap::map<int64_t,pg_pool_t> new_pools; | |
9f95a23c | 375 | mempool::osdmap::map<int64_t,std::string> new_pool_names; |
7c673cae | 376 | mempool::osdmap::set<int64_t> old_pools; |
9f95a23c TL |
377 | mempool::osdmap::map<std::string,std::map<std::string,std::string> > new_erasure_code_profiles; |
378 | mempool::osdmap::vector<std::string> old_erasure_code_profiles; | |
11fdf7f2 TL |
379 | mempool::osdmap::map<int32_t,entity_addrvec_t> new_up_client; |
380 | mempool::osdmap::map<int32_t,entity_addrvec_t> new_up_cluster; | |
31f18b77 | 381 | mempool::osdmap::map<int32_t,uint32_t> new_state; // XORed onto previous state. |
7c673cae FG |
382 | mempool::osdmap::map<int32_t,uint32_t> new_weight; |
383 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > new_pg_temp; // [] to remove | |
384 | mempool::osdmap::map<pg_t, int32_t> new_primary_temp; // [-1] to remove | |
385 | mempool::osdmap::map<int32_t,uint32_t> new_primary_affinity; | |
386 | mempool::osdmap::map<int32_t,epoch_t> new_up_thru; | |
9f95a23c | 387 | mempool::osdmap::map<int32_t,std::pair<epoch_t,epoch_t> > new_last_clean_interval; |
7c673cae FG |
388 | mempool::osdmap::map<int32_t,epoch_t> new_lost; |
389 | mempool::osdmap::map<int32_t,uuid_d> new_uuid; | |
390 | mempool::osdmap::map<int32_t,osd_xinfo_t> new_xinfo; | |
391 | ||
392 | mempool::osdmap::map<entity_addr_t,utime_t> new_blacklist; | |
393 | mempool::osdmap::vector<entity_addr_t> old_blacklist; | |
11fdf7f2 TL |
394 | mempool::osdmap::map<int32_t, entity_addrvec_t> new_hb_back_up; |
395 | mempool::osdmap::map<int32_t, entity_addrvec_t> new_hb_front_up; | |
7c673cae FG |
396 | |
397 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap; | |
9f95a23c | 398 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<std::pair<int32_t,int32_t>>> new_pg_upmap_items; |
7c673cae | 399 | mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items; |
11fdf7f2 TL |
400 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps; |
401 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps; | |
7c673cae | 402 | |
81eedcae TL |
403 | mempool::osdmap::map<int32_t,uint32_t> new_crush_node_flags; |
404 | mempool::osdmap::map<int32_t,uint32_t> new_device_class_flags; | |
405 | ||
9f95a23c | 406 | std::string cluster_snapshot; |
7c673cae FG |
407 | |
408 | float new_nearfull_ratio = -1; | |
409 | float new_backfillfull_ratio = -1; | |
410 | float new_full_ratio = -1; | |
411 | ||
9f95a23c | 412 | ceph_release_t new_require_min_compat_client{0xff}; |
7c673cae | 413 | |
11fdf7f2 TL |
414 | utime_t new_last_up_change, new_last_in_change; |
415 | ||
7c673cae FG |
416 | mutable bool have_crc; ///< crc values are defined |
417 | uint32_t full_crc; ///< crc of the resulting OSDMap | |
418 | mutable uint32_t inc_crc; ///< crc of this incremental | |
419 | ||
420 | int get_net_marked_out(const OSDMap *previous) const; | |
421 | int get_net_marked_down(const OSDMap *previous) const; | |
422 | int identify_osd(uuid_d u) const; | |
423 | ||
9f95a23c TL |
424 | void encode_client_old(ceph::buffer::list& bl) const; |
425 | void encode_classic(ceph::buffer::list& bl, uint64_t features) const; | |
426 | void encode(ceph::buffer::list& bl, uint64_t features=CEPH_FEATURES_ALL) const; | |
427 | void decode_classic(ceph::buffer::list::const_iterator &p); | |
428 | void decode(ceph::buffer::list::const_iterator &bl); | |
429 | void dump(ceph::Formatter *f) const; | |
430 | static void generate_test_instances(std::list<Incremental*>& o); | |
7c673cae FG |
431 | |
432 | explicit Incremental(epoch_t e=0) : | |
433 | encode_features(0), | |
434 | epoch(e), new_pool_max(-1), new_flags(-1), new_max_osd(-1), | |
435 | have_crc(false), full_crc(0), inc_crc(0) { | |
7c673cae | 436 | } |
9f95a23c | 437 | explicit Incremental(ceph::buffer::list &bl) { |
11fdf7f2 | 438 | auto p = std::cbegin(bl); |
7c673cae FG |
439 | decode(p); |
440 | } | |
9f95a23c | 441 | explicit Incremental(ceph::buffer::list::const_iterator &p) { |
7c673cae FG |
442 | decode(p); |
443 | } | |
444 | ||
445 | pg_pool_t *get_new_pool(int64_t pool, const pg_pool_t *orig) { | |
446 | if (new_pools.count(pool) == 0) | |
447 | new_pools[pool] = *orig; | |
448 | return &new_pools[pool]; | |
449 | } | |
9f95a23c | 450 | bool has_erasure_code_profile(const std::string &name) const { |
7c673cae FG |
451 | auto i = new_erasure_code_profiles.find(name); |
452 | return i != new_erasure_code_profiles.end(); | |
453 | } | |
9f95a23c TL |
454 | void set_erasure_code_profile(const std::string &name, |
455 | const std::map<std::string,std::string>& profile) { | |
7c673cae FG |
456 | new_erasure_code_profiles[name] = profile; |
457 | } | |
9f95a23c | 458 | mempool::osdmap::map<std::string,std::map<std::string,std::string>> get_erasure_code_profiles() const { |
11fdf7f2 TL |
459 | return new_erasure_code_profiles; |
460 | } | |
7c673cae | 461 | |
11fdf7f2 | 462 | /// propagate update pools' snap metadata to any of their tiers |
7c673cae | 463 | int propagate_snaps_to_tiers(CephContext *cct, const OSDMap &base); |
31f18b77 FG |
464 | |
465 | /// filter out osds with any pending state changing | |
9f95a23c | 466 | size_t get_pending_state_osds(std::vector<int> *osds) { |
11fdf7f2 | 467 | ceph_assert(osds); |
31f18b77 FG |
468 | osds->clear(); |
469 | ||
470 | for (auto &p : new_state) { | |
471 | osds->push_back(p.first); | |
472 | } | |
473 | ||
474 | return osds->size(); | |
475 | } | |
476 | ||
477 | bool pending_osd_has_state(int osd, unsigned state) { | |
478 | return new_state.count(osd) && (new_state[osd] & state) != 0; | |
479 | } | |
480 | ||
81eedcae TL |
481 | bool pending_osd_state_set(int osd, unsigned state) { |
482 | if (pending_osd_has_state(osd, state)) | |
483 | return false; | |
31f18b77 | 484 | new_state[osd] |= state; |
81eedcae | 485 | return true; |
31f18b77 FG |
486 | } |
487 | ||
488 | // cancel the specified pending osd state if there is any | |
489 | // return ture on success, false otherwise. | |
490 | bool pending_osd_state_clear(int osd, unsigned state) { | |
491 | if (!pending_osd_has_state(osd, state)) { | |
492 | // never has been set or already has been cancelled. | |
493 | return false; | |
494 | } | |
495 | ||
496 | new_state[osd] &= ~state; | |
11fdf7f2 TL |
497 | if (!new_state[osd]) { |
498 | // all flags cleared | |
499 | new_state.erase(osd); | |
500 | } | |
31f18b77 FG |
501 | return true; |
502 | } | |
503 | ||
9f95a23c TL |
504 | bool in_new_removed_snaps(int64_t pool, snapid_t snap) const { |
505 | auto p = new_removed_snaps.find(pool); | |
506 | if (p == new_removed_snaps.end()) { | |
507 | return false; | |
508 | } | |
509 | return p->second.contains(snap); | |
510 | } | |
7c673cae FG |
511 | }; |
512 | ||
513 | private: | |
514 | uuid_d fsid; | |
515 | epoch_t epoch; // what epoch of the osd cluster descriptor is this | |
516 | utime_t created, modified; // epoch start time | |
517 | int32_t pool_max; // the largest pool num, ever | |
518 | ||
519 | uint32_t flags; | |
520 | ||
521 | int num_osd; // not saved; see calc_num_osds | |
522 | int num_up_osd; // not saved; see calc_num_osds | |
523 | int num_in_osd; // not saved; see calc_num_osds | |
524 | ||
525 | int32_t max_osd; | |
9f95a23c | 526 | std::vector<uint32_t> osd_state; |
7c673cae | 527 | |
81eedcae TL |
528 | mempool::osdmap::map<int32_t,uint32_t> crush_node_flags; // crush node -> CEPH_OSD_* flags |
529 | mempool::osdmap::map<int32_t,uint32_t> device_class_flags; // device class -> CEPH_OSD_* flags | |
530 | ||
11fdf7f2 TL |
531 | utime_t last_up_change, last_in_change; |
532 | ||
28e407b8 AA |
533 | // These features affect OSDMap[::Incremental] encoding, or the |
534 | // encoding of some type embedded therein (CrushWrapper, something | |
535 | // from osd_types, etc.). | |
536 | static constexpr uint64_t SIGNIFICANT_FEATURES = | |
537 | CEPH_FEATUREMASK_PGID64 | | |
538 | CEPH_FEATUREMASK_PGPOOL3 | | |
539 | CEPH_FEATUREMASK_OSDENC | | |
540 | CEPH_FEATUREMASK_OSDMAP_ENC | | |
541 | CEPH_FEATUREMASK_OSD_POOLRESEND | | |
542 | CEPH_FEATUREMASK_NEW_OSDOP_ENCODING | | |
543 | CEPH_FEATUREMASK_MSG_ADDR2 | | |
544 | CEPH_FEATUREMASK_CRUSH_TUNABLES5 | | |
545 | CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS | | |
11fdf7f2 TL |
546 | CEPH_FEATUREMASK_SERVER_LUMINOUS | |
547 | CEPH_FEATUREMASK_SERVER_MIMIC | | |
9f95a23c TL |
548 | CEPH_FEATUREMASK_SERVER_NAUTILUS | |
549 | CEPH_FEATUREMASK_SERVER_OCTOPUS; | |
11fdf7f2 | 550 | |
7c673cae | 551 | struct addrs_s { |
11fdf7f2 TL |
552 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > client_addrs; |
553 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > cluster_addrs; | |
554 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > hb_back_addrs; | |
555 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > hb_front_addrs; | |
7c673cae | 556 | }; |
11fdf7f2 TL |
557 | std::shared_ptr<addrs_s> osd_addrs; |
558 | ||
559 | entity_addrvec_t _blank_addrvec; | |
7c673cae FG |
560 | |
561 | mempool::osdmap::vector<__u32> osd_weight; // 16.16 fixed point, 0x10000 = "in", 0 = "out" | |
562 | mempool::osdmap::vector<osd_info_t> osd_info; | |
11fdf7f2 TL |
563 | std::shared_ptr<PGTempMap> pg_temp; // temp pg mapping (e.g. while we rebuild) |
564 | std::shared_ptr< mempool::osdmap::map<pg_t,int32_t > > primary_temp; // temp primary mapping (e.g. while we rebuild) | |
565 | std::shared_ptr< mempool::osdmap::vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline | |
7c673cae FG |
566 | |
567 | // remap (post-CRUSH, pre-up) | |
568 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> pg_upmap; ///< remap pg | |
9f95a23c | 569 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<std::pair<int32_t,int32_t>>> pg_upmap_items; ///< remap osds in up set |
7c673cae FG |
570 | |
571 | mempool::osdmap::map<int64_t,pg_pool_t> pools; | |
9f95a23c TL |
572 | mempool::osdmap::map<int64_t,std::string> pool_name; |
573 | mempool::osdmap::map<std::string, std::map<std::string,std::string>> erasure_code_profiles; | |
574 | mempool::osdmap::map<std::string,int64_t> name_pool; | |
7c673cae | 575 | |
11fdf7f2 | 576 | std::shared_ptr< mempool::osdmap::vector<uuid_d> > osd_uuid; |
7c673cae FG |
577 | mempool::osdmap::vector<osd_xinfo_t> osd_xinfo; |
578 | ||
579 | mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist; | |
580 | ||
11fdf7f2 TL |
581 | /// queue of snaps to remove |
582 | mempool::osdmap::map<int64_t, snap_interval_set_t> removed_snaps_queue; | |
583 | ||
584 | /// removed_snaps additions this epoch | |
585 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps; | |
586 | ||
587 | /// removed_snaps removals this epoch | |
588 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps; | |
589 | ||
7c673cae | 590 | epoch_t cluster_snapshot_epoch; |
9f95a23c | 591 | std::string cluster_snapshot; |
7c673cae FG |
592 | bool new_blacklist_entries; |
593 | ||
594 | float full_ratio = 0, backfillfull_ratio = 0, nearfull_ratio = 0; | |
595 | ||
596 | /// min compat client we want to support | |
9f95a23c | 597 | ceph_release_t require_min_compat_client{ceph_release_t::unknown}; |
7c673cae | 598 | |
31f18b77 FG |
599 | public: |
600 | /// require osds to run at least this release | |
9f95a23c | 601 | ceph_release_t require_osd_release{ceph_release_t::unknown}; |
31f18b77 FG |
602 | |
603 | private: | |
7c673cae FG |
604 | mutable uint64_t cached_up_osd_features; |
605 | ||
606 | mutable bool crc_defined; | |
607 | mutable uint32_t crc; | |
608 | ||
609 | void _calc_up_osd_features(); | |
610 | ||
611 | public: | |
612 | bool have_crc() const { return crc_defined; } | |
613 | uint32_t get_crc() const { return crc; } | |
614 | ||
11fdf7f2 | 615 | std::shared_ptr<CrushWrapper> crush; // hierarchical map |
31f18b77 FG |
616 | private: |
617 | uint32_t crush_version = 1; | |
7c673cae FG |
618 | |
619 | friend class OSDMonitor; | |
620 | ||
621 | public: | |
622 | OSDMap() : epoch(0), | |
224ce89b | 623 | pool_max(0), |
7c673cae FG |
624 | flags(0), |
625 | num_osd(0), num_up_osd(0), num_in_osd(0), | |
626 | max_osd(0), | |
627 | osd_addrs(std::make_shared<addrs_s>()), | |
31f18b77 | 628 | pg_temp(std::make_shared<PGTempMap>()), |
7c673cae FG |
629 | primary_temp(std::make_shared<mempool::osdmap::map<pg_t,int32_t>>()), |
630 | osd_uuid(std::make_shared<mempool::osdmap::vector<uuid_d>>()), | |
631 | cluster_snapshot_epoch(0), | |
632 | new_blacklist_entries(false), | |
633 | cached_up_osd_features(0), | |
634 | crc_defined(false), crc(0), | |
635 | crush(std::make_shared<CrushWrapper>()) { | |
7c673cae FG |
636 | } |
637 | ||
7c673cae FG |
638 | private: |
639 | OSDMap(const OSDMap& other) = default; | |
640 | OSDMap& operator=(const OSDMap& other) = default; | |
641 | public: | |
642 | ||
28e407b8 AA |
643 | /// return feature mask subset that is relevant to OSDMap encoding |
644 | static uint64_t get_significant_features(uint64_t features) { | |
645 | return SIGNIFICANT_FEATURES & features; | |
646 | } | |
647 | ||
648 | uint64_t get_encoding_features() const; | |
649 | ||
7c673cae FG |
650 | void deepish_copy_from(const OSDMap& o) { |
651 | *this = o; | |
652 | primary_temp.reset(new mempool::osdmap::map<pg_t,int32_t>(*o.primary_temp)); | |
31f18b77 | 653 | pg_temp.reset(new PGTempMap(*o.pg_temp)); |
7c673cae FG |
654 | osd_uuid.reset(new mempool::osdmap::vector<uuid_d>(*o.osd_uuid)); |
655 | ||
656 | if (o.osd_primary_affinity) | |
657 | osd_primary_affinity.reset(new mempool::osdmap::vector<__u32>(*o.osd_primary_affinity)); | |
658 | ||
11fdf7f2 | 659 | // NOTE: this still references shared entity_addrvec_t's. |
7c673cae FG |
660 | osd_addrs.reset(new addrs_s(*o.osd_addrs)); |
661 | ||
662 | // NOTE: we do not copy crush. note that apply_incremental will | |
663 | // allocate a new CrushWrapper, though. | |
664 | } | |
665 | ||
666 | // map info | |
667 | const uuid_d& get_fsid() const { return fsid; } | |
668 | void set_fsid(uuid_d& f) { fsid = f; } | |
669 | ||
670 | epoch_t get_epoch() const { return epoch; } | |
671 | void inc_epoch() { epoch++; } | |
672 | ||
673 | void set_epoch(epoch_t e); | |
674 | ||
31f18b77 FG |
675 | uint32_t get_crush_version() const { |
676 | return crush_version; | |
677 | } | |
678 | ||
7c673cae FG |
679 | /* stamps etc */ |
680 | const utime_t& get_created() const { return created; } | |
681 | const utime_t& get_modified() const { return modified; } | |
682 | ||
683 | bool is_blacklisted(const entity_addr_t& a) const; | |
11fdf7f2 | 684 | bool is_blacklisted(const entity_addrvec_t& a) const; |
9f95a23c | 685 | void get_blacklist(std::list<std::pair<entity_addr_t,utime_t > > *bl) const; |
31f18b77 | 686 | void get_blacklist(std::set<entity_addr_t> *bl) const; |
7c673cae | 687 | |
9f95a23c | 688 | std::string get_cluster_snapshot() const { |
7c673cae FG |
689 | if (cluster_snapshot_epoch == epoch) |
690 | return cluster_snapshot; | |
9f95a23c | 691 | return std::string(); |
7c673cae FG |
692 | } |
693 | ||
694 | float get_full_ratio() const { | |
695 | return full_ratio; | |
696 | } | |
697 | float get_backfillfull_ratio() const { | |
698 | return backfillfull_ratio; | |
699 | } | |
700 | float get_nearfull_ratio() const { | |
701 | return nearfull_ratio; | |
702 | } | |
3efd9988 | 703 | void get_full_pools(CephContext *cct, |
9f95a23c TL |
704 | std::set<int64_t> *full, |
705 | std::set<int64_t> *backfillfull, | |
706 | std::set<int64_t> *nearfull) const; | |
707 | void get_full_osd_counts(std::set<int> *full, std::set<int> *backfill, | |
708 | std::set<int> *nearfull) const; | |
31f18b77 FG |
709 | |
710 | ||
7c673cae FG |
711 | /***** cluster state *****/ |
712 | /* osds */ | |
713 | int get_max_osd() const { return max_osd; } | |
714 | void set_max_osd(int m); | |
715 | ||
716 | unsigned get_num_osds() const { | |
717 | return num_osd; | |
718 | } | |
719 | unsigned get_num_up_osds() const { | |
720 | return num_up_osd; | |
721 | } | |
722 | unsigned get_num_in_osds() const { | |
723 | return num_in_osd; | |
724 | } | |
725 | /// recalculate cached values for get_num{,_up,_in}_osds | |
726 | int calc_num_osds(); | |
727 | ||
9f95a23c TL |
728 | void get_all_osds(std::set<int32_t>& ls) const; |
729 | void get_up_osds(std::set<int32_t>& ls) const; | |
81eedcae | 730 | void get_out_existing_osds(std::set<int32_t>& ls) const; |
7c673cae FG |
731 | unsigned get_num_pg_temp() const { |
732 | return pg_temp->size(); | |
733 | } | |
734 | ||
735 | int get_flags() const { return flags; } | |
736 | bool test_flag(int f) const { return flags & f; } | |
737 | void set_flag(int f) { flags |= f; } | |
738 | void clear_flag(int f) { flags &= ~f; } | |
739 | ||
9f95a23c | 740 | void get_flag_set(std::set<std::string> *flagset) const; |
11fdf7f2 | 741 | |
9f95a23c | 742 | static void calc_state_set(int state, std::set<std::string>& st); |
7c673cae FG |
743 | |
744 | int get_state(int o) const { | |
11fdf7f2 | 745 | ceph_assert(o < max_osd); |
7c673cae FG |
746 | return osd_state[o]; |
747 | } | |
9f95a23c | 748 | int get_state(int o, std::set<std::string>& st) const { |
11fdf7f2 | 749 | ceph_assert(o < max_osd); |
7c673cae FG |
750 | unsigned t = osd_state[o]; |
751 | calc_state_set(t, st); | |
752 | return osd_state[o]; | |
753 | } | |
754 | void set_state(int o, unsigned s) { | |
11fdf7f2 | 755 | ceph_assert(o < max_osd); |
7c673cae FG |
756 | osd_state[o] = s; |
757 | } | |
758 | void set_weight(int o, unsigned w) { | |
11fdf7f2 | 759 | ceph_assert(o < max_osd); |
7c673cae FG |
760 | osd_weight[o] = w; |
761 | if (w) | |
762 | osd_state[o] |= CEPH_OSD_EXISTS; | |
763 | } | |
764 | unsigned get_weight(int o) const { | |
11fdf7f2 | 765 | ceph_assert(o < max_osd); |
7c673cae FG |
766 | return osd_weight[o]; |
767 | } | |
768 | float get_weightf(int o) const { | |
769 | return (float)get_weight(o) / (float)CEPH_OSD_IN; | |
770 | } | |
9f95a23c | 771 | void adjust_osd_weights(const std::map<int,double>& weights, Incremental& inc) const; |
7c673cae FG |
772 | |
773 | void set_primary_affinity(int o, int w) { | |
11fdf7f2 | 774 | ceph_assert(o < max_osd); |
7c673cae FG |
775 | if (!osd_primary_affinity) |
776 | osd_primary_affinity.reset( | |
777 | new mempool::osdmap::vector<__u32>( | |
778 | max_osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY)); | |
779 | (*osd_primary_affinity)[o] = w; | |
780 | } | |
781 | unsigned get_primary_affinity(int o) const { | |
11fdf7f2 | 782 | ceph_assert(o < max_osd); |
7c673cae FG |
783 | if (!osd_primary_affinity) |
784 | return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; | |
785 | return (*osd_primary_affinity)[o]; | |
786 | } | |
787 | float get_primary_affinityf(int o) const { | |
788 | return (float)get_primary_affinity(o) / (float)CEPH_OSD_MAX_PRIMARY_AFFINITY; | |
789 | } | |
790 | ||
9f95a23c | 791 | bool has_erasure_code_profile(const std::string &name) const { |
7c673cae FG |
792 | auto i = erasure_code_profiles.find(name); |
793 | return i != erasure_code_profiles.end(); | |
794 | } | |
795 | int get_erasure_code_profile_default(CephContext *cct, | |
9f95a23c TL |
796 | std::map<std::string,std::string> &profile_map, |
797 | std::ostream *ss); | |
798 | void set_erasure_code_profile(const std::string &name, | |
799 | const std::map<std::string,std::string>& profile) { | |
7c673cae FG |
800 | erasure_code_profiles[name] = profile; |
801 | } | |
9f95a23c TL |
802 | const std::map<std::string,std::string> &get_erasure_code_profile( |
803 | const std::string &name) const { | |
804 | static std::map<std::string,std::string> empty; | |
7c673cae FG |
805 | auto i = erasure_code_profiles.find(name); |
806 | if (i == erasure_code_profiles.end()) | |
807 | return empty; | |
808 | else | |
809 | return i->second; | |
810 | } | |
9f95a23c | 811 | const mempool::osdmap::map<std::string,std::map<std::string,std::string>> &get_erasure_code_profiles() const { |
7c673cae FG |
812 | return erasure_code_profiles; |
813 | } | |
814 | ||
815 | bool exists(int osd) const { | |
816 | //assert(osd >= 0); | |
817 | return osd >= 0 && osd < max_osd && (osd_state[osd] & CEPH_OSD_EXISTS); | |
818 | } | |
819 | ||
31f18b77 FG |
820 | bool is_destroyed(int osd) const { |
821 | return exists(osd) && (osd_state[osd] & CEPH_OSD_DESTROYED); | |
822 | } | |
823 | ||
7c673cae FG |
824 | bool is_up(int osd) const { |
825 | return exists(osd) && (osd_state[osd] & CEPH_OSD_UP); | |
826 | } | |
827 | ||
828 | bool has_been_up_since(int osd, epoch_t epoch) const { | |
829 | return is_up(osd) && get_up_from(osd) <= epoch; | |
830 | } | |
831 | ||
832 | bool is_down(int osd) const { | |
833 | return !is_up(osd); | |
834 | } | |
835 | ||
9f95a23c TL |
836 | bool is_stop(int osd) const { |
837 | return exists(osd) && is_down(osd) && | |
838 | (osd_state[osd] & CEPH_OSD_STOP); | |
839 | } | |
840 | ||
7c673cae FG |
841 | bool is_out(int osd) const { |
842 | return !exists(osd) || get_weight(osd) == CEPH_OSD_OUT; | |
843 | } | |
844 | ||
845 | bool is_in(int osd) const { | |
846 | return !is_out(osd); | |
847 | } | |
848 | ||
9f95a23c TL |
849 | bool is_dead(int osd) const { |
850 | if (!exists(osd)) { | |
851 | return false; // unclear if they know they are removed from map | |
852 | } | |
853 | return get_xinfo(osd).dead_epoch > get_info(osd).up_from; | |
854 | } | |
855 | ||
81eedcae TL |
856 | unsigned get_osd_crush_node_flags(int osd) const; |
857 | unsigned get_crush_node_flags(int id) const; | |
858 | unsigned get_device_class_flags(int id) const; | |
859 | ||
860 | bool is_noup_by_osd(int osd) const { | |
31f18b77 FG |
861 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NOUP); |
862 | } | |
863 | ||
81eedcae | 864 | bool is_nodown_by_osd(int osd) const { |
31f18b77 FG |
865 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NODOWN); |
866 | } | |
867 | ||
81eedcae | 868 | bool is_noin_by_osd(int osd) const { |
31f18b77 FG |
869 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NOIN); |
870 | } | |
871 | ||
81eedcae | 872 | bool is_noout_by_osd(int osd) const { |
31f18b77 FG |
873 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NOOUT); |
874 | } | |
875 | ||
81eedcae TL |
876 | bool is_noup(int osd) const { |
877 | if (test_flag(CEPH_OSDMAP_NOUP)) // global? | |
878 | return true; | |
879 | if (is_noup_by_osd(osd)) // by osd? | |
880 | return true; | |
881 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NOUP) // by crush-node? | |
882 | return true; | |
883 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
884 | get_device_class_flags(class_id) & CEPH_OSD_NOUP) // by device-class? | |
885 | return true; | |
886 | return false; | |
31f18b77 FG |
887 | } |
888 | ||
81eedcae TL |
889 | bool is_nodown(int osd) const { |
890 | if (test_flag(CEPH_OSDMAP_NODOWN)) | |
891 | return true; | |
892 | if (is_nodown_by_osd(osd)) | |
893 | return true; | |
894 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NODOWN) | |
895 | return true; | |
896 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
897 | get_device_class_flags(class_id) & CEPH_OSD_NODOWN) | |
898 | return true; | |
899 | return false; | |
31f18b77 FG |
900 | } |
901 | ||
81eedcae TL |
902 | bool is_noin(int osd) const { |
903 | if (test_flag(CEPH_OSDMAP_NOIN)) | |
904 | return true; | |
905 | if (is_noin_by_osd(osd)) | |
906 | return true; | |
907 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NOIN) | |
908 | return true; | |
909 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
910 | get_device_class_flags(class_id) & CEPH_OSD_NOIN) | |
911 | return true; | |
912 | return false; | |
31f18b77 FG |
913 | } |
914 | ||
81eedcae TL |
915 | bool is_noout(int osd) const { |
916 | if (test_flag(CEPH_OSDMAP_NOOUT)) | |
917 | return true; | |
918 | if (is_noout_by_osd(osd)) | |
919 | return true; | |
920 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NOOUT) | |
921 | return true; | |
922 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
923 | get_device_class_flags(class_id) & CEPH_OSD_NOOUT) | |
924 | return true; | |
925 | return false; | |
31f18b77 FG |
926 | } |
927 | ||
7c673cae FG |
928 | /** |
929 | * check if an entire crush subtree is down | |
930 | */ | |
9f95a23c TL |
931 | bool subtree_is_down(int id, std::set<int> *down_cache) const; |
932 | bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, std::set<int> *down_cache) const; | |
933 | ||
934 | bool subtree_type_is_down(CephContext *cct, int id, int subtree_type, std::set<int> *down_in_osds, std::set<int> *up_in_osds, | |
935 | std::set<int> *subtree_up, std::unordered_map<int, std::set<int> > *subtree_type_down) const; | |
31f18b77 | 936 | |
7c673cae FG |
937 | int identify_osd(const entity_addr_t& addr) const; |
938 | int identify_osd(const uuid_d& u) const; | |
939 | int identify_osd_on_all_channels(const entity_addr_t& addr) const; | |
940 | ||
941 | bool have_addr(const entity_addr_t& addr) const { | |
942 | return identify_osd(addr) >= 0; | |
943 | } | |
944 | int find_osd_on_ip(const entity_addr_t& ip) const; | |
11fdf7f2 TL |
945 | |
946 | const entity_addrvec_t& get_addrs(int osd) const { | |
947 | ceph_assert(exists(osd)); | |
948 | return osd_addrs->client_addrs[osd] ? | |
949 | *osd_addrs->client_addrs[osd] : _blank_addrvec; | |
7c673cae | 950 | } |
11fdf7f2 TL |
951 | const entity_addrvec_t& get_most_recent_addrs(int osd) const { |
952 | return get_addrs(osd); | |
7c673cae | 953 | } |
11fdf7f2 TL |
954 | const entity_addrvec_t &get_cluster_addrs(int osd) const { |
955 | ceph_assert(exists(osd)); | |
956 | return osd_addrs->cluster_addrs[osd] ? | |
957 | *osd_addrs->cluster_addrs[osd] : _blank_addrvec; | |
7c673cae | 958 | } |
11fdf7f2 TL |
959 | const entity_addrvec_t &get_hb_back_addrs(int osd) const { |
960 | ceph_assert(exists(osd)); | |
961 | return osd_addrs->hb_back_addrs[osd] ? | |
962 | *osd_addrs->hb_back_addrs[osd] : _blank_addrvec; | |
7c673cae | 963 | } |
11fdf7f2 TL |
964 | const entity_addrvec_t &get_hb_front_addrs(int osd) const { |
965 | ceph_assert(exists(osd)); | |
966 | return osd_addrs->hb_front_addrs[osd] ? | |
967 | *osd_addrs->hb_front_addrs[osd] : _blank_addrvec; | |
7c673cae FG |
968 | } |
969 | ||
970 | const uuid_d& get_uuid(int osd) const { | |
11fdf7f2 | 971 | ceph_assert(exists(osd)); |
7c673cae FG |
972 | return (*osd_uuid)[osd]; |
973 | } | |
974 | ||
975 | const epoch_t& get_up_from(int osd) const { | |
11fdf7f2 | 976 | ceph_assert(exists(osd)); |
7c673cae FG |
977 | return osd_info[osd].up_from; |
978 | } | |
979 | const epoch_t& get_up_thru(int osd) const { | |
11fdf7f2 | 980 | ceph_assert(exists(osd)); |
7c673cae FG |
981 | return osd_info[osd].up_thru; |
982 | } | |
983 | const epoch_t& get_down_at(int osd) const { | |
11fdf7f2 | 984 | ceph_assert(exists(osd)); |
7c673cae FG |
985 | return osd_info[osd].down_at; |
986 | } | |
987 | const osd_info_t& get_info(int osd) const { | |
11fdf7f2 | 988 | ceph_assert(osd < max_osd); |
7c673cae FG |
989 | return osd_info[osd]; |
990 | } | |
991 | ||
992 | const osd_xinfo_t& get_xinfo(int osd) const { | |
11fdf7f2 | 993 | ceph_assert(osd < max_osd); |
7c673cae FG |
994 | return osd_xinfo[osd]; |
995 | } | |
996 | ||
997 | int get_next_up_osd_after(int n) const { | |
998 | if (get_max_osd() == 0) | |
999 | return -1; | |
1000 | for (int i = n + 1; i != n; ++i) { | |
1001 | if (i >= get_max_osd()) | |
1002 | i = 0; | |
1003 | if (i == n) | |
1004 | break; | |
1005 | if (is_up(i)) | |
1006 | return i; | |
1007 | } | |
1008 | return -1; | |
1009 | } | |
1010 | ||
1011 | int get_previous_up_osd_before(int n) const { | |
1012 | if (get_max_osd() == 0) | |
1013 | return -1; | |
1014 | for (int i = n - 1; i != n; --i) { | |
1015 | if (i < 0) | |
1016 | i = get_max_osd() - 1; | |
1017 | if (i == n) | |
1018 | break; | |
1019 | if (is_up(i)) | |
1020 | return i; | |
1021 | } | |
1022 | return -1; | |
1023 | } | |
1024 | ||
11fdf7f2 TL |
1025 | |
1026 | void get_random_up_osds_by_subtree(int n, // whoami | |
9f95a23c | 1027 | std::string &subtree, |
11fdf7f2 | 1028 | int limit, // how many |
9f95a23c TL |
1029 | std::set<int> skip, |
1030 | std::set<int> *want) const; | |
11fdf7f2 | 1031 | |
7c673cae FG |
1032 | /** |
1033 | * get feature bits required by the current structure | |
1034 | * | |
1035 | * @param entity_type [in] what entity type we are asking about | |
9f95a23c | 1036 | * @param mask [out] set of all possible map-related features we could set |
7c673cae FG |
1037 | * @return feature bits used by this map |
1038 | */ | |
1039 | uint64_t get_features(int entity_type, uint64_t *mask) const; | |
1040 | ||
1041 | /** | |
1042 | * get oldest *client* version (firefly, hammer, etc.) that can connect given | |
1043 | * the feature bits required (according to get_features()). | |
1044 | */ | |
9f95a23c | 1045 | ceph_release_t get_min_compat_client() const; |
7c673cae | 1046 | |
11fdf7f2 TL |
1047 | /** |
1048 | * gets the required minimum *client* version that can connect to the cluster. | |
1049 | */ | |
9f95a23c | 1050 | ceph_release_t get_require_min_compat_client() const; |
11fdf7f2 | 1051 | |
7c673cae FG |
1052 | /** |
1053 | * get intersection of features supported by up osds | |
1054 | */ | |
1055 | uint64_t get_up_osd_features() const; | |
1056 | ||
494da23a TL |
1057 | void get_upmap_pgs(vector<pg_t> *upmap_pgs) const; |
1058 | bool check_pg_upmaps( | |
1059 | CephContext *cct, | |
1060 | const vector<pg_t>& to_check, | |
1061 | vector<pg_t> *to_cancel, | |
1062 | map<pg_t, mempool::osdmap::vector<pair<int,int>>> *to_remap) const; | |
1063 | void clean_pg_upmaps( | |
1064 | CephContext *cct, | |
1065 | Incremental *pending_inc, | |
1066 | const vector<pg_t>& to_cancel, | |
1067 | const map<pg_t, mempool::osdmap::vector<pair<int,int>>>& to_remap) const; | |
1068 | bool clean_pg_upmaps(CephContext *cct, Incremental *pending_inc) const; | |
94b18763 | 1069 | |
7c673cae FG |
1070 | int apply_incremental(const Incremental &inc); |
1071 | ||
1072 | /// try to re-use/reference addrs in oldmap from newmap | |
1073 | static void dedup(const OSDMap *oldmap, OSDMap *newmap); | |
1074 | ||
11fdf7f2 TL |
1075 | static void clean_temps(CephContext *cct, |
1076 | const OSDMap& oldmap, | |
1077 | const OSDMap& nextmap, | |
7c673cae FG |
1078 | Incremental *pending_inc); |
1079 | ||
1080 | // serialize, unserialize | |
1081 | private: | |
9f95a23c TL |
1082 | void encode_client_old(ceph::buffer::list& bl) const; |
1083 | void encode_classic(ceph::buffer::list& bl, uint64_t features) const; | |
1084 | void decode_classic(ceph::buffer::list::const_iterator& p); | |
7c673cae FG |
1085 | void post_decode(); |
1086 | public: | |
9f95a23c TL |
1087 | void encode(ceph::buffer::list& bl, uint64_t features=CEPH_FEATURES_ALL) const; |
1088 | void decode(ceph::buffer::list& bl); | |
1089 | void decode(ceph::buffer::list::const_iterator& bl); | |
7c673cae FG |
1090 | |
1091 | ||
1092 | /**** mapping facilities ****/ | |
1093 | int map_to_pg( | |
1094 | int64_t pool, | |
9f95a23c TL |
1095 | const std::string& name, |
1096 | const std::string& key, | |
1097 | const std::string& nspace, | |
7c673cae FG |
1098 | pg_t *pg) const; |
1099 | int object_locator_to_pg(const object_t& oid, const object_locator_t& loc, | |
1100 | pg_t &pg) const; | |
1101 | pg_t object_locator_to_pg(const object_t& oid, | |
1102 | const object_locator_t& loc) const { | |
1103 | pg_t pg; | |
1104 | int ret = object_locator_to_pg(oid, loc, pg); | |
11fdf7f2 | 1105 | ceph_assert(ret == 0); |
7c673cae FG |
1106 | return pg; |
1107 | } | |
1108 | ||
1109 | ||
1110 | static object_locator_t file_to_object_locator(const file_layout_t& layout) { | |
1111 | return object_locator_t(layout.pool_id, layout.pool_ns); | |
1112 | } | |
1113 | ||
1114 | ceph_object_layout file_to_object_layout(object_t oid, | |
1115 | file_layout_t& layout) const { | |
1116 | return make_object_layout(oid, layout.pool_id, layout.pool_ns); | |
1117 | } | |
1118 | ||
1119 | ceph_object_layout make_object_layout(object_t oid, int pg_pool, | |
9f95a23c | 1120 | std::string nspace) const; |
7c673cae FG |
1121 | |
1122 | int get_pg_num(int pg_pool) const | |
1123 | { | |
1124 | const pg_pool_t *pool = get_pg_pool(pg_pool); | |
11fdf7f2 | 1125 | ceph_assert(NULL != pool); |
7c673cae FG |
1126 | return pool->get_pg_num(); |
1127 | } | |
1128 | ||
1129 | bool pg_exists(pg_t pgid) const { | |
1130 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
1131 | return p && pgid.ps() < p->get_pg_num(); | |
1132 | } | |
1133 | ||
224ce89b WB |
1134 | int get_pg_pool_min_size(pg_t pgid) const { |
1135 | if (!pg_exists(pgid)) { | |
1136 | return -ENOENT; | |
1137 | } | |
1138 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
11fdf7f2 | 1139 | ceph_assert(p); |
224ce89b WB |
1140 | return p->get_min_size(); |
1141 | } | |
1142 | ||
1143 | int get_pg_pool_size(pg_t pgid) const { | |
1144 | if (!pg_exists(pgid)) { | |
1145 | return -ENOENT; | |
1146 | } | |
1147 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
11fdf7f2 | 1148 | ceph_assert(p); |
224ce89b WB |
1149 | return p->get_size(); |
1150 | } | |
1151 | ||
94b18763 FG |
1152 | int get_pg_pool_crush_rule(pg_t pgid) const { |
1153 | if (!pg_exists(pgid)) { | |
1154 | return -ENOENT; | |
1155 | } | |
1156 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
11fdf7f2 | 1157 | ceph_assert(p); |
94b18763 FG |
1158 | return p->get_crush_rule(); |
1159 | } | |
1160 | ||
7c673cae | 1161 | private: |
9f95a23c | 1162 | /// pg -> (raw osd list) |
31f18b77 | 1163 | void _pg_to_raw_osds( |
7c673cae | 1164 | const pg_pool_t& pool, pg_t pg, |
9f95a23c | 1165 | std::vector<int> *osds, |
7c673cae | 1166 | ps_t *ppps) const; |
9f95a23c TL |
1167 | int _pick_primary(const std::vector<int>& osds) const; |
1168 | void _remove_nonexistent_osds(const pg_pool_t& pool, std::vector<int>& osds) const; | |
7c673cae FG |
1169 | |
1170 | void _apply_primary_affinity(ps_t seed, const pg_pool_t& pool, | |
9f95a23c | 1171 | std::vector<int> *osds, int *primary) const; |
7c673cae FG |
1172 | |
1173 | /// apply pg_upmap[_items] mappings | |
9f95a23c | 1174 | void _apply_upmap(const pg_pool_t& pi, pg_t pg, std::vector<int> *raw) const; |
7c673cae | 1175 | |
9f95a23c TL |
1176 | /// pg -> (up osd list) | |
1177 | void _raw_to_up_osds(const pg_pool_t& pool, const std::vector<int>& raw, | |
1178 | std::vector<int> *up) const; | |
7c673cae FG |
1179 | |
1180 | ||
1181 | /** | |
1182 | * Get the pg and primary temp, if they are specified. | |
1183 | * @param temp_pg [out] Will be empty or contain the temp PG mapping on return | |
1184 | * @param temp_primary [out] Will be the value in primary_temp, or a value derived | |
1185 | * from the pg_temp (if specified), or -1 if you should use the calculated (up_)primary. | |
1186 | */ | |
1187 | void _get_temp_osds(const pg_pool_t& pool, pg_t pg, | |
9f95a23c | 1188 | std::vector<int> *temp_pg, int *temp_primary) const; |
7c673cae FG |
1189 | |
1190 | /** | |
1191 | * map to up and acting. Fills in whatever fields are non-NULL. | |
1192 | */ | |
9f95a23c TL |
1193 | void _pg_to_up_acting_osds(const pg_t& pg, std::vector<int> *up, int *up_primary, |
1194 | std::vector<int> *acting, int *acting_primary, | |
7c673cae FG |
1195 | bool raw_pg_to_pg = true) const; |
1196 | ||
1197 | public: | |
1198 | /*** | |
1199 | * This is suitable only for looking at raw CRUSH outputs. It skips | |
1200 | * applying the temp and up checks and should not be used | |
1201 | * by anybody for data mapping purposes. | |
1202 | * raw and primary must be non-NULL | |
1203 | */ | |
9f95a23c TL |
1204 | void pg_to_raw_osds(pg_t pg, std::vector<int> *raw, int *primary) const; |
1205 | void pg_to_raw_upmap(pg_t pg, std::vector<int> *raw, | |
1206 | std::vector<int> *raw_upmap) const; | |
7c673cae | 1207 | /// map a pg to its acting set. @return acting set size |
9f95a23c | 1208 | void pg_to_acting_osds(const pg_t& pg, std::vector<int> *acting, |
7c673cae FG |
1209 | int *acting_primary) const { |
1210 | _pg_to_up_acting_osds(pg, NULL, NULL, acting, acting_primary); | |
7c673cae | 1211 | } |
9f95a23c | 1212 | void pg_to_acting_osds(pg_t pg, std::vector<int>& acting) const { |
7c673cae FG |
1213 | return pg_to_acting_osds(pg, &acting, NULL); |
1214 | } | |
1215 | /** | |
1216 | * This does not apply temp overrides and should not be used | |
1217 | * by anybody for data mapping purposes. Specify both pointers. | |
1218 | */ | |
9f95a23c | 1219 | void pg_to_raw_up(pg_t pg, std::vector<int> *up, int *primary) const; |
7c673cae FG |
1220 | /** |
1221 | * map a pg to its acting set as well as its up set. You must use | |
1222 | * the acting set for data mapping purposes, but some users will | |
1223 | * also find the up set useful for things like deciding what to | |
1224 | * set as pg_temp. | |
1225 | * Each of these pointers must be non-NULL. | |
1226 | */ | |
9f95a23c TL |
1227 | void pg_to_up_acting_osds(pg_t pg, std::vector<int> *up, int *up_primary, |
1228 | std::vector<int> *acting, int *acting_primary) const { | |
7c673cae FG |
1229 | _pg_to_up_acting_osds(pg, up, up_primary, acting, acting_primary); |
1230 | } | |
9f95a23c | 1231 | void pg_to_up_acting_osds(pg_t pg, std::vector<int>& up, std::vector<int>& acting) const { |
7c673cae FG |
1232 | int up_primary, acting_primary; |
1233 | pg_to_up_acting_osds(pg, &up, &up_primary, &acting, &acting_primary); | |
1234 | } | |
1235 | bool pg_is_ec(pg_t pg) const { | |
1236 | auto i = pools.find(pg.pool()); | |
11fdf7f2 TL |
1237 | ceph_assert(i != pools.end()); |
1238 | return i->second.is_erasure(); | |
7c673cae FG |
1239 | } |
1240 | bool get_primary_shard(const pg_t& pgid, spg_t *out) const { | |
1241 | auto i = get_pools().find(pgid.pool()); | |
1242 | if (i == get_pools().end()) { | |
1243 | return false; | |
1244 | } | |
11fdf7f2 | 1245 | if (!i->second.is_erasure()) { |
7c673cae FG |
1246 | *out = spg_t(pgid); |
1247 | return true; | |
1248 | } | |
1249 | int primary; | |
9f95a23c | 1250 | std::vector<int> acting; |
7c673cae FG |
1251 | pg_to_acting_osds(pgid, &acting, &primary); |
1252 | for (uint8_t i = 0; i < acting.size(); ++i) { | |
1253 | if (acting[i] == primary) { | |
1254 | *out = spg_t(pgid, shard_id_t(i)); | |
1255 | return true; | |
1256 | } | |
1257 | } | |
1258 | return false; | |
1259 | } | |
11fdf7f2 TL |
1260 | bool get_primary_shard(const pg_t& pgid, int *primary, spg_t *out) const { |
1261 | auto i = get_pools().find(pgid.pool()); | |
1262 | if (i == get_pools().end()) { | |
1263 | return false; | |
1264 | } | |
9f95a23c | 1265 | std::vector<int> acting; |
11fdf7f2 TL |
1266 | pg_to_acting_osds(pgid, &acting, primary); |
1267 | if (i->second.is_erasure()) { | |
1268 | for (uint8_t i = 0; i < acting.size(); ++i) { | |
1269 | if (acting[i] == *primary) { | |
1270 | *out = spg_t(pgid, shard_id_t(i)); | |
1271 | return true; | |
1272 | } | |
1273 | } | |
1274 | } else { | |
1275 | *out = spg_t(pgid); | |
1276 | return true; | |
1277 | } | |
1278 | return false; | |
1279 | } | |
1280 | ||
9f95a23c TL |
1281 | bool in_removed_snaps_queue(int64_t pool, snapid_t snap) const { |
1282 | auto p = removed_snaps_queue.find(pool); | |
1283 | if (p == removed_snaps_queue.end()) { | |
1284 | return false; | |
1285 | } | |
1286 | return p->second.contains(snap); | |
1287 | } | |
1288 | ||
11fdf7f2 TL |
1289 | const mempool::osdmap::map<int64_t,snap_interval_set_t>& |
1290 | get_removed_snaps_queue() const { | |
1291 | return removed_snaps_queue; | |
1292 | } | |
1293 | const mempool::osdmap::map<int64_t,snap_interval_set_t>& | |
1294 | get_new_removed_snaps() const { | |
1295 | return new_removed_snaps; | |
1296 | } | |
1297 | const mempool::osdmap::map<int64_t,snap_interval_set_t>& | |
1298 | get_new_purged_snaps() const { | |
1299 | return new_purged_snaps; | |
1300 | } | |
7c673cae | 1301 | |
9f95a23c | 1302 | int64_t lookup_pg_pool_name(const std::string& name) const { |
7c673cae FG |
1303 | auto p = name_pool.find(name); |
1304 | if (p == name_pool.end()) | |
1305 | return -ENOENT; | |
1306 | return p->second; | |
1307 | } | |
1308 | ||
1309 | int64_t get_pool_max() const { | |
1310 | return pool_max; | |
1311 | } | |
1312 | const mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() const { | |
1313 | return pools; | |
1314 | } | |
1315 | mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() { | |
1316 | return pools; | |
1317 | } | |
9f95a23c | 1318 | void get_pool_ids_by_rule(int rule_id, std::set<int64_t> *pool_ids) const { |
11fdf7f2 | 1319 | ceph_assert(pool_ids); |
3efd9988 | 1320 | for (auto &p: pools) { |
11fdf7f2 | 1321 | if (p.second.get_crush_rule() == rule_id) { |
3efd9988 FG |
1322 | pool_ids->insert(p.first); |
1323 | } | |
1324 | } | |
1325 | } | |
1326 | void get_pool_ids_by_osd(CephContext *cct, | |
1327 | int osd, | |
9f95a23c TL |
1328 | std::set<int64_t> *pool_ids) const; |
1329 | const std::string& get_pool_name(int64_t p) const { | |
7c673cae | 1330 | auto i = pool_name.find(p); |
11fdf7f2 | 1331 | ceph_assert(i != pool_name.end()); |
7c673cae FG |
1332 | return i->second; |
1333 | } | |
9f95a23c | 1334 | const mempool::osdmap::map<int64_t,std::string>& get_pool_names() const { |
c07f9fc5 FG |
1335 | return pool_name; |
1336 | } | |
7c673cae FG |
1337 | bool have_pg_pool(int64_t p) const { |
1338 | return pools.count(p); | |
1339 | } | |
1340 | const pg_pool_t* get_pg_pool(int64_t p) const { | |
1341 | auto i = pools.find(p); | |
1342 | if (i != pools.end()) | |
1343 | return &i->second; | |
1344 | return NULL; | |
1345 | } | |
1346 | unsigned get_pg_size(pg_t pg) const { | |
1347 | auto p = pools.find(pg.pool()); | |
11fdf7f2 | 1348 | ceph_assert(p != pools.end()); |
7c673cae FG |
1349 | return p->second.get_size(); |
1350 | } | |
1351 | int get_pg_type(pg_t pg) const { | |
1352 | auto p = pools.find(pg.pool()); | |
11fdf7f2 | 1353 | ceph_assert(p != pools.end()); |
7c673cae FG |
1354 | return p->second.get_type(); |
1355 | } | |
9f95a23c TL |
1356 | int get_pool_crush_rule(int64_t pool_id) const { |
1357 | auto pool = get_pg_pool(pool_id); | |
1358 | if (!pool) | |
1359 | return -ENOENT; | |
1360 | return pool->get_crush_rule(); | |
1361 | } | |
7c673cae FG |
1362 | |
1363 | ||
1364 | pg_t raw_pg_to_pg(pg_t pg) const { | |
1365 | auto p = pools.find(pg.pool()); | |
11fdf7f2 | 1366 | ceph_assert(p != pools.end()); |
7c673cae FG |
1367 | return p->second.raw_pg_to_pg(pg); |
1368 | } | |
1369 | ||
1370 | // pg -> acting primary osd | |
1371 | int get_pg_acting_primary(pg_t pg) const { | |
1372 | int primary = -1; | |
1373 | _pg_to_up_acting_osds(pg, nullptr, nullptr, nullptr, &primary); | |
1374 | return primary; | |
1375 | } | |
1376 | ||
1377 | /* | |
1378 | * check whether an spg_t maps to a particular osd | |
1379 | */ | |
1380 | bool is_up_acting_osd_shard(spg_t pg, int osd) const { | |
9f95a23c | 1381 | std::vector<int> up, acting; |
7c673cae | 1382 | _pg_to_up_acting_osds(pg.pgid, &up, NULL, &acting, NULL, false); |
9f95a23c TL |
1383 | if (calc_pg_role(pg_shard_t(osd, pg.shard), acting) >= 0 || |
1384 | calc_pg_role(pg_shard_t(osd, pg.shard), up) >= 0) { | |
1385 | return true; | |
7c673cae FG |
1386 | } |
1387 | return false; | |
1388 | } | |
1389 | ||
1390 | ||
9f95a23c TL |
1391 | static int calc_pg_role_broken(int osd, const std::vector<int>& acting, int nrep=0); |
1392 | static int calc_pg_role(pg_shard_t who, const std::vector<int>& acting); | |
1393 | static bool primary_changed_broken( | |
7c673cae | 1394 | int oldprimary, |
9f95a23c | 1395 | const std::vector<int> &oldacting, |
7c673cae | 1396 | int newprimary, |
9f95a23c | 1397 | const std::vector<int> &newacting); |
7c673cae FG |
1398 | |
1399 | /* rank is -1 (stray), 0 (primary), 1,2,3,... (replica) */ | |
9f95a23c TL |
1400 | int get_pg_acting_role(spg_t pg, int osd) const { |
1401 | std::vector<int> group; | |
1402 | pg_to_acting_osds(pg.pgid, group); | |
1403 | return calc_pg_role(pg_shard_t(osd, pg.shard), group); | |
7c673cae FG |
1404 | } |
1405 | ||
7c673cae FG |
1406 | bool try_pg_upmap( |
1407 | CephContext *cct, | |
1408 | pg_t pg, ///< pg to potentially remap | |
9f95a23c TL |
1409 | const std::set<int>& overfull, ///< osds we'd want to evacuate |
1410 | const std::vector<int>& underfull, ///< osds to move to, in order of preference | |
1411 | const std::vector<int>& more_underfull, ///< less full osds to move to, in order of preference | |
1412 | std::vector<int> *orig, | |
1413 | std::vector<int> *out); ///< resulting alternative mapping | |
7c673cae FG |
1414 | |
1415 | int calc_pg_upmaps( | |
1416 | CephContext *cct, | |
92f5a8d4 | 1417 | uint32_t max_deviation, ///< max deviation from target (value >= 1) |
7c673cae | 1418 | int max_iterations, ///< max iterations to run |
9f95a23c | 1419 | const std::set<int64_t>& pools, ///< [optional] restrict to pool |
7c673cae FG |
1420 | Incremental *pending_inc |
1421 | ); | |
1422 | ||
9f95a23c | 1423 | int get_osds_by_bucket_name(const std::string &name, std::set<int> *osds) const; |
31f18b77 | 1424 | |
f64942e4 AA |
1425 | bool have_pg_upmaps(pg_t pg) const { |
1426 | return pg_upmap.count(pg) || | |
1427 | pg_upmap_items.count(pg); | |
1428 | } | |
1429 | ||
9f95a23c TL |
1430 | bool check_full(const set<pg_shard_t> &missing_on) const { |
1431 | for (auto shard : missing_on) { | |
1432 | if (get_state(shard.osd) & CEPH_OSD_FULL) | |
1433 | return true; | |
1434 | } | |
1435 | return false; | |
1436 | } | |
1437 | ||
7c673cae FG |
1438 | /* |
1439 | * handy helpers to build simple maps... | |
1440 | */ | |
1441 | /** | |
1442 | * Build an OSD map suitable for basic usage. If **num_osd** is >= 0 | |
1443 | * it will be initialized with the specified number of OSDs in a | |
1444 | * single host. If **num_osd** is < 0 the layout of the OSD map will | |
1445 | * be built by reading the content of the configuration file. | |
1446 | * | |
1447 | * @param cct [in] in core ceph context | |
1448 | * @param e [in] initial epoch | |
1449 | * @param fsid [in] id of the cluster | |
1450 | * @param num_osd [in] number of OSDs if >= 0 or read from conf if < 0 | |
1451 | * @return **0** on success, negative errno on error. | |
1452 | */ | |
224ce89b WB |
1453 | private: |
1454 | int build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid, | |
1455 | int num_osd, int pg_bits, int pgp_bits, | |
1456 | bool default_pool); | |
1457 | public: | |
7c673cae | 1458 | int build_simple(CephContext *cct, epoch_t e, uuid_d &fsid, |
224ce89b WB |
1459 | int num_osd) { |
1460 | return build_simple_optioned(cct, e, fsid, num_osd, 0, 0, false); | |
1461 | } | |
1462 | int build_simple_with_pool(CephContext *cct, epoch_t e, uuid_d &fsid, | |
1463 | int num_osd, int pg_bits, int pgp_bits) { | |
1464 | return build_simple_optioned(cct, e, fsid, num_osd, | |
1465 | pg_bits, pgp_bits, true); | |
1466 | } | |
7c673cae FG |
1467 | static int _build_crush_types(CrushWrapper& crush); |
1468 | static int build_simple_crush_map(CephContext *cct, CrushWrapper& crush, | |
9f95a23c | 1469 | int num_osd, std::ostream *ss); |
7c673cae FG |
1470 | static int build_simple_crush_map_from_conf(CephContext *cct, |
1471 | CrushWrapper& crush, | |
9f95a23c | 1472 | std::ostream *ss); |
31f18b77 FG |
1473 | static int build_simple_crush_rules( |
1474 | CephContext *cct, CrushWrapper& crush, | |
9f95a23c TL |
1475 | const std::string& root, |
1476 | std::ostream *ss); | |
7c673cae | 1477 | |
3efd9988 FG |
1478 | bool crush_rule_in_use(int rule_id) const; |
1479 | ||
9f95a23c | 1480 | int validate_crush_rules(CrushWrapper *crush, std::ostream *ss) const; |
7c673cae FG |
1481 | |
1482 | void clear_temp() { | |
1483 | pg_temp->clear(); | |
1484 | primary_temp->clear(); | |
1485 | } | |
1486 | ||
1487 | private: | |
9f95a23c | 1488 | void print_osd_line(int cur, std::ostream *out, ceph::Formatter *f) const; |
7c673cae | 1489 | public: |
9f95a23c TL |
1490 | void print(std::ostream& out) const; |
1491 | void print_osd(int id, std::ostream& out) const; | |
1492 | void print_osds(std::ostream& out) const; | |
1493 | void print_pools(std::ostream& out) const; | |
1494 | void print_summary(ceph::Formatter *f, std::ostream& out, | |
1495 | const std::string& prefix, bool extra=false) const; | |
1496 | void print_oneline_summary(std::ostream& out) const; | |
31f18b77 FG |
1497 | |
/// Filter bits for print_tree()'s dump_flags argument.
enum {
  DUMP_IN        = 1 << 0, // only 'in' osds
  DUMP_OUT       = 1 << 1, // only 'out' osds
  DUMP_UP        = 1 << 2, // only 'up' osds
  DUMP_DOWN      = 1 << 3, // only 'down' osds
  DUMP_DESTROYED = 1 << 4, // only 'destroyed' osds
};
9f95a23c TL |
1505 | void print_tree(ceph::Formatter *f, std::ostream *out, |
1506 | unsigned dump_flags=0, std::string bucket="") const; | |
7c673cae FG |
1507 | |
1508 | int summarize_mapping_stats( | |
1509 | OSDMap *newmap, | |
9f95a23c | 1510 | const std::set<int64_t> *pools, |
7c673cae | 1511 | std::string *out, |
9f95a23c | 1512 | ceph::Formatter *f) const; |
7c673cae | 1513 | |
9f95a23c TL |
1514 | std::string get_flag_string() const; |
1515 | static std::string get_flag_string(unsigned flags); | |
7c673cae | 1516 | static void dump_erasure_code_profiles( |
9f95a23c TL |
1517 | const mempool::osdmap::map<std::string,std::map<std::string,std::string> > &profiles, |
1518 | ceph::Formatter *f); | |
1519 | void dump(ceph::Formatter *f) const; | |
1520 | void dump_osd(int id, ceph::Formatter *f) const; | |
1521 | void dump_osds(ceph::Formatter *f) const; | |
1522 | static void generate_test_instances(std::list<OSDMap*>& o); | |
7c673cae | 1523 | bool check_new_blacklist_entries() const { return new_blacklist_entries; } |
224ce89b | 1524 | |
92f5a8d4 | 1525 | void check_health(CephContext *cct, health_check_map_t *checks) const; |
35e4c445 | 1526 | |
9f95a23c TL |
1527 | int parse_osd_id_list(const std::vector<std::string>& ls, |
1528 | std::set<int> *out, | |
1529 | std::ostream *ss) const; | |
11fdf7f2 TL |
1530 | |
1531 | float pool_raw_used_rate(int64_t poolid) const; | |
1532 | ||
7c673cae FG |
1533 | }; |
1534 | WRITE_CLASS_ENCODER_FEATURES(OSDMap) | |
1535 | WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental) | |
1536 | ||
9f95a23c TL |
1537 | #ifdef WITH_SEASTAR |
1538 | using OSDMapRef = boost::local_shared_ptr<const OSDMap>; | |
1539 | #else | |
1540 | using OSDMapRef = std::shared_ptr<const OSDMap>; | |
1541 | #endif | |
1542 | ||
7c673cae | 1543 | |
9f95a23c | 1544 | inline std::ostream& operator<<(std::ostream& out, const OSDMap& m) { |
7c673cae FG |
1545 | m.print_oneline_summary(out); |
1546 | return out; | |
1547 | } | |
1548 | ||
11fdf7f2 | 1549 | class PGMap; |
31f18b77 FG |
1550 | |
1551 | void print_osd_utilization(const OSDMap& osdmap, | |
11fdf7f2 | 1552 | const PGMap& pgmap, |
9f95a23c TL |
1553 | std::ostream& out, |
1554 | ceph::Formatter *f, | |
11fdf7f2 | 1555 | bool tree, |
9f95a23c | 1556 | const std::string& filter); |
7c673cae FG |
1557 | |
1558 | #endif |