]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> | |
8 | * | |
9 | * Author: Loic Dachary <loic@dachary.org> | |
10 | * | |
11 | * This is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU Lesser General Public | |
13 | * License version 2.1, as published by the Free Software | |
14 | * Foundation. See file COPYING. | |
15 | * | |
16 | */ | |
17 | ||
18 | ||
19 | #ifndef CEPH_OSDMAP_H | |
20 | #define CEPH_OSDMAP_H | |
21 | ||
22 | /* | |
23 | * describe properties of the OSD cluster. | |
24 | * disks, disk groups, total # osds, | |
25 | * | |
26 | */ | |
7c673cae FG |
27 | #include <vector> |
28 | #include <list> | |
29 | #include <set> | |
30 | #include <map> | |
11fdf7f2 | 31 | #include <memory> |
9f95a23c TL |
32 | |
33 | #include <boost/smart_ptr/local_shared_ptr.hpp> | |
94b18763 | 34 | #include "include/btree_map.h" |
9f95a23c TL |
35 | #include "include/common_fwd.h" |
36 | #include "include/types.h" | |
37 | #include "common/ceph_releases.h" | |
38 | #include "osd_types.h" | |
39 | ||
40 | //#include "include/ceph_features.h" | |
41 | #include "crush/CrushWrapper.h" | |
7c673cae FG |
42 | |
43 | // forward declaration | |
7c673cae | 44 | class CrushWrapper; |
224ce89b | 45 | class health_check_map_t; |
7c673cae | 46 | |
7c673cae FG |
47 | /* |
48 | * we track up to two intervals during which the osd was alive and | |
49 | * healthy. the most recent is [up_from,up_thru), where up_thru is | |
50 | * the last epoch the osd is known to have _started_. i.e., a lower | |
51 | * bound on the actual osd death. down_at (if it is > up_from) is an | |
52 | * upper bound on the actual osd death. | |
53 | * | |
f67539c2 | 54 | * the second is the last_clean interval [begin,end). in that case, |
7c673cae FG |
55 | * the last interval is the last epoch known to have been either |
56 | * _finished_, or during which the osd cleanly shut down. when | |
57 | * possible, we push this forward to the epoch the osd was eventually | |
58 | * marked down. | |
59 | * | |
60 | * the lost_at is used to allow build_prior to proceed without waiting | |
61 | * for an osd to recover. In certain cases, progress may be blocked | |
62 | * because an osd is down that may contain updates (i.e., a pg may have | |
63 | * gone rw during an interval). If the osd can't be brought online, we | |
64 | * can force things to proceed knowing that we _might_ be losing some | |
65 | * acked writes. If the osd comes back to life later, that's fine too, | |
66 | * but those writes will still be lost (the divergent objects will be | |
67 | * thrown out). | |
68 | */ | |
69 | struct osd_info_t { | |
70 | epoch_t last_clean_begin; // last interval that ended with a clean osd shutdown | |
71 | epoch_t last_clean_end; | |
72 | epoch_t up_from; // epoch osd marked up | |
73 | epoch_t up_thru; // lower bound on actual osd death (if > up_from) | |
74 | epoch_t down_at; // upper bound on actual osd death (if > up_from) | |
75 | epoch_t lost_at; // last epoch we decided data was "lost" | |
76 | ||
77 | osd_info_t() : last_clean_begin(0), last_clean_end(0), | |
78 | up_from(0), up_thru(0), down_at(0), lost_at(0) {} | |
79 | ||
9f95a23c TL |
80 | void dump(ceph::Formatter *f) const; |
81 | void encode(ceph::buffer::list& bl) const; | |
82 | void decode(ceph::buffer::list::const_iterator& bl); | |
83 | static void generate_test_instances(std::list<osd_info_t*>& o); | |
7c673cae FG |
84 | }; |
85 | WRITE_CLASS_ENCODER(osd_info_t) | |
86 | ||
9f95a23c | 87 | std::ostream& operator<<(std::ostream& out, const osd_info_t& info); |
7c673cae FG |
88 | |
89 | struct osd_xinfo_t { | |
90 | utime_t down_stamp; ///< timestamp when we were last marked down | |
91 | float laggy_probability; ///< encoded as __u32: 0 = definitely not laggy, 0xffffffff definitely laggy | |
92 | __u32 laggy_interval; ///< average interval between being marked laggy and recovering | |
93 | uint64_t features; ///< features supported by this osd we should know about | |
94 | __u32 old_weight; ///< weight prior to being auto marked out | |
9f95a23c TL |
95 | utime_t last_purged_snaps_scrub; ///< last scrub of purged_snaps |
96 | epoch_t dead_epoch = 0; ///< last epoch we were confirmed dead (not just down) | |
7c673cae FG |
97 | |
98 | osd_xinfo_t() : laggy_probability(0), laggy_interval(0), | |
99 | features(0), old_weight(0) {} | |
100 | ||
9f95a23c TL |
101 | void dump(ceph::Formatter *f) const; |
102 | void encode(ceph::buffer::list& bl, uint64_t features) const; | |
103 | void decode(ceph::buffer::list::const_iterator& bl); | |
104 | static void generate_test_instances(std::list<osd_xinfo_t*>& o); | |
7c673cae | 105 | }; |
9f95a23c | 106 | WRITE_CLASS_ENCODER_FEATURES(osd_xinfo_t) |
7c673cae | 107 | |
9f95a23c | 108 | std::ostream& operator<<(std::ostream& out, const osd_xinfo_t& xi); |
7c673cae FG |
109 | |
110 | ||
31f18b77 FG |
111 | struct PGTempMap { |
112 | #if 1 | |
9f95a23c | 113 | ceph::buffer::list data; |
eafe8130 | 114 | typedef btree::btree_map<pg_t,ceph_le32*> map_t; |
31f18b77 FG |
115 | map_t map; |
116 | ||
9f95a23c | 117 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 | 118 | using ceph::encode; |
31f18b77 | 119 | uint32_t n = map.size(); |
11fdf7f2 | 120 | encode(n, bl); |
31f18b77 | 121 | for (auto &p : map) { |
11fdf7f2 | 122 | encode(p.first, bl); |
eafe8130 | 123 | bl.append((char*)p.second, (*p.second + 1) * sizeof(ceph_le32)); |
31f18b77 FG |
124 | } |
125 | } | |
9f95a23c | 126 | void decode(ceph::buffer::list::const_iterator& p) { |
11fdf7f2 | 127 | using ceph::decode; |
31f18b77 FG |
128 | data.clear(); |
129 | map.clear(); | |
130 | uint32_t n; | |
11fdf7f2 | 131 | decode(n, p); |
31f18b77 FG |
132 | if (!n) |
133 | return; | |
11fdf7f2 | 134 | auto pstart = p; |
31f18b77 | 135 | size_t start_off = pstart.get_off(); |
9f95a23c | 136 | std::vector<std::pair<pg_t,size_t>> offsets; |
31f18b77 FG |
137 | offsets.resize(n); |
138 | for (unsigned i=0; i<n; ++i) { | |
139 | pg_t pgid; | |
11fdf7f2 | 140 | decode(pgid, p); |
31f18b77 FG |
141 | offsets[i].first = pgid; |
142 | offsets[i].second = p.get_off() - start_off; | |
143 | uint32_t vn; | |
11fdf7f2 | 144 | decode(vn, p); |
9f95a23c | 145 | p += vn * sizeof(int32_t); |
31f18b77 FG |
146 | } |
147 | size_t len = p.get_off() - start_off; | |
148 | pstart.copy(len, data); | |
149 | if (data.get_num_buffers() > 1) { | |
150 | data.rebuild(); | |
151 | } | |
152 | //map.reserve(n); | |
153 | char *start = data.c_str(); | |
154 | for (auto i : offsets) { | |
9f95a23c | 155 | map.insert(map.end(), std::make_pair(i.first, (ceph_le32*)(start + i.second))); |
31f18b77 FG |
156 | } |
157 | } | |
158 | void rebuild() { | |
9f95a23c | 159 | ceph::buffer::list bl; |
31f18b77 | 160 | encode(bl); |
11fdf7f2 | 161 | auto p = std::cbegin(bl); |
31f18b77 FG |
162 | decode(p); |
163 | } | |
164 | friend bool operator==(const PGTempMap& l, const PGTempMap& r) { | |
165 | return | |
166 | l.map.size() == r.map.size() && | |
167 | l.data.contents_equal(r.data); | |
168 | } | |
169 | ||
170 | class iterator { | |
171 | map_t::const_iterator it; | |
172 | map_t::const_iterator end; | |
9f95a23c | 173 | std::pair<pg_t,std::vector<int32_t>> current; |
31f18b77 FG |
174 | void init_current() { |
175 | if (it != end) { | |
176 | current.first = it->first; | |
11fdf7f2 | 177 | ceph_assert(it->second); |
31f18b77 | 178 | current.second.resize(*it->second); |
eafe8130 TL |
179 | ceph_le32 *p = it->second + 1; |
180 | for (uint32_t n = 0; n < *it->second; ++n, ++p) { | |
31f18b77 FG |
181 | current.second[n] = *p; |
182 | } | |
183 | } | |
184 | } | |
185 | public: | |
186 | iterator(map_t::const_iterator p, | |
187 | map_t::const_iterator e) | |
188 | : it(p), end(e) { | |
189 | init_current(); | |
190 | } | |
191 | ||
9f95a23c | 192 | const std::pair<pg_t,std::vector<int32_t>>& operator*() const { |
31f18b77 FG |
193 | return current; |
194 | } | |
9f95a23c | 195 | const std::pair<pg_t,std::vector<int32_t>>* operator->() const { |
31f18b77 FG |
196 | return ¤t; |
197 | } | |
198 | friend bool operator==(const iterator& l, const iterator& r) { | |
199 | return l.it == r.it; | |
200 | } | |
201 | friend bool operator!=(const iterator& l, const iterator& r) { | |
202 | return l.it != r.it; | |
203 | } | |
204 | iterator& operator++() { | |
205 | ++it; | |
206 | if (it != end) | |
207 | init_current(); | |
208 | return *this; | |
209 | } | |
210 | iterator operator++(int) { | |
211 | iterator r = *this; | |
212 | ++it; | |
213 | if (it != end) | |
214 | init_current(); | |
215 | return r; | |
216 | } | |
217 | }; | |
218 | iterator begin() const { | |
219 | return iterator(map.begin(), map.end()); | |
220 | } | |
221 | iterator end() const { | |
222 | return iterator(map.end(), map.end()); | |
223 | } | |
224 | iterator find(pg_t pgid) const { | |
225 | return iterator(map.find(pgid), map.end()); | |
226 | } | |
227 | size_t size() const { | |
228 | return map.size(); | |
229 | } | |
230 | size_t count(pg_t pgid) const { | |
231 | return map.count(pgid); | |
232 | } | |
233 | void erase(pg_t pgid) { | |
234 | map.erase(pgid); | |
235 | } | |
236 | void clear() { | |
237 | map.clear(); | |
238 | data.clear(); | |
239 | } | |
240 | void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) { | |
11fdf7f2 | 241 | using ceph::encode; |
eafe8130 | 242 | size_t need = sizeof(ceph_le32) * (1 + v.size()); |
31f18b77 | 243 | if (need < data.get_append_buffer_unused_tail_length()) { |
9f95a23c | 244 | ceph::buffer::ptr z(data.get_append_buffer_unused_tail_length()); |
31f18b77 FG |
245 | z.zero(); |
246 | data.append(z.c_str(), z.length()); | |
247 | } | |
11fdf7f2 | 248 | encode(v, data); |
eafe8130 | 249 | map[pgid] = (ceph_le32*)(data.back().end_c_str()) - (1 + v.size()); |
31f18b77 FG |
250 | } |
251 | mempool::osdmap::vector<int32_t> get(pg_t pgid) { | |
252 | mempool::osdmap::vector<int32_t> v; | |
eafe8130 | 253 | ceph_le32 *p = map[pgid]; |
31f18b77 FG |
254 | size_t n = *p++; |
255 | v.resize(n); | |
256 | for (size_t i = 0; i < n; ++i, ++p) { | |
257 | v[i] = *p; | |
258 | } | |
259 | return v; | |
260 | } | |
261 | #else | |
262 | // trivial implementation | |
263 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > pg_temp; | |
264 | ||
9f95a23c | 265 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 | 266 | encode(pg_temp, bl); |
31f18b77 | 267 | } |
9f95a23c | 268 | void decode(ceph::buffer::list::const_iterator& p) { |
11fdf7f2 | 269 | decode(pg_temp, p); |
31f18b77 FG |
270 | } |
271 | friend bool operator==(const PGTempMap& l, const PGTempMap& r) { | |
272 | return | |
273 | l.pg_temp.size() == r.pg_temp.size() && | |
274 | l.pg_temp == r.pg_temp; | |
275 | } | |
276 | ||
277 | class iterator { | |
278 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> >::const_iterator it; | |
279 | public: | |
280 | iterator(mempool::osdmap::map<pg_t, | |
281 | mempool::osdmap::vector<int32_t> >::const_iterator p) | |
282 | : it(p) {} | |
283 | ||
9f95a23c | 284 | std::pair<pg_t,const mempool::osdmap::vector<int32_t>&> operator*() const { |
31f18b77 FG |
285 | return *it; |
286 | } | |
9f95a23c | 287 | const std::pair<const pg_t,mempool::osdmap::vector<int32_t>>* operator->() const { |
31f18b77 FG |
288 | return &*it; |
289 | } | |
290 | friend bool operator==(const iterator& l, const iterator& r) { | |
291 | return l.it == r.it; | |
292 | } | |
293 | friend bool operator!=(const iterator& l, const iterator& r) { | |
294 | return l.it != r.it; | |
295 | } | |
296 | iterator& operator++() { | |
297 | ++it; | |
298 | return *this; | |
299 | } | |
300 | iterator operator++(int) { | |
301 | iterator r = *this; | |
302 | ++it; | |
303 | return r; | |
304 | } | |
305 | }; | |
306 | iterator begin() const { | |
307 | return iterator(pg_temp.cbegin()); | |
308 | } | |
309 | iterator end() const { | |
310 | return iterator(pg_temp.cend()); | |
311 | } | |
312 | iterator find(pg_t pgid) const { | |
313 | return iterator(pg_temp.find(pgid)); | |
314 | } | |
315 | size_t size() const { | |
316 | return pg_temp.size(); | |
317 | } | |
318 | size_t count(pg_t pgid) const { | |
319 | return pg_temp.count(pgid); | |
320 | } | |
321 | void erase(pg_t pgid) { | |
322 | pg_temp.erase(pgid); | |
323 | } | |
324 | void clear() { | |
325 | pg_temp.clear(); | |
326 | } | |
327 | void set(pg_t pgid, const mempool::osdmap::vector<int32_t>& v) { | |
328 | pg_temp[pgid] = v; | |
329 | } | |
330 | const mempool::osdmap::vector<int32_t>& get(pg_t pgid) { | |
331 | return pg_temp.at(pgid); | |
332 | } | |
333 | #endif | |
9f95a23c | 334 | void dump(ceph::Formatter *f) const { |
31f18b77 FG |
335 | for (const auto &pg : *this) { |
336 | f->open_object_section("osds"); | |
337 | f->dump_stream("pgid") << pg.first; | |
338 | f->open_array_section("osds"); | |
339 | for (const auto osd : pg.second) | |
340 | f->dump_int("osd", osd); | |
341 | f->close_section(); | |
342 | f->close_section(); | |
343 | } | |
344 | } | |
345 | }; | |
346 | WRITE_CLASS_ENCODER(PGTempMap) | |
347 | ||
7c673cae FG |
348 | /** OSDMap |
349 | */ | |
350 | class OSDMap { | |
351 | public: | |
352 | MEMPOOL_CLASS_HELPERS(); | |
353 | ||
354 | class Incremental { | |
355 | public: | |
356 | MEMPOOL_CLASS_HELPERS(); | |
357 | ||
358 | /// feature bits we were encoded with. the subsequent OSDMap | |
359 | /// encoding should match. | |
360 | uint64_t encode_features; | |
361 | uuid_d fsid; | |
362 | epoch_t epoch; // new epoch; we are a diff from epoch-1 to epoch | |
363 | utime_t modified; | |
364 | int64_t new_pool_max; //incremented by the OSDMonitor on each pool create | |
365 | int32_t new_flags; | |
9f95a23c | 366 | ceph_release_t new_require_osd_release{0xff}; |
f67539c2 TL |
367 | uint32_t new_stretch_bucket_count{0}; |
368 | uint32_t new_degraded_stretch_mode{0}; | |
369 | uint32_t new_recovering_stretch_mode{0}; | |
370 | int32_t new_stretch_mode_bucket{0}; | |
371 | bool stretch_mode_enabled{false}; | |
372 | bool change_stretch_mode{false}; | |
7c673cae FG |
373 | |
374 | // full (rare) | |
9f95a23c TL |
375 | ceph::buffer::list fullmap; // in lieu of below. |
376 | ceph::buffer::list crush; | |
7c673cae FG |
377 | |
378 | // incremental | |
379 | int32_t new_max_osd; | |
380 | mempool::osdmap::map<int64_t,pg_pool_t> new_pools; | |
9f95a23c | 381 | mempool::osdmap::map<int64_t,std::string> new_pool_names; |
7c673cae | 382 | mempool::osdmap::set<int64_t> old_pools; |
9f95a23c TL |
383 | mempool::osdmap::map<std::string,std::map<std::string,std::string> > new_erasure_code_profiles; |
384 | mempool::osdmap::vector<std::string> old_erasure_code_profiles; | |
11fdf7f2 TL |
385 | mempool::osdmap::map<int32_t,entity_addrvec_t> new_up_client; |
386 | mempool::osdmap::map<int32_t,entity_addrvec_t> new_up_cluster; | |
31f18b77 | 387 | mempool::osdmap::map<int32_t,uint32_t> new_state; // XORed onto previous state. |
7c673cae FG |
388 | mempool::osdmap::map<int32_t,uint32_t> new_weight; |
389 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t> > new_pg_temp; // [] to remove | |
390 | mempool::osdmap::map<pg_t, int32_t> new_primary_temp; // [-1] to remove | |
391 | mempool::osdmap::map<int32_t,uint32_t> new_primary_affinity; | |
392 | mempool::osdmap::map<int32_t,epoch_t> new_up_thru; | |
9f95a23c | 393 | mempool::osdmap::map<int32_t,std::pair<epoch_t,epoch_t> > new_last_clean_interval; |
7c673cae FG |
394 | mempool::osdmap::map<int32_t,epoch_t> new_lost; |
395 | mempool::osdmap::map<int32_t,uuid_d> new_uuid; | |
396 | mempool::osdmap::map<int32_t,osd_xinfo_t> new_xinfo; | |
397 | ||
f67539c2 TL |
398 | mempool::osdmap::map<entity_addr_t,utime_t> new_blocklist; |
399 | mempool::osdmap::vector<entity_addr_t> old_blocklist; | |
33c7a0ef TL |
400 | mempool::osdmap::map<entity_addr_t,utime_t> new_range_blocklist; |
401 | mempool::osdmap::vector<entity_addr_t> old_range_blocklist; | |
11fdf7f2 TL |
402 | mempool::osdmap::map<int32_t, entity_addrvec_t> new_hb_back_up; |
403 | mempool::osdmap::map<int32_t, entity_addrvec_t> new_hb_front_up; | |
7c673cae FG |
404 | |
405 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap; | |
9f95a23c | 406 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<std::pair<int32_t,int32_t>>> new_pg_upmap_items; |
7c673cae | 407 | mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items; |
11fdf7f2 TL |
408 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps; |
409 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps; | |
7c673cae | 410 | |
81eedcae TL |
411 | mempool::osdmap::map<int32_t,uint32_t> new_crush_node_flags; |
412 | mempool::osdmap::map<int32_t,uint32_t> new_device_class_flags; | |
413 | ||
9f95a23c | 414 | std::string cluster_snapshot; |
7c673cae FG |
415 | |
416 | float new_nearfull_ratio = -1; | |
417 | float new_backfillfull_ratio = -1; | |
418 | float new_full_ratio = -1; | |
419 | ||
9f95a23c | 420 | ceph_release_t new_require_min_compat_client{0xff}; |
7c673cae | 421 | |
11fdf7f2 TL |
422 | utime_t new_last_up_change, new_last_in_change; |
423 | ||
7c673cae FG |
424 | mutable bool have_crc; ///< crc values are defined |
425 | uint32_t full_crc; ///< crc of the resulting OSDMap | |
426 | mutable uint32_t inc_crc; ///< crc of this incremental | |
427 | ||
428 | int get_net_marked_out(const OSDMap *previous) const; | |
429 | int get_net_marked_down(const OSDMap *previous) const; | |
430 | int identify_osd(uuid_d u) const; | |
431 | ||
9f95a23c TL |
432 | void encode_client_old(ceph::buffer::list& bl) const; |
433 | void encode_classic(ceph::buffer::list& bl, uint64_t features) const; | |
434 | void encode(ceph::buffer::list& bl, uint64_t features=CEPH_FEATURES_ALL) const; | |
435 | void decode_classic(ceph::buffer::list::const_iterator &p); | |
436 | void decode(ceph::buffer::list::const_iterator &bl); | |
437 | void dump(ceph::Formatter *f) const; | |
438 | static void generate_test_instances(std::list<Incremental*>& o); | |
7c673cae FG |
439 | |
440 | explicit Incremental(epoch_t e=0) : | |
441 | encode_features(0), | |
442 | epoch(e), new_pool_max(-1), new_flags(-1), new_max_osd(-1), | |
443 | have_crc(false), full_crc(0), inc_crc(0) { | |
7c673cae | 444 | } |
9f95a23c | 445 | explicit Incremental(ceph::buffer::list &bl) { |
11fdf7f2 | 446 | auto p = std::cbegin(bl); |
7c673cae FG |
447 | decode(p); |
448 | } | |
9f95a23c | 449 | explicit Incremental(ceph::buffer::list::const_iterator &p) { |
7c673cae FG |
450 | decode(p); |
451 | } | |
452 | ||
453 | pg_pool_t *get_new_pool(int64_t pool, const pg_pool_t *orig) { | |
454 | if (new_pools.count(pool) == 0) | |
455 | new_pools[pool] = *orig; | |
456 | return &new_pools[pool]; | |
457 | } | |
9f95a23c | 458 | bool has_erasure_code_profile(const std::string &name) const { |
7c673cae FG |
459 | auto i = new_erasure_code_profiles.find(name); |
460 | return i != new_erasure_code_profiles.end(); | |
461 | } | |
9f95a23c TL |
462 | void set_erasure_code_profile(const std::string &name, |
463 | const std::map<std::string,std::string>& profile) { | |
7c673cae FG |
464 | new_erasure_code_profiles[name] = profile; |
465 | } | |
9f95a23c | 466 | mempool::osdmap::map<std::string,std::map<std::string,std::string>> get_erasure_code_profiles() const { |
11fdf7f2 TL |
467 | return new_erasure_code_profiles; |
468 | } | |
7c673cae | 469 | |
f67539c2 TL |
470 | /// propagate update pools' (snap and other) metadata to any of their tiers |
471 | int propagate_base_properties_to_tiers(CephContext *cct, const OSDMap &base); | |
31f18b77 FG |
472 | |
473 | /// filter out osds with any pending state changing | |
9f95a23c | 474 | size_t get_pending_state_osds(std::vector<int> *osds) { |
11fdf7f2 | 475 | ceph_assert(osds); |
31f18b77 FG |
476 | osds->clear(); |
477 | ||
478 | for (auto &p : new_state) { | |
479 | osds->push_back(p.first); | |
480 | } | |
481 | ||
482 | return osds->size(); | |
483 | } | |
484 | ||
485 | bool pending_osd_has_state(int osd, unsigned state) { | |
486 | return new_state.count(osd) && (new_state[osd] & state) != 0; | |
487 | } | |
488 | ||
81eedcae TL |
489 | bool pending_osd_state_set(int osd, unsigned state) { |
490 | if (pending_osd_has_state(osd, state)) | |
491 | return false; | |
31f18b77 | 492 | new_state[osd] |= state; |
81eedcae | 493 | return true; |
31f18b77 FG |
494 | } |
495 | ||
496 | // cancel the specified pending osd state if there is any | |
497 | // return ture on success, false otherwise. | |
498 | bool pending_osd_state_clear(int osd, unsigned state) { | |
499 | if (!pending_osd_has_state(osd, state)) { | |
500 | // never has been set or already has been cancelled. | |
501 | return false; | |
502 | } | |
503 | ||
504 | new_state[osd] &= ~state; | |
11fdf7f2 TL |
505 | if (!new_state[osd]) { |
506 | // all flags cleared | |
507 | new_state.erase(osd); | |
508 | } | |
31f18b77 FG |
509 | return true; |
510 | } | |
511 | ||
9f95a23c TL |
512 | bool in_new_removed_snaps(int64_t pool, snapid_t snap) const { |
513 | auto p = new_removed_snaps.find(pool); | |
514 | if (p == new_removed_snaps.end()) { | |
515 | return false; | |
516 | } | |
517 | return p->second.contains(snap); | |
518 | } | |
7c673cae FG |
519 | }; |
520 | ||
521 | private: | |
522 | uuid_d fsid; | |
523 | epoch_t epoch; // what epoch of the osd cluster descriptor is this | |
524 | utime_t created, modified; // epoch start time | |
525 | int32_t pool_max; // the largest pool num, ever | |
526 | ||
527 | uint32_t flags; | |
528 | ||
529 | int num_osd; // not saved; see calc_num_osds | |
530 | int num_up_osd; // not saved; see calc_num_osds | |
531 | int num_in_osd; // not saved; see calc_num_osds | |
532 | ||
533 | int32_t max_osd; | |
9f95a23c | 534 | std::vector<uint32_t> osd_state; |
7c673cae | 535 | |
81eedcae TL |
536 | mempool::osdmap::map<int32_t,uint32_t> crush_node_flags; // crush node -> CEPH_OSD_* flags |
537 | mempool::osdmap::map<int32_t,uint32_t> device_class_flags; // device class -> CEPH_OSD_* flags | |
538 | ||
11fdf7f2 TL |
539 | utime_t last_up_change, last_in_change; |
540 | ||
28e407b8 AA |
541 | // These features affect OSDMap[::Incremental] encoding, or the |
542 | // encoding of some type embedded therein (CrushWrapper, something | |
543 | // from osd_types, etc.). | |
544 | static constexpr uint64_t SIGNIFICANT_FEATURES = | |
545 | CEPH_FEATUREMASK_PGID64 | | |
546 | CEPH_FEATUREMASK_PGPOOL3 | | |
547 | CEPH_FEATUREMASK_OSDENC | | |
548 | CEPH_FEATUREMASK_OSDMAP_ENC | | |
549 | CEPH_FEATUREMASK_OSD_POOLRESEND | | |
550 | CEPH_FEATUREMASK_NEW_OSDOP_ENCODING | | |
551 | CEPH_FEATUREMASK_MSG_ADDR2 | | |
552 | CEPH_FEATUREMASK_CRUSH_TUNABLES5 | | |
553 | CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS | | |
11fdf7f2 TL |
554 | CEPH_FEATUREMASK_SERVER_LUMINOUS | |
555 | CEPH_FEATUREMASK_SERVER_MIMIC | | |
9f95a23c TL |
556 | CEPH_FEATUREMASK_SERVER_NAUTILUS | |
557 | CEPH_FEATUREMASK_SERVER_OCTOPUS; | |
11fdf7f2 | 558 | |
7c673cae | 559 | struct addrs_s { |
11fdf7f2 TL |
560 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > client_addrs; |
561 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > cluster_addrs; | |
562 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > hb_back_addrs; | |
563 | mempool::osdmap::vector<std::shared_ptr<entity_addrvec_t> > hb_front_addrs; | |
7c673cae | 564 | }; |
11fdf7f2 TL |
565 | std::shared_ptr<addrs_s> osd_addrs; |
566 | ||
567 | entity_addrvec_t _blank_addrvec; | |
7c673cae FG |
568 | |
569 | mempool::osdmap::vector<__u32> osd_weight; // 16.16 fixed point, 0x10000 = "in", 0 = "out" | |
570 | mempool::osdmap::vector<osd_info_t> osd_info; | |
11fdf7f2 TL |
571 | std::shared_ptr<PGTempMap> pg_temp; // temp pg mapping (e.g. while we rebuild) |
572 | std::shared_ptr< mempool::osdmap::map<pg_t,int32_t > > primary_temp; // temp primary mapping (e.g. while we rebuild) | |
573 | std::shared_ptr< mempool::osdmap::vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline | |
7c673cae FG |
574 | |
575 | // remap (post-CRUSH, pre-up) | |
576 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> pg_upmap; ///< remap pg | |
9f95a23c | 577 | mempool::osdmap::map<pg_t,mempool::osdmap::vector<std::pair<int32_t,int32_t>>> pg_upmap_items; ///< remap osds in up set |
7c673cae FG |
578 | |
579 | mempool::osdmap::map<int64_t,pg_pool_t> pools; | |
9f95a23c TL |
580 | mempool::osdmap::map<int64_t,std::string> pool_name; |
581 | mempool::osdmap::map<std::string, std::map<std::string,std::string>> erasure_code_profiles; | |
f67539c2 | 582 | mempool::osdmap::map<std::string,int64_t, std::less<>> name_pool; |
7c673cae | 583 | |
11fdf7f2 | 584 | std::shared_ptr< mempool::osdmap::vector<uuid_d> > osd_uuid; |
7c673cae FG |
585 | mempool::osdmap::vector<osd_xinfo_t> osd_xinfo; |
586 | ||
33c7a0ef TL |
587 | class range_bits { |
588 | struct ip6 { | |
589 | uint64_t upper_64_bits, lower_64_bits; | |
590 | uint64_t upper_mask, lower_mask; | |
591 | }; | |
592 | struct ip4 { | |
593 | uint32_t ip_32_bits; | |
594 | uint32_t mask; | |
595 | }; | |
596 | union { | |
597 | ip6 ipv6; | |
598 | ip4 ipv4; | |
599 | } bits; | |
600 | bool ipv6; | |
601 | static void get_ipv6_bytes(unsigned const char *addr, | |
602 | uint64_t *upper, uint64_t *lower); | |
603 | public: | |
604 | range_bits(); | |
605 | range_bits(const entity_addr_t& addr); | |
606 | void parse(const entity_addr_t& addr); | |
607 | bool matches(const entity_addr_t& addr) const; | |
608 | }; | |
f67539c2 | 609 | mempool::osdmap::unordered_map<entity_addr_t,utime_t> blocklist; |
33c7a0ef TL |
610 | mempool::osdmap::map<entity_addr_t,utime_t> range_blocklist; |
611 | mempool::osdmap::map<entity_addr_t,range_bits> calculated_ranges; | |
7c673cae | 612 | |
11fdf7f2 TL |
613 | /// queue of snaps to remove |
614 | mempool::osdmap::map<int64_t, snap_interval_set_t> removed_snaps_queue; | |
615 | ||
616 | /// removed_snaps additions this epoch | |
617 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps; | |
618 | ||
619 | /// removed_snaps removals this epoch | |
620 | mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps; | |
621 | ||
7c673cae | 622 | epoch_t cluster_snapshot_epoch; |
9f95a23c | 623 | std::string cluster_snapshot; |
f67539c2 | 624 | bool new_blocklist_entries; |
7c673cae FG |
625 | |
626 | float full_ratio = 0, backfillfull_ratio = 0, nearfull_ratio = 0; | |
627 | ||
628 | /// min compat client we want to support | |
9f95a23c | 629 | ceph_release_t require_min_compat_client{ceph_release_t::unknown}; |
7c673cae | 630 | |
31f18b77 FG |
631 | public: |
632 | /// require osds to run at least this release | |
9f95a23c | 633 | ceph_release_t require_osd_release{ceph_release_t::unknown}; |
31f18b77 FG |
634 | |
635 | private: | |
7c673cae FG |
636 | mutable uint64_t cached_up_osd_features; |
637 | ||
638 | mutable bool crc_defined; | |
639 | mutable uint32_t crc; | |
640 | ||
641 | void _calc_up_osd_features(); | |
642 | ||
643 | public: | |
644 | bool have_crc() const { return crc_defined; } | |
645 | uint32_t get_crc() const { return crc; } | |
646 | ||
11fdf7f2 | 647 | std::shared_ptr<CrushWrapper> crush; // hierarchical map |
f67539c2 TL |
648 | bool stretch_mode_enabled; // we are in stretch mode, requiring multiple sites |
649 | uint32_t stretch_bucket_count; // number of sites we expect to be in | |
650 | uint32_t degraded_stretch_mode; // 0 if not degraded; else count of up sites | |
651 | uint32_t recovering_stretch_mode; // 0 if not recovering; else 1 | |
652 | int32_t stretch_mode_bucket; // the bucket type we're stretched across | |
31f18b77 FG |
653 | private: |
654 | uint32_t crush_version = 1; | |
7c673cae FG |
655 | |
656 | friend class OSDMonitor; | |
657 | ||
658 | public: | |
659 | OSDMap() : epoch(0), | |
224ce89b | 660 | pool_max(0), |
7c673cae FG |
661 | flags(0), |
662 | num_osd(0), num_up_osd(0), num_in_osd(0), | |
663 | max_osd(0), | |
664 | osd_addrs(std::make_shared<addrs_s>()), | |
31f18b77 | 665 | pg_temp(std::make_shared<PGTempMap>()), |
7c673cae FG |
666 | primary_temp(std::make_shared<mempool::osdmap::map<pg_t,int32_t>>()), |
667 | osd_uuid(std::make_shared<mempool::osdmap::vector<uuid_d>>()), | |
668 | cluster_snapshot_epoch(0), | |
f67539c2 | 669 | new_blocklist_entries(false), |
7c673cae FG |
670 | cached_up_osd_features(0), |
671 | crc_defined(false), crc(0), | |
f67539c2 TL |
672 | crush(std::make_shared<CrushWrapper>()), |
673 | stretch_mode_enabled(false), stretch_bucket_count(0), | |
674 | degraded_stretch_mode(0), recovering_stretch_mode(0), stretch_mode_bucket(0) { | |
7c673cae FG |
675 | } |
676 | ||
7c673cae FG |
677 | private: |
678 | OSDMap(const OSDMap& other) = default; | |
679 | OSDMap& operator=(const OSDMap& other) = default; | |
680 | public: | |
681 | ||
28e407b8 AA |
682 | /// return feature mask subset that is relevant to OSDMap encoding |
683 | static uint64_t get_significant_features(uint64_t features) { | |
684 | return SIGNIFICANT_FEATURES & features; | |
685 | } | |
686 | ||
687 | uint64_t get_encoding_features() const; | |
688 | ||
7c673cae FG |
689 | void deepish_copy_from(const OSDMap& o) { |
690 | *this = o; | |
691 | primary_temp.reset(new mempool::osdmap::map<pg_t,int32_t>(*o.primary_temp)); | |
31f18b77 | 692 | pg_temp.reset(new PGTempMap(*o.pg_temp)); |
7c673cae FG |
693 | osd_uuid.reset(new mempool::osdmap::vector<uuid_d>(*o.osd_uuid)); |
694 | ||
695 | if (o.osd_primary_affinity) | |
696 | osd_primary_affinity.reset(new mempool::osdmap::vector<__u32>(*o.osd_primary_affinity)); | |
697 | ||
11fdf7f2 | 698 | // NOTE: this still references shared entity_addrvec_t's. |
7c673cae FG |
699 | osd_addrs.reset(new addrs_s(*o.osd_addrs)); |
700 | ||
701 | // NOTE: we do not copy crush. note that apply_incremental will | |
702 | // allocate a new CrushWrapper, though. | |
703 | } | |
704 | ||
705 | // map info | |
706 | const uuid_d& get_fsid() const { return fsid; } | |
707 | void set_fsid(uuid_d& f) { fsid = f; } | |
708 | ||
709 | epoch_t get_epoch() const { return epoch; } | |
710 | void inc_epoch() { epoch++; } | |
711 | ||
712 | void set_epoch(epoch_t e); | |
713 | ||
31f18b77 FG |
714 | uint32_t get_crush_version() const { |
715 | return crush_version; | |
716 | } | |
717 | ||
7c673cae FG |
718 | /* stamps etc */ |
719 | const utime_t& get_created() const { return created; } | |
720 | const utime_t& get_modified() const { return modified; } | |
721 | ||
33c7a0ef TL |
722 | bool is_blocklisted(const entity_addr_t& a, CephContext *cct=nullptr) const; |
723 | bool is_blocklisted(const entity_addrvec_t& a, CephContext *cct=nullptr) const; | |
724 | void get_blocklist(std::list<std::pair<entity_addr_t,utime_t > > *bl, | |
725 | std::list<std::pair<entity_addr_t,utime_t> > *rl) const; | |
726 | void get_blocklist(std::set<entity_addr_t> *bl, | |
727 | std::set<entity_addr_t> *rl) const; | |
7c673cae | 728 | |
9f95a23c | 729 | std::string get_cluster_snapshot() const { |
7c673cae FG |
730 | if (cluster_snapshot_epoch == epoch) |
731 | return cluster_snapshot; | |
9f95a23c | 732 | return std::string(); |
7c673cae FG |
733 | } |
734 | ||
735 | float get_full_ratio() const { | |
736 | return full_ratio; | |
737 | } | |
738 | float get_backfillfull_ratio() const { | |
739 | return backfillfull_ratio; | |
740 | } | |
741 | float get_nearfull_ratio() const { | |
742 | return nearfull_ratio; | |
743 | } | |
3efd9988 | 744 | void get_full_pools(CephContext *cct, |
9f95a23c TL |
745 | std::set<int64_t> *full, |
746 | std::set<int64_t> *backfillfull, | |
747 | std::set<int64_t> *nearfull) const; | |
748 | void get_full_osd_counts(std::set<int> *full, std::set<int> *backfill, | |
749 | std::set<int> *nearfull) const; | |
31f18b77 FG |
750 | |
751 | ||
7c673cae FG |
752 | /***** cluster state *****/ |
753 | /* osds */ | |
754 | int get_max_osd() const { return max_osd; } | |
755 | void set_max_osd(int m); | |
756 | ||
757 | unsigned get_num_osds() const { | |
758 | return num_osd; | |
759 | } | |
760 | unsigned get_num_up_osds() const { | |
761 | return num_up_osd; | |
762 | } | |
763 | unsigned get_num_in_osds() const { | |
764 | return num_in_osd; | |
765 | } | |
766 | /// recalculate cached values for get_num{,_up,_in}_osds | |
767 | int calc_num_osds(); | |
768 | ||
9f95a23c TL |
769 | void get_all_osds(std::set<int32_t>& ls) const; |
770 | void get_up_osds(std::set<int32_t>& ls) const; | |
81eedcae | 771 | void get_out_existing_osds(std::set<int32_t>& ls) const; |
7c673cae FG |
772 | unsigned get_num_pg_temp() const { |
773 | return pg_temp->size(); | |
774 | } | |
775 | ||
776 | int get_flags() const { return flags; } | |
777 | bool test_flag(int f) const { return flags & f; } | |
778 | void set_flag(int f) { flags |= f; } | |
779 | void clear_flag(int f) { flags &= ~f; } | |
780 | ||
/// Fill @p flagset with string names for the map flags (defined out of
/// line).
void get_flag_set(std::set<std::string> *flagset) const;

/// Translate an osd state bitmask into a set of strings stored in @p st
/// (defined out of line).
static void calc_state_set(int state, std::set<std::string>& st);
7c673cae FG |
784 | |
785 | int get_state(int o) const { | |
11fdf7f2 | 786 | ceph_assert(o < max_osd); |
7c673cae FG |
787 | return osd_state[o]; |
788 | } | |
9f95a23c | 789 | int get_state(int o, std::set<std::string>& st) const { |
11fdf7f2 | 790 | ceph_assert(o < max_osd); |
7c673cae FG |
791 | unsigned t = osd_state[o]; |
792 | calc_state_set(t, st); | |
793 | return osd_state[o]; | |
794 | } | |
795 | void set_state(int o, unsigned s) { | |
11fdf7f2 | 796 | ceph_assert(o < max_osd); |
7c673cae FG |
797 | osd_state[o] = s; |
798 | } | |
799 | void set_weight(int o, unsigned w) { | |
11fdf7f2 | 800 | ceph_assert(o < max_osd); |
7c673cae FG |
801 | osd_weight[o] = w; |
802 | if (w) | |
803 | osd_state[o] |= CEPH_OSD_EXISTS; | |
804 | } | |
805 | unsigned get_weight(int o) const { | |
11fdf7f2 | 806 | ceph_assert(o < max_osd); |
7c673cae FG |
807 | return osd_weight[o]; |
808 | } | |
809 | float get_weightf(int o) const { | |
810 | return (float)get_weight(o) / (float)CEPH_OSD_IN; | |
811 | } | |
9f95a23c | 812 | void adjust_osd_weights(const std::map<int,double>& weights, Incremental& inc) const; |
7c673cae FG |
813 | |
814 | void set_primary_affinity(int o, int w) { | |
11fdf7f2 | 815 | ceph_assert(o < max_osd); |
7c673cae FG |
816 | if (!osd_primary_affinity) |
817 | osd_primary_affinity.reset( | |
818 | new mempool::osdmap::vector<__u32>( | |
819 | max_osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY)); | |
820 | (*osd_primary_affinity)[o] = w; | |
821 | } | |
822 | unsigned get_primary_affinity(int o) const { | |
11fdf7f2 | 823 | ceph_assert(o < max_osd); |
7c673cae FG |
824 | if (!osd_primary_affinity) |
825 | return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; | |
826 | return (*osd_primary_affinity)[o]; | |
827 | } | |
828 | float get_primary_affinityf(int o) const { | |
829 | return (float)get_primary_affinity(o) / (float)CEPH_OSD_MAX_PRIMARY_AFFINITY; | |
830 | } | |
831 | ||
9f95a23c | 832 | bool has_erasure_code_profile(const std::string &name) const { |
7c673cae FG |
833 | auto i = erasure_code_profiles.find(name); |
834 | return i != erasure_code_profiles.end(); | |
835 | } | |
/// Defined out of line.  Takes a profile map to operate on and an
/// optional stream pointer.
/// NOTE(review): presumably fills profile_map with the configured default
/// profile and reports errors through ss -- confirm against the
/// definition.
int get_erasure_code_profile_default(CephContext *cct,
				       std::map<std::string,std::string> &profile_map,
				       std::ostream *ss);
839 | void set_erasure_code_profile(const std::string &name, | |
840 | const std::map<std::string,std::string>& profile) { | |
7c673cae FG |
841 | erasure_code_profiles[name] = profile; |
842 | } | |
9f95a23c TL |
843 | const std::map<std::string,std::string> &get_erasure_code_profile( |
844 | const std::string &name) const { | |
845 | static std::map<std::string,std::string> empty; | |
7c673cae FG |
846 | auto i = erasure_code_profiles.find(name); |
847 | if (i == erasure_code_profiles.end()) | |
848 | return empty; | |
849 | else | |
850 | return i->second; | |
851 | } | |
9f95a23c | 852 | const mempool::osdmap::map<std::string,std::map<std::string,std::string>> &get_erasure_code_profiles() const { |
7c673cae FG |
853 | return erasure_code_profiles; |
854 | } | |
855 | ||
856 | bool exists(int osd) const { | |
857 | //assert(osd >= 0); | |
858 | return osd >= 0 && osd < max_osd && (osd_state[osd] & CEPH_OSD_EXISTS); | |
859 | } | |
860 | ||
31f18b77 FG |
861 | bool is_destroyed(int osd) const { |
862 | return exists(osd) && (osd_state[osd] & CEPH_OSD_DESTROYED); | |
863 | } | |
864 | ||
7c673cae FG |
865 | bool is_up(int osd) const { |
866 | return exists(osd) && (osd_state[osd] & CEPH_OSD_UP); | |
867 | } | |
868 | ||
869 | bool has_been_up_since(int osd, epoch_t epoch) const { | |
870 | return is_up(osd) && get_up_from(osd) <= epoch; | |
871 | } | |
872 | ||
873 | bool is_down(int osd) const { | |
874 | return !is_up(osd); | |
875 | } | |
876 | ||
9f95a23c TL |
877 | bool is_stop(int osd) const { |
878 | return exists(osd) && is_down(osd) && | |
879 | (osd_state[osd] & CEPH_OSD_STOP); | |
880 | } | |
881 | ||
7c673cae FG |
882 | bool is_out(int osd) const { |
883 | return !exists(osd) || get_weight(osd) == CEPH_OSD_OUT; | |
884 | } | |
885 | ||
886 | bool is_in(int osd) const { | |
887 | return !is_out(osd); | |
888 | } | |
889 | ||
9f95a23c TL |
890 | bool is_dead(int osd) const { |
891 | if (!exists(osd)) { | |
892 | return false; // unclear if they know they are removed from map | |
893 | } | |
894 | return get_xinfo(osd).dead_epoch > get_info(osd).up_from; | |
895 | } | |
896 | ||
81eedcae TL |
897 | unsigned get_osd_crush_node_flags(int osd) const; |
898 | unsigned get_crush_node_flags(int id) const; | |
899 | unsigned get_device_class_flags(int id) const; | |
900 | ||
901 | bool is_noup_by_osd(int osd) const { | |
31f18b77 FG |
902 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NOUP); |
903 | } | |
904 | ||
81eedcae | 905 | bool is_nodown_by_osd(int osd) const { |
31f18b77 FG |
906 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NODOWN); |
907 | } | |
908 | ||
81eedcae | 909 | bool is_noin_by_osd(int osd) const { |
31f18b77 FG |
910 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NOIN); |
911 | } | |
912 | ||
81eedcae | 913 | bool is_noout_by_osd(int osd) const { |
31f18b77 FG |
914 | return exists(osd) && (osd_state[osd] & CEPH_OSD_NOOUT); |
915 | } | |
916 | ||
81eedcae TL |
917 | bool is_noup(int osd) const { |
918 | if (test_flag(CEPH_OSDMAP_NOUP)) // global? | |
919 | return true; | |
920 | if (is_noup_by_osd(osd)) // by osd? | |
921 | return true; | |
922 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NOUP) // by crush-node? | |
923 | return true; | |
924 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
925 | get_device_class_flags(class_id) & CEPH_OSD_NOUP) // by device-class? | |
926 | return true; | |
927 | return false; | |
31f18b77 FG |
928 | } |
929 | ||
81eedcae TL |
930 | bool is_nodown(int osd) const { |
931 | if (test_flag(CEPH_OSDMAP_NODOWN)) | |
932 | return true; | |
933 | if (is_nodown_by_osd(osd)) | |
934 | return true; | |
935 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NODOWN) | |
936 | return true; | |
937 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
938 | get_device_class_flags(class_id) & CEPH_OSD_NODOWN) | |
939 | return true; | |
940 | return false; | |
31f18b77 FG |
941 | } |
942 | ||
81eedcae TL |
943 | bool is_noin(int osd) const { |
944 | if (test_flag(CEPH_OSDMAP_NOIN)) | |
945 | return true; | |
946 | if (is_noin_by_osd(osd)) | |
947 | return true; | |
948 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NOIN) | |
949 | return true; | |
950 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
951 | get_device_class_flags(class_id) & CEPH_OSD_NOIN) | |
952 | return true; | |
953 | return false; | |
31f18b77 FG |
954 | } |
955 | ||
81eedcae TL |
956 | bool is_noout(int osd) const { |
957 | if (test_flag(CEPH_OSDMAP_NOOUT)) | |
958 | return true; | |
959 | if (is_noout_by_osd(osd)) | |
960 | return true; | |
961 | if (get_osd_crush_node_flags(osd) & CEPH_OSD_NOOUT) | |
962 | return true; | |
963 | if (auto class_id = crush->get_item_class_id(osd); class_id >= 0 && | |
964 | get_device_class_flags(class_id) & CEPH_OSD_NOOUT) | |
965 | return true; | |
966 | return false; | |
31f18b77 FG |
967 | } |
968 | ||
7c673cae FG |
/**
 * check if an entire crush subtree is down
 *
 * All three helpers are defined out of line.  The down_cache /
 * down_in_osds / up_in_osds / subtree_up / subtree_type_down arguments
 * are caller-provided containers.
 * NOTE(review): presumably used to memoize results across calls --
 * confirm against the definitions.
 */
bool subtree_is_down(int id, std::set<int> *down_cache) const;
bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, std::set<int> *down_cache) const;

bool subtree_type_is_down(CephContext *cct, int id, int subtree_type, std::set<int> *down_in_osds, std::set<int> *up_in_osds,
                          std::set<int> *subtree_up, std::unordered_map<int, std::set<int> > *subtree_type_down) const;
31f18b77 | 977 | |
7c673cae FG |
978 | int identify_osd(const entity_addr_t& addr) const; |
979 | int identify_osd(const uuid_d& u) const; | |
980 | int identify_osd_on_all_channels(const entity_addr_t& addr) const; | |
981 | ||
982 | bool have_addr(const entity_addr_t& addr) const { | |
983 | return identify_osd(addr) >= 0; | |
984 | } | |
985 | int find_osd_on_ip(const entity_addr_t& ip) const; | |
11fdf7f2 TL |
986 | |
987 | const entity_addrvec_t& get_addrs(int osd) const { | |
988 | ceph_assert(exists(osd)); | |
989 | return osd_addrs->client_addrs[osd] ? | |
990 | *osd_addrs->client_addrs[osd] : _blank_addrvec; | |
7c673cae | 991 | } |
11fdf7f2 TL |
992 | const entity_addrvec_t& get_most_recent_addrs(int osd) const { |
993 | return get_addrs(osd); | |
7c673cae | 994 | } |
11fdf7f2 TL |
995 | const entity_addrvec_t &get_cluster_addrs(int osd) const { |
996 | ceph_assert(exists(osd)); | |
997 | return osd_addrs->cluster_addrs[osd] ? | |
998 | *osd_addrs->cluster_addrs[osd] : _blank_addrvec; | |
7c673cae | 999 | } |
11fdf7f2 TL |
1000 | const entity_addrvec_t &get_hb_back_addrs(int osd) const { |
1001 | ceph_assert(exists(osd)); | |
1002 | return osd_addrs->hb_back_addrs[osd] ? | |
1003 | *osd_addrs->hb_back_addrs[osd] : _blank_addrvec; | |
7c673cae | 1004 | } |
11fdf7f2 TL |
1005 | const entity_addrvec_t &get_hb_front_addrs(int osd) const { |
1006 | ceph_assert(exists(osd)); | |
1007 | return osd_addrs->hb_front_addrs[osd] ? | |
1008 | *osd_addrs->hb_front_addrs[osd] : _blank_addrvec; | |
7c673cae FG |
1009 | } |
1010 | ||
1011 | const uuid_d& get_uuid(int osd) const { | |
11fdf7f2 | 1012 | ceph_assert(exists(osd)); |
7c673cae FG |
1013 | return (*osd_uuid)[osd]; |
1014 | } | |
1015 | ||
1016 | const epoch_t& get_up_from(int osd) const { | |
11fdf7f2 | 1017 | ceph_assert(exists(osd)); |
7c673cae FG |
1018 | return osd_info[osd].up_from; |
1019 | } | |
1020 | const epoch_t& get_up_thru(int osd) const { | |
11fdf7f2 | 1021 | ceph_assert(exists(osd)); |
7c673cae FG |
1022 | return osd_info[osd].up_thru; |
1023 | } | |
1024 | const epoch_t& get_down_at(int osd) const { | |
11fdf7f2 | 1025 | ceph_assert(exists(osd)); |
7c673cae FG |
1026 | return osd_info[osd].down_at; |
1027 | } | |
1028 | const osd_info_t& get_info(int osd) const { | |
11fdf7f2 | 1029 | ceph_assert(osd < max_osd); |
7c673cae FG |
1030 | return osd_info[osd]; |
1031 | } | |
1032 | ||
1033 | const osd_xinfo_t& get_xinfo(int osd) const { | |
11fdf7f2 | 1034 | ceph_assert(osd < max_osd); |
7c673cae FG |
1035 | return osd_xinfo[osd]; |
1036 | } | |
1037 | ||
1038 | int get_next_up_osd_after(int n) const { | |
1039 | if (get_max_osd() == 0) | |
1040 | return -1; | |
1041 | for (int i = n + 1; i != n; ++i) { | |
1042 | if (i >= get_max_osd()) | |
1043 | i = 0; | |
1044 | if (i == n) | |
1045 | break; | |
1046 | if (is_up(i)) | |
1047 | return i; | |
1048 | } | |
1049 | return -1; | |
1050 | } | |
1051 | ||
1052 | int get_previous_up_osd_before(int n) const { | |
1053 | if (get_max_osd() == 0) | |
1054 | return -1; | |
1055 | for (int i = n - 1; i != n; --i) { | |
1056 | if (i < 0) | |
1057 | i = get_max_osd() - 1; | |
1058 | if (i == n) | |
1059 | break; | |
1060 | if (is_up(i)) | |
1061 | return i; | |
1062 | } | |
1063 | return -1; | |
1064 | } | |
1065 | ||
11fdf7f2 TL |
1066 | |
/// Defined out of line; results are delivered through @p want.
/// NOTE(review): @p skip is taken by value (copied on every call) and
/// @p subtree by non-const reference -- check whether the definition
/// really mutates either before tightening the signature.
void get_random_up_osds_by_subtree(int n,     // whoami
				     std::string &subtree,
				     int limit, // how many
				     std::set<int> skip,
				     std::set<int> *want) const;
11fdf7f2 | 1072 | |
7c673cae FG |
/**
 * get feature bits required by the current structure
 *
 * @param entity_type [in] what entity type we are asking about
 * @param mask [out] set of all possible map-related features we could set
 * @return feature bits used by this map
 */
uint64_t get_features(int entity_type, uint64_t *mask) const;

/**
 * get oldest *client* version (firefly, hammer, etc.) that can connect given
 * the feature bits required (according to get_features()).
 */
ceph_release_t get_min_compat_client() const;

/**
 * gets the required minimum *client* version that can connect to the cluster.
 */
ceph_release_t get_require_min_compat_client() const;

/**
 * get intersection of features supported by up osds
 */
uint64_t get_up_osd_features() const;
1097 | ||
/// Collect pgs into @p upmap_pgs (defined out of line).
void get_upmap_pgs(std::vector<pg_t> *upmap_pgs) const;
/// Examine the pgs in @p to_check and report results through
/// @p to_cancel and @p to_remap (defined out of line).
/// NOTE(review): the meaning of the bool result is not visible here --
/// confirm against the definition.
bool check_pg_upmaps(
  CephContext *cct,
  const std::vector<pg_t>& to_check,
  std::vector<pg_t> *to_cancel,
  std::map<pg_t, mempool::osdmap::vector<std::pair<int,int>>> *to_remap) const;
/// Record the given cancel/remap decisions into @p pending_inc (defined
/// out of line; this map itself is const).
void clean_pg_upmaps(
  CephContext *cct,
  Incremental *pending_inc,
  const std::vector<pg_t>& to_cancel,
  const std::map<pg_t, mempool::osdmap::vector<std::pair<int,int>>>& to_remap) const;
/// Two-argument overload of clean_pg_upmaps() (defined out of line).
bool clean_pg_upmaps(CephContext *cct, Incremental *pending_inc) const;

/// Apply @p inc to this map (defined out of line).
int apply_incremental(const Incremental &inc);

/// try to re-use/reference addrs in oldmap from newmap
static void dedup(const OSDMap *oldmap, OSDMap *newmap);

/// Defined out of line; takes the old and next maps read-only and writes
/// into @p pending_inc.
static void clean_temps(CephContext *cct,
			  const OSDMap& oldmap,
			  const OSDMap& nextmap,
			  Incremental *pending_inc);
1120 | ||
// serialize, unserialize
private:
// Private encode/decode helpers; all are defined out of line.
void encode_client_old(ceph::buffer::list& bl) const;
void encode_classic(ceph::buffer::list& bl, uint64_t features) const;
void decode_classic(ceph::buffer::list::const_iterator& p);
void post_decode();
public:
/// Serialize this map into @p bl; @p features defaults to
/// CEPH_FEATURES_ALL.
void encode(ceph::buffer::list& bl, uint64_t features=CEPH_FEATURES_ALL) const;
/// Deserialize from a buffer list, or from an iterator into one.
void decode(ceph::buffer::list& bl);
void decode(ceph::buffer::list::const_iterator& bl);
7c673cae FG |
1131 | |
1132 | ||
/**** mapping facilities ****/
/// Map an object (pool id plus name/key/namespace) to a pg written to
/// @p pg.  Defined out of line.
/// NOTE(review): return value presumably 0 on success / negative errno --
/// confirm.
int map_to_pg(
  int64_t pool,
  const std::string& name,
  const std::string& key,
  const std::string& nspace,
  pg_t *pg) const;
/// Map (oid, locator) to a pg written to @p pg.  The inline overload of
/// the same name asserts that this returns 0.
int object_locator_to_pg(const object_t& oid, const object_locator_t& loc,
			   pg_t &pg) const;
1142 | pg_t object_locator_to_pg(const object_t& oid, | |
1143 | const object_locator_t& loc) const { | |
1144 | pg_t pg; | |
1145 | int ret = object_locator_to_pg(oid, loc, pg); | |
11fdf7f2 | 1146 | ceph_assert(ret == 0); |
7c673cae FG |
1147 | return pg; |
1148 | } | |
1149 | ||
1150 | ||
1151 | static object_locator_t file_to_object_locator(const file_layout_t& layout) { | |
1152 | return object_locator_t(layout.pool_id, layout.pool_ns); | |
1153 | } | |
1154 | ||
1155 | ceph_object_layout file_to_object_layout(object_t oid, | |
1156 | file_layout_t& layout) const { | |
1157 | return make_object_layout(oid, layout.pool_id, layout.pool_ns); | |
1158 | } | |
1159 | ||
1160 | ceph_object_layout make_object_layout(object_t oid, int pg_pool, | |
9f95a23c | 1161 | std::string nspace) const; |
7c673cae FG |
1162 | |
1163 | int get_pg_num(int pg_pool) const | |
1164 | { | |
1165 | const pg_pool_t *pool = get_pg_pool(pg_pool); | |
11fdf7f2 | 1166 | ceph_assert(NULL != pool); |
7c673cae FG |
1167 | return pool->get_pg_num(); |
1168 | } | |
1169 | ||
1170 | bool pg_exists(pg_t pgid) const { | |
1171 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
1172 | return p && pgid.ps() < p->get_pg_num(); | |
1173 | } | |
1174 | ||
224ce89b WB |
1175 | int get_pg_pool_min_size(pg_t pgid) const { |
1176 | if (!pg_exists(pgid)) { | |
1177 | return -ENOENT; | |
1178 | } | |
1179 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
11fdf7f2 | 1180 | ceph_assert(p); |
224ce89b WB |
1181 | return p->get_min_size(); |
1182 | } | |
1183 | ||
1184 | int get_pg_pool_size(pg_t pgid) const { | |
1185 | if (!pg_exists(pgid)) { | |
1186 | return -ENOENT; | |
1187 | } | |
1188 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
11fdf7f2 | 1189 | ceph_assert(p); |
224ce89b WB |
1190 | return p->get_size(); |
1191 | } | |
1192 | ||
94b18763 FG |
1193 | int get_pg_pool_crush_rule(pg_t pgid) const { |
1194 | if (!pg_exists(pgid)) { | |
1195 | return -ENOENT; | |
1196 | } | |
1197 | const pg_pool_t *p = get_pg_pool(pgid.pool()); | |
11fdf7f2 | 1198 | ceph_assert(p); |
94b18763 FG |
1199 | return p->get_crush_rule(); |
1200 | } | |
1201 | ||
private:
/// pg -> (raw osd list)
void _pg_to_raw_osds(
  const pg_pool_t& pool, pg_t pg,
  std::vector<int> *osds,
  ps_t *ppps) const;
/// Defined out of line; takes an osd list and returns an int.
/// NOTE(review): presumably the chosen primary, or -1 if none -- confirm.
int _pick_primary(const std::vector<int>& osds) const;
/// Defined out of line; edits @p osds in place.
void _remove_nonexistent_osds(const pg_pool_t& pool, std::vector<int>& osds) const;

/// Defined out of line; may adjust both @p osds and @p primary.
void _apply_primary_affinity(ps_t seed, const pg_pool_t& pool,
			       std::vector<int> *osds, int *primary) const;

/// apply pg_upmap[_items] mappings
void _apply_upmap(const pg_pool_t& pi, pg_t pg, std::vector<int> *raw) const;

/// pg -> (up osd list)
void _raw_to_up_osds(const pg_pool_t& pool, const std::vector<int>& raw,
                     std::vector<int> *up) const;


/**
 * Get the pg and primary temp, if they are specified.
 * @param temp_pg [out] Will be empty or contain the temp PG mapping on return
 * @param temp_primary [out] Will be the value in primary_temp, or a value derived
 * from the pg_temp (if specified), or -1 if you should use the calculated (up_)primary.
 */
void _get_temp_osds(const pg_pool_t& pool, pg_t pg,
                    std::vector<int> *temp_pg, int *temp_primary) const;

/**
 *  map to up and acting. Fills in whatever fields are non-NULL.
 */
void _pg_to_up_acting_osds(const pg_t& pg, std::vector<int> *up, int *up_primary,
                           std::vector<int> *acting, int *acting_primary,
			     bool raw_pg_to_pg = true) const;
1237 | ||
public:
/***
 * This is suitable only for looking at raw CRUSH outputs. It skips
 * applying the temp and up checks and should not be used
 * by anybody for data mapping purposes.
 * raw and primary must be non-NULL
 */
void pg_to_raw_osds(pg_t pg, std::vector<int> *raw, int *primary) const;
/// Defined out of line; fills @p raw and @p raw_upmap for @p pg.
/// NOTE(review): presumably the raw mapping before and after upmap
/// entries are applied -- confirm against the definition.
void pg_to_raw_upmap(pg_t pg, std::vector<int> *raw,
                     std::vector<int> *raw_upmap) const;
7c673cae | 1248 | /// map a pg to its acting set. @return acting set size |
9f95a23c | 1249 | void pg_to_acting_osds(const pg_t& pg, std::vector<int> *acting, |
7c673cae FG |
1250 | int *acting_primary) const { |
1251 | _pg_to_up_acting_osds(pg, NULL, NULL, acting, acting_primary); | |
7c673cae | 1252 | } |
9f95a23c | 1253 | void pg_to_acting_osds(pg_t pg, std::vector<int>& acting) const { |
7c673cae FG |
1254 | return pg_to_acting_osds(pg, &acting, NULL); |
1255 | } | |
/**
 * This does not apply temp overrides and should not be used
 * by anybody for data mapping purposes. Specify both pointers.
 *
 * Defined out of line.
 */
void pg_to_raw_up(pg_t pg, std::vector<int> *up, int *primary) const;
7c673cae FG |
1261 | /** |
1262 | * map a pg to its acting set as well as its up set. You must use | |
1263 | * the acting set for data mapping purposes, but some users will | |
1264 | * also find the up set useful for things like deciding what to | |
1265 | * set as pg_temp. | |
1266 | * Each of these pointers must be non-NULL. | |
1267 | */ | |
9f95a23c TL |
1268 | void pg_to_up_acting_osds(pg_t pg, std::vector<int> *up, int *up_primary, |
1269 | std::vector<int> *acting, int *acting_primary) const { | |
7c673cae FG |
1270 | _pg_to_up_acting_osds(pg, up, up_primary, acting, acting_primary); |
1271 | } | |
9f95a23c | 1272 | void pg_to_up_acting_osds(pg_t pg, std::vector<int>& up, std::vector<int>& acting) const { |
7c673cae FG |
1273 | int up_primary, acting_primary; |
1274 | pg_to_up_acting_osds(pg, &up, &up_primary, &acting, &acting_primary); | |
1275 | } | |
1276 | bool pg_is_ec(pg_t pg) const { | |
1277 | auto i = pools.find(pg.pool()); | |
11fdf7f2 TL |
1278 | ceph_assert(i != pools.end()); |
1279 | return i->second.is_erasure(); | |
7c673cae FG |
1280 | } |
1281 | bool get_primary_shard(const pg_t& pgid, spg_t *out) const { | |
1282 | auto i = get_pools().find(pgid.pool()); | |
1283 | if (i == get_pools().end()) { | |
1284 | return false; | |
1285 | } | |
11fdf7f2 | 1286 | if (!i->second.is_erasure()) { |
7c673cae FG |
1287 | *out = spg_t(pgid); |
1288 | return true; | |
1289 | } | |
1290 | int primary; | |
9f95a23c | 1291 | std::vector<int> acting; |
7c673cae FG |
1292 | pg_to_acting_osds(pgid, &acting, &primary); |
1293 | for (uint8_t i = 0; i < acting.size(); ++i) { | |
1294 | if (acting[i] == primary) { | |
1295 | *out = spg_t(pgid, shard_id_t(i)); | |
1296 | return true; | |
1297 | } | |
1298 | } | |
1299 | return false; | |
1300 | } | |
11fdf7f2 TL |
1301 | bool get_primary_shard(const pg_t& pgid, int *primary, spg_t *out) const { |
1302 | auto i = get_pools().find(pgid.pool()); | |
1303 | if (i == get_pools().end()) { | |
1304 | return false; | |
1305 | } | |
9f95a23c | 1306 | std::vector<int> acting; |
11fdf7f2 TL |
1307 | pg_to_acting_osds(pgid, &acting, primary); |
1308 | if (i->second.is_erasure()) { | |
1309 | for (uint8_t i = 0; i < acting.size(); ++i) { | |
1310 | if (acting[i] == *primary) { | |
1311 | *out = spg_t(pgid, shard_id_t(i)); | |
1312 | return true; | |
1313 | } | |
1314 | } | |
1315 | } else { | |
1316 | *out = spg_t(pgid); | |
1317 | return true; | |
1318 | } | |
1319 | return false; | |
1320 | } | |
1321 | ||
9f95a23c TL |
1322 | bool in_removed_snaps_queue(int64_t pool, snapid_t snap) const { |
1323 | auto p = removed_snaps_queue.find(pool); | |
1324 | if (p == removed_snaps_queue.end()) { | |
1325 | return false; | |
1326 | } | |
1327 | return p->second.contains(snap); | |
1328 | } | |
1329 | ||
11fdf7f2 TL |
1330 | const mempool::osdmap::map<int64_t,snap_interval_set_t>& |
1331 | get_removed_snaps_queue() const { | |
1332 | return removed_snaps_queue; | |
1333 | } | |
1334 | const mempool::osdmap::map<int64_t,snap_interval_set_t>& | |
1335 | get_new_removed_snaps() const { | |
1336 | return new_removed_snaps; | |
1337 | } | |
1338 | const mempool::osdmap::map<int64_t,snap_interval_set_t>& | |
1339 | get_new_purged_snaps() const { | |
1340 | return new_purged_snaps; | |
1341 | } | |
7c673cae | 1342 | |
f67539c2 | 1343 | int64_t lookup_pg_pool_name(std::string_view name) const { |
7c673cae FG |
1344 | auto p = name_pool.find(name); |
1345 | if (p == name_pool.end()) | |
1346 | return -ENOENT; | |
1347 | return p->second; | |
1348 | } | |
1349 | ||
1350 | int64_t get_pool_max() const { | |
1351 | return pool_max; | |
1352 | } | |
1353 | const mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() const { | |
1354 | return pools; | |
1355 | } | |
1356 | mempool::osdmap::map<int64_t,pg_pool_t>& get_pools() { | |
1357 | return pools; | |
1358 | } | |
9f95a23c | 1359 | void get_pool_ids_by_rule(int rule_id, std::set<int64_t> *pool_ids) const { |
11fdf7f2 | 1360 | ceph_assert(pool_ids); |
3efd9988 | 1361 | for (auto &p: pools) { |
11fdf7f2 | 1362 | if (p.second.get_crush_rule() == rule_id) { |
3efd9988 FG |
1363 | pool_ids->insert(p.first); |
1364 | } | |
1365 | } | |
1366 | } | |
/// Defined out of line; results are delivered through @p pool_ids.
/// NOTE(review): presumably the pools whose crush rule can place data on
/// @p osd -- confirm against the definition.
void get_pool_ids_by_osd(CephContext *cct,
                        int osd,
                        std::set<int64_t> *pool_ids) const;
1370 | const std::string& get_pool_name(int64_t p) const { | |
7c673cae | 1371 | auto i = pool_name.find(p); |
11fdf7f2 | 1372 | ceph_assert(i != pool_name.end()); |
7c673cae FG |
1373 | return i->second; |
1374 | } | |
9f95a23c | 1375 | const mempool::osdmap::map<int64_t,std::string>& get_pool_names() const { |
c07f9fc5 FG |
1376 | return pool_name; |
1377 | } | |
7c673cae FG |
1378 | bool have_pg_pool(int64_t p) const { |
1379 | return pools.count(p); | |
1380 | } | |
1381 | const pg_pool_t* get_pg_pool(int64_t p) const { | |
1382 | auto i = pools.find(p); | |
1383 | if (i != pools.end()) | |
1384 | return &i->second; | |
1385 | return NULL; | |
1386 | } | |
1387 | unsigned get_pg_size(pg_t pg) const { | |
1388 | auto p = pools.find(pg.pool()); | |
11fdf7f2 | 1389 | ceph_assert(p != pools.end()); |
7c673cae FG |
1390 | return p->second.get_size(); |
1391 | } | |
1392 | int get_pg_type(pg_t pg) const { | |
1393 | auto p = pools.find(pg.pool()); | |
11fdf7f2 | 1394 | ceph_assert(p != pools.end()); |
7c673cae FG |
1395 | return p->second.get_type(); |
1396 | } | |
9f95a23c TL |
1397 | int get_pool_crush_rule(int64_t pool_id) const { |
1398 | auto pool = get_pg_pool(pool_id); | |
1399 | if (!pool) | |
1400 | return -ENOENT; | |
1401 | return pool->get_crush_rule(); | |
1402 | } | |
7c673cae FG |
1403 | |
1404 | ||
1405 | pg_t raw_pg_to_pg(pg_t pg) const { | |
1406 | auto p = pools.find(pg.pool()); | |
11fdf7f2 | 1407 | ceph_assert(p != pools.end()); |
7c673cae FG |
1408 | return p->second.raw_pg_to_pg(pg); |
1409 | } | |
1410 | ||
1411 | // pg -> acting primary osd | |
1412 | int get_pg_acting_primary(pg_t pg) const { | |
1413 | int primary = -1; | |
1414 | _pg_to_up_acting_osds(pg, nullptr, nullptr, nullptr, &primary); | |
1415 | return primary; | |
1416 | } | |
1417 | ||
1418 | /* | |
1419 | * check whether an spg_t maps to a particular osd | |
1420 | */ | |
1421 | bool is_up_acting_osd_shard(spg_t pg, int osd) const { | |
9f95a23c | 1422 | std::vector<int> up, acting; |
7c673cae | 1423 | _pg_to_up_acting_osds(pg.pgid, &up, NULL, &acting, NULL, false); |
9f95a23c TL |
1424 | if (calc_pg_role(pg_shard_t(osd, pg.shard), acting) >= 0 || |
1425 | calc_pg_role(pg_shard_t(osd, pg.shard), up) >= 0) { | |
1426 | return true; | |
7c673cae FG |
1427 | } |
1428 | return false; | |
1429 | } | |
1430 | ||
1431 | ||
9f95a23c TL |
1432 | static int calc_pg_role_broken(int osd, const std::vector<int>& acting, int nrep=0); |
1433 | static int calc_pg_role(pg_shard_t who, const std::vector<int>& acting); | |
1434 | static bool primary_changed_broken( | |
7c673cae | 1435 | int oldprimary, |
9f95a23c | 1436 | const std::vector<int> &oldacting, |
7c673cae | 1437 | int newprimary, |
9f95a23c | 1438 | const std::vector<int> &newacting); |
7c673cae FG |
1439 | |
1440 | /* rank is -1 (stray), 0 (primary), 1,2,3,... (replica) */ | |
9f95a23c TL |
1441 | int get_pg_acting_role(spg_t pg, int osd) const { |
1442 | std::vector<int> group; | |
1443 | pg_to_acting_osds(pg.pgid, group); | |
1444 | return calc_pg_role(pg_shard_t(osd, pg.shard), group); | |
7c673cae FG |
1445 | } |
1446 | ||
7c673cae FG |
1447 | bool try_pg_upmap( |
1448 | CephContext *cct, | |
1449 | pg_t pg, ///< pg to potentially remap | |
9f95a23c TL |
1450 | const std::set<int>& overfull, ///< osds we'd want to evacuate |
1451 | const std::vector<int>& underfull, ///< osds to move to, in order of preference | |
1452 | const std::vector<int>& more_underfull, ///< less full osds to move to, in order of preference | |
1453 | std::vector<int> *orig, | |
1454 | std::vector<int> *out); ///< resulting alternative mapping | |
7c673cae FG |
1455 | |
1456 | int calc_pg_upmaps( | |
1457 | CephContext *cct, | |
92f5a8d4 | 1458 | uint32_t max_deviation, ///< max deviation from target (value >= 1) |
7c673cae | 1459 | int max_iterations, ///< max iterations to run |
9f95a23c | 1460 | const std::set<int64_t>& pools, ///< [optional] restrict to pool |
20effc67 TL |
1461 | Incremental *pending_inc, |
1462 | std::random_device::result_type *p_seed = nullptr ///< [optional] for regression tests | |
7c673cae FG |
1463 | ); |
1464 | ||
20effc67 TL |
1465 | private: // Bunch of internal functions used only by calc_pg_upmaps (result of code refactoring) |
1466 | float build_pool_pgs_info ( | |
1467 | CephContext *cct, | |
1468 | const std::set<int64_t>& pools, ///< [optional] restrict to pool | |
1469 | const OSDMap& tmp_osd_map, | |
1470 | int& total_pgs, | |
1471 | std::map<int, std::set<pg_t>>& pgs_by_osd, | |
1472 | std::map<int,float>& osd_weight | |
1473 | ); // return total weight of all OSDs | |
1474 | ||
1475 | float calc_deviations ( | |
1476 | CephContext *cct, | |
1477 | const std::map<int,std::set<pg_t>>& pgs_by_osd, | |
1478 | const std::map<int,float>& osd_weight, | |
1479 | float pgs_per_weight, | |
1480 | std::map<int,float>& osd_deviation, | |
1481 | std::multimap<float,int>& deviation_osd, | |
1482 | float& stddev | |
1483 | ); // return current max deviation | |
1484 | ||
1485 | void fill_overfull_underfull ( | |
1486 | CephContext *cct, | |
1487 | const std::multimap<float,int>& deviation_osd, | |
1488 | int max_deviation, | |
1489 | std::set<int>& overfull, | |
1490 | std::set<int>& more_overfull, | |
1491 | std::vector<int>& underfull, | |
1492 | std::vector<int>& more_underfull | |
1493 | ); | |
1494 | ||
1495 | int pack_upmap_results( | |
1496 | CephContext *cct, | |
1497 | const std::set<pg_t>& to_unmap, | |
1498 | const std::map<pg_t, mempool::osdmap::vector<std::pair<int, int>>>& to_upmap, | |
1499 | OSDMap& tmp_osd_map, | |
1500 | OSDMap::Incremental *pending_inc | |
1501 | ); | |
1502 | ||
1503 | std::default_random_engine get_random_engine( | |
1504 | CephContext *cct, | |
1505 | std::random_device::result_type *p_seed | |
1506 | ); | |
1507 | ||
1508 | bool try_drop_remap_overfull( | |
1509 | CephContext *cct, | |
1510 | const std::vector<pg_t>& pgs, | |
1511 | const OSDMap& tmp_osd_map, | |
1512 | int osd, | |
1513 | std::map<int,std::set<pg_t>>& temp_pgs_by_osd, | |
1514 | std::set<pg_t>& to_unmap, | |
1515 | std::map<pg_t, mempool::osdmap::vector<std::pair<int32_t,int32_t>>>& to_upmap | |
1516 | ); | |
1517 | ||
1518 | typedef std::vector<std::pair<pg_t, mempool::osdmap::vector<std::pair<int, int>>>> | |
1519 | candidates_t; | |
1520 | ||
1521 | bool try_drop_remap_underfull( | |
1522 | CephContext *cct, | |
1523 | const candidates_t& candidates, | |
1524 | int osd, | |
1525 | std::map<int,std::set<pg_t>>& temp_pgs_by_osd, | |
1526 | std::set<pg_t>& to_unmap, | |
1527 | std::map<pg_t, mempool::osdmap::vector<std::pair<int32_t,int32_t>>>& to_upmap | |
1528 | ); | |
1529 | ||
// Declaration only. existing, temp_pgs_by_osd and to_upmap are non-const
// references and may be modified by the callee.
// Note that new_upmap_items is taken BY VALUE, so the callee works on its own
// copy and the caller's vector is never modified.
// NOTE(review): presumably records an orig -> out remap for pg -- confirm
// against the definition.
1530 | void add_remap_pair( | |
1531 | CephContext *cct, | |
1532 | int orig, | |
1533 | int out, | |
1534 | pg_t pg, | |
1535 | size_t pg_pool_size, | |
1536 | int osd, | |
1537 | std::set<int>& existing, | |
1538 | std::map<int,std::set<pg_t>>& temp_pgs_by_osd, | |
1539 | mempool::osdmap::vector<std::pair<int32_t,int32_t>> new_upmap_items, | |
1540 | std::map<pg_t, mempool::osdmap::vector<std::pair<int32_t,int32_t>>>& to_upmap | |
1541 | ); | |
1542 | ||
// Declaration only. All inputs are read-only; returns an int.
// NOTE(review): osd_deviation is declared as a const VALUE (no '&'), so the
// whole map is copied on every call. Switching it to a const reference would
// also require changing the out-of-line definition, which is not in view.
// NOTE(review): the meaning of the returned int is not visible here -- confirm
// against the definition.
1543 | int find_best_remap ( | |
1544 | CephContext *cct, | |
1545 | const std::vector<int>& orig, | |
1546 | const std::vector<int>& out, | |
1547 | const std::set<int>& existing, | |
1548 | const std::map<int,float> osd_deviation | |
1549 | ); | |
1550 | ||
// Declaration only. Returns a candidates_t by value.
// NOTE(review): to_skip is declared as a const VALUE (no '&'), so the set is
// copied on every call. Switching it to a const reference would also require
// changing the out-of-line definition, which is not in view.
// NOTE(review): presumably `aggressive` and `p_seed` control shuffling of the
// candidates -- confirm against the definition.
1551 | candidates_t build_candidates( | |
1552 | CephContext *cct, | |
1553 | const OSDMap& tmp_osd_map, | |
1554 | const std::set<pg_t> to_skip, | |
1555 | const std::set<int64_t>& only_pools, | |
1556 | bool aggressive, | |
1557 | std::random_device::result_type *p_seed | |
1558 | ); | |
1559 | ||
1560 | public: | |
// Declaration only. Takes a bucket name and an output pointer to a set of ints.
// NOTE(review): presumably fills *osds with the OSD ids under the named bucket
// and returns 0 / negative errno -- confirm against the definition.
9f95a23c | 1561 | int get_osds_by_bucket_name(const std::string &name, std::set<int> *osds) const; | |
31f18b77 | 1562 | |
f64942e4 AA |
1563 | bool have_pg_upmaps(pg_t pg) const { |
1564 | return pg_upmap.count(pg) || | |
1565 | pg_upmap_items.count(pg); | |
1566 | } | |
1567 | ||
f67539c2 | 1568 | bool check_full(const std::set<pg_shard_t> &missing_on) const { |
9f95a23c TL |
1569 | for (auto shard : missing_on) { |
1570 | if (get_state(shard.osd) & CEPH_OSD_FULL) | |
1571 | return true; | |
1572 | } | |
1573 | return false; | |
1574 | } | |
1575 | ||
7c673cae FG |
1576 | /* |
1577 | * handy helpers to build simple maps... | |
1578 | */ | |
1579 | /** | |
1580 | * Build an OSD map suitable for basic usage. If **num_osd** is >= 0 | |
1581 | * it will be initialized with the specified number of OSDs in a | |
1582 | * single host. If **num_osd** is < 0 the layout of the OSD map will | |
1583 | * be built by reading the content of the configuration file. | |
1584 | * | |
1585 | * @param cct [in] in core ceph context | |
1586 | * @param e [in] initial epoch | |
1587 | * @param fsid [in] id of the cluster | |
1588 | * @param num_osd [in] number of OSDs if >= 0 or read from conf if < 0 | |
1589 | * @return **0** on success, negative errno on error. | |
1590 | */ | |
224ce89b WB |
1591 | private: |
// Shared implementation behind the public build_simple() and
// build_simple_with_pool() wrappers declared next to it: build_simple() calls
// it with pg_bits = 0, pgp_bits = 0 and default_pool = false, while
// build_simple_with_pool() forwards its own pg_bits/pgp_bits with
// default_pool = true.
1592 | int build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid, | |
1593 | int num_osd, int pg_bits, int pgp_bits, | |
1594 | bool default_pool); | |
1595 | public: | |
7c673cae | 1596 | int build_simple(CephContext *cct, epoch_t e, uuid_d &fsid, |
224ce89b WB |
1597 | int num_osd) { |
1598 | return build_simple_optioned(cct, e, fsid, num_osd, 0, 0, false); | |
1599 | } | |
1600 | int build_simple_with_pool(CephContext *cct, epoch_t e, uuid_d &fsid, | |
1601 | int num_osd, int pg_bits, int pgp_bits) { | |
1602 | return build_simple_optioned(cct, e, fsid, num_osd, | |
1603 | pg_bits, pgp_bits, true); | |
1604 | } | |
7c673cae FG |
// Static helpers that operate on a caller-supplied CrushWrapper (declarations
// only; none of them touches OSDMap instance state, since they are static).
// The variants taking `std::ostream *ss` receive an output stream pointer.
// NOTE(review): presumably ss collects human-readable error text and the int
// results are 0 / negative errno -- confirm against the definitions.
1605 | static int _build_crush_types(CrushWrapper& crush); | |
1606 | static int build_simple_crush_map(CephContext *cct, CrushWrapper& crush, | |
9f95a23c | 1607 | int num_osd, std::ostream *ss); | |
7c673cae FG |
1608 | static int build_simple_crush_map_from_conf(CephContext *cct, | |
1609 | CrushWrapper& crush, | |
9f95a23c | 1610 | std::ostream *ss); | |
31f18b77 FG |
1611 | static int build_simple_crush_rules( | |
1612 | CephContext *cct, CrushWrapper& crush, | |
9f95a23c TL |
1613 | const std::string& root, | |
1614 | std::ostream *ss); | |
7c673cae | 1615 | |
3efd9988 FG |
// Declarations only; both are const members and do not modify this OSDMap.
// NOTE(review): crush_rule_in_use presumably reports whether any pool
// references rule_id; validate_crush_rules presumably checks the given
// CrushWrapper against this map and reports problems through ss -- confirm
// against the definitions.
1616 | bool crush_rule_in_use(int rule_id) const; | |
1617 | ||
9f95a23c | 1618 | int validate_crush_rules(CrushWrapper *crush, std::ostream *ss) const; | |
7c673cae FG |
1619 | |
1620 | void clear_temp() { | |
1621 | pg_temp->clear(); | |
1622 | primary_temp->clear(); | |
1623 | } | |
1624 | ||
1625 | private: | |
// Private helper, declaration only. Accepts both an ostream pointer and a
// Formatter pointer.
// NOTE(review): presumably emits the entry for OSD `cur` to whichever of
// out / f is non-null -- confirm against the definition.
9f95a23c | 1626 | void print_osd_line(int cur, std::ostream *out, ceph::Formatter *f) const; | |
7c673cae | 1627 | public: |
9f95a23c TL |
// Text-output family (declarations only). All are const members writing to a
// caller-supplied std::ostream; print_summary additionally accepts a
// ceph::Formatter pointer, and its `extra` flag defaults to false.
// print_oneline_summary() is what the free operator<<(std::ostream&,
// const OSDMap&) in this header calls.
1628 | void print(std::ostream& out) const; | |
1629 | void print_osd(int id, std::ostream& out) const; | |
1630 | void print_osds(std::ostream& out) const; | |
1631 | void print_pools(std::ostream& out) const; | |
1632 | void print_summary(ceph::Formatter *f, std::ostream& out, | |
1633 | const std::string& prefix, bool extra=false) const; | |
1634 | void print_oneline_summary(std::ostream& out) const; | |
31f18b77 FG |
1635 | |
// Unnamed enum of filter flags. Every enumerator is a distinct single-bit
// value (1, 2, 4, 8, 16), so flags can be combined with bitwise OR.
// NOTE(review): presumably these are the values accepted by print_tree()'s
// `dump_flags` parameter -- confirm against the definition.
1636 | enum { | |
c07f9fc5 FG |
1637 | DUMP_IN = 1, // only 'in' osds | |
1638 | DUMP_OUT = 2, // only 'out' osds | |
1639 | DUMP_UP = 4, // only 'up' osds | |
1640 | DUMP_DOWN = 8, // only 'down' osds | |
1641 | DUMP_DESTROYED = 16, // only 'destroyed' osds | |
31f18b77 | 1642 | }; | |
9f95a23c TL |
// Declaration only. dump_flags defaults to 0 and bucket defaults to the empty
// string; bucket is taken by value.
// NOTE(review): presumably dump_flags is an OR of the DUMP_* enumerators and
// an empty bucket means "whole tree" -- confirm against the definition.
1643 | void print_tree(ceph::Formatter *f, std::ostream *out, | |
1644 | unsigned dump_flags=0, std::string bucket="") const; | |
7c673cae FG |
1645 | |
// Declaration only; const member, so this OSDMap is not modified.
// Takes another OSDMap (non-const pointer), a read-only pool-id set pointer,
// and two output channels: a std::string pointer and a Formatter pointer.
// NOTE(review): presumably compares mappings between this map and newmap and
// reports the summary through out and/or f -- confirm against the definition.
1646 | int summarize_mapping_stats( | |
1647 | OSDMap *newmap, | |
9f95a23c | 1648 | const std::set<int64_t> *pools, | |
7c673cae | 1649 | std::string *out, | |
9f95a23c | 1650 | ceph::Formatter *f) const; | |
7c673cae | 1651 | |
9f95a23c TL |
// Flag-string and structured-dump declarations (no definitions here).
// get_flag_string has a const member overload with no arguments and a static
// overload taking an explicit `flags` value. The dump* members write through
// a ceph::Formatter pointer. dump_erasure_code_profiles and
// generate_test_instances are static and need no OSDMap instance.
// NOTE(review): generate_test_instances fills `o` with raw OSDMap pointers;
// ownership of those objects is not visible here -- confirm with callers.
1652 | std::string get_flag_string() const; | |
1653 | static std::string get_flag_string(unsigned flags); | |
7c673cae | 1654 | static void dump_erasure_code_profiles( | |
9f95a23c TL |
1655 | const mempool::osdmap::map<std::string,std::map<std::string,std::string> > &profiles, | |
1656 | ceph::Formatter *f); | |
1657 | void dump(ceph::Formatter *f) const; | |
1658 | void dump_osd(int id, ceph::Formatter *f) const; | |
1659 | void dump_osds(ceph::Formatter *f) const; | |
1660 | static void generate_test_instances(std::list<OSDMap*>& o); | |
f67539c2 | 1661 | bool check_new_blocklist_entries() const { return new_blocklist_entries; } |
224ce89b | 1662 | |
// Declaration only; const member. Receives a health_check_map_t pointer.
// NOTE(review): presumably appends the health checks derived from this map to
// *checks -- confirm against the definition.
92f5a8d4 | 1663 | void check_health(CephContext *cct, health_check_map_t *checks) const; | |
35e4c445 | 1664 | |
9f95a23c TL |
// Declaration only; const member. Takes a list of strings and two output
// pointers: a set of ints and an ostream.
// NOTE(review): presumably parses each string into an OSD id stored in *out,
// reporting errors through *ss with a negative errno result -- confirm
// against the definition.
1665 | int parse_osd_id_list(const std::vector<std::string>& ls, | |
1666 | std::set<int> *out, | |
1667 | std::ostream *ss) const; | |
11fdf7f2 TL |
1668 | |
// Declarations only; both are const members.
// pending_require_osd_release returns a std::optional<std::string>, so callers
// must handle the "no value" case.
// NOTE(review): pool_raw_used_rate presumably yields the raw-to-logical space
// multiplier for the given pool -- confirm against the definition.
1669 | float pool_raw_used_rate(int64_t poolid) const; | |
20effc67 | 1670 | std::optional<std::string> pending_require_osd_release() const; | |
11fdf7f2 | 1671 | |
7c673cae FG |
1672 | }; |
// Macro invocations for OSDMap and OSDMap::Incremental. The macro is defined
// elsewhere in the project.
// NOTE(review): presumably generates the feature-aware free encode/decode
// wrappers for these two types -- confirm against the macro definition.
1673 | WRITE_CLASS_ENCODER_FEATURES(OSDMap) | |
1674 | WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental) | |
1675 | ||
9f95a23c TL |
// OSDMapRef is a shared-ownership handle to an immutable (const) OSDMap.
// When WITH_SEASTAR is defined it is a boost::local_shared_ptr; otherwise it
// is a std::shared_ptr.
1676 | #ifdef WITH_SEASTAR | |
1677 | using OSDMapRef = boost::local_shared_ptr<const OSDMap>; | |
1678 | #else | |
1679 | using OSDMapRef = std::shared_ptr<const OSDMap>; | |
1680 | #endif | |
1681 | ||
7c673cae | 1682 | |
9f95a23c | 1683 | inline std::ostream& operator<<(std::ostream& out, const OSDMap& m) { |
7c673cae FG |
1684 | m.print_oneline_summary(out); |
1685 | return out; | |
1686 | } | |
1687 | ||
// PGMap is only forward-declared here because print_osd_utilization() takes it
// by const reference; its full definition is not needed in this header.
// print_osd_utilization is a free function (declaration only) that takes both
// maps read-only, an output stream, a Formatter pointer, a `tree` flag and a
// filter string.
// NOTE(review): presumably writes per-OSD utilization either as plain text or
// through f, optionally laid out as a tree -- confirm against the definition.
11fdf7f2 | 1688 | class PGMap; | |
31f18b77 FG |
1689 | ||
1690 | void print_osd_utilization(const OSDMap& osdmap, | |
11fdf7f2 | 1691 | const PGMap& pgmap, | |
9f95a23c TL |
1692 | std::ostream& out, | |
1693 | ceph::Formatter *f, | |
11fdf7f2 | 1694 | bool tree, | |
9f95a23c | 1695 | const std::string& filter); | |
7c673cae FG |
1696 | |
1697 | #endif |