// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#include "gtest/gtest.h"
#include "osd/OSDMap.h"
#include "osd/OSDMapMapping.h"
#include "mon/OSDMonitor.h"
#include "global/global_context.h"
#include "global/global_init.h"
#include "common/common_init.h"
#include "common/ceph_argparse.h"
#include "common/ceph_json.h"
int main(int argc, char **argv) {
  map<string,string> defaults = {
    // make sure we have 3 copies, or some tests won't work
    { "osd_pool_default_size", "3" },
    // our map is flat, so just try and split across OSDs, not hosts or whatever
    { "osd_crush_chooseleaf_type", "0" },
  std::vector<const char*> args(argv, argv+argc);
  auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
                         CODE_ENVIRONMENT_UTILITY,
                         CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
  common_init_finish(g_ceph_context);
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
class OSDMapTest : public testing::Test,
                   public ::testing::WithParamInterface<std::pair<int, int>> {
  OSDMapMapping mapping;
  const uint64_t my_ec_pool = 1;
  const uint64_t my_rep_pool = 2;
  // Blacklist testing lists
  // I pulled the first two ranges and their start/end points from
  // https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation
  static const string range_addrs[];
  static const string ip_addrs[];
  static const string unblocked_ip_addrs[];
  const string EC_RULE_NAME = "erasure";
  void set_up_map(int new_num_osds = 6, bool no_default_pools = false) {
    num_osds = new_num_osds;
    osdmap.build_simple(g_ceph_context, 0, fsid, num_osds);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.fsid = osdmap.get_fsid();
    entity_addrvec_t sample_addrs;
    sample_addrs.v.push_back(entity_addr_t());
    for (int i = 0; i < num_osds; ++i) {
      sample_uuid.generate_random();
      sample_addrs.v[0].nonce = i;
      pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
      pending_inc.new_up_client[i] = sample_addrs;
      pending_inc.new_up_cluster[i] = sample_addrs;
      pending_inc.new_hb_back_up[i] = sample_addrs;
      pending_inc.new_hb_front_up[i] = sample_addrs;
      pending_inc.new_weight[i] = CEPH_OSD_IN;
      pending_inc.new_uuid[i] = sample_uuid;
    osdmap.apply_incremental(pending_inc);
    if (no_default_pools) // do not create any default pool(s)
    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.new_pool_max = osdmap.get_pool_max();
    new_pool_inc.fsid = osdmap.get_fsid();
    set_ec_pool("ec", new_pool_inc);
    // and a replicated pool
    set_rep_pool("reppool", new_pool_inc);
    osdmap.apply_incremental(new_pool_inc);
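  // Return the id of EC_RULE_NAME, creating a simple "indep" erasure rule
  // under the default root if it does not exist yet.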
  int get_ec_crush_rule() {
    int r = osdmap.crush->get_rule_id(EC_RULE_NAME);
      r = osdmap.crush->add_simple_rule(
        EC_RULE_NAME, "default", "osd", "",
        "indep", pg_pool_t::TYPE_ERASURE,
  uint64_t set_ec_pool(const string &name, OSDMap::Incremental &new_pool_inc,
                       bool assert_pool_id = true) {
    uint64_t pool_id = ++new_pool_inc.new_pool_max;
      ceph_assert(pool_id == my_ec_pool);
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
    p->type = pg_pool_t::TYPE_ERASURE;
    p->crush_rule = get_ec_crush_rule();
    new_pool_inc.new_pool_names[pool_id] = name; //"ec";
  uint64_t set_rep_pool(const string name, OSDMap::Incremental &new_pool_inc,
                        bool assert_pool_id = true) {
    uint64_t pool_id = ++new_pool_inc.new_pool_max;
      ceph_assert(pool_id == my_rep_pool);
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_id] = name; //"reppool";
  unsigned int get_num_osds() { return num_osds; }
  void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) {
    tmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
    auto p = bl.cbegin();
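  // Parse argvec into a CRUSH location, move the named item (an OSD or a
  // bucket) there, and commit the updated crush map to tmap.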
  int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) {
    map<string,string> loc;
    CrushWrapper::parse_loc_map(argvec, &loc);
    CrushWrapper newcrush;
    get_crush(tmap, newcrush);
    if (!newcrush.name_exists(name)) {
    int id = newcrush.get_item_id(name);
    if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
      err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc);
      err = newcrush.move_bucket(g_ceph_context, id, loc);
    OSDMap::Incremental pending_inc(tmap.get_epoch() + 1);
    pending_inc.crush.clear();
    newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    tmap.apply_incremental(pending_inc);
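  // Create (or look up) a simple replicated CRUSH rule with the given name,
  // root and failure-domain type, and commit it to the osdmap.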
  int crush_rule_create_replicated(const string &name,
                                   const string &type) {
    if (osdmap.crush->rule_exists(name)) {
      return osdmap.crush->get_rule_id(name);
    CrushWrapper newcrush;
    get_crush(osdmap, newcrush);
    int ruleno = newcrush.add_simple_rule(
      name, root, type, device_class,
      "firstn", pg_pool_t::TYPE_REPLICATED, &ss);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.crush.clear();
    newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    osdmap.apply_incremental(pending_inc);
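  // Map `num` PGs of the given pool and count, per OSD, how often it appears
  // anywhere in the acting set, as the first acting OSD, and as the acting
  // primary; also cross-check each result against the precalculated mapping.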
  void test_mappings(int pool,
                     vector<int> *primary) {
    mapping.update(osdmap);
    for (int i=0; i<num; ++i) {
      vector<int> up, acting;
      int up_primary, acting_primary;
      osdmap.pg_to_up_acting_osds(pgid,
                                  &up, &up_primary, &acting, &acting_primary);
      for (unsigned j=0; j<acting.size(); ++j)
        (*first)[acting[0]]++;
      if (acting_primary >= 0)
        (*primary)[acting_primary]++;

      // compare to precalc mapping
      vector<int> up2, acting2;
      int up_primary2, acting_primary2;
      pgid = osdmap.raw_pg_to_pg(pgid);
      mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
      ASSERT_EQ(up_primary, up_primary2);
      ASSERT_EQ(acting, acting2);
      ASSERT_EQ(acting_primary, acting_primary2);
    cout << "any: " << *any << std::endl;
    cout << "first: " << *first << std::endl;
    cout << "primary: " << *primary << std::endl;
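  // Run OSDMonitor::CleanUpmapJob over every PG that has an upmap, via a
  // ParallelPGMapper backed by a small thread pool; the removals are
  // collected into pending_inc.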
  void clean_pg_upmaps(CephContext *cct,
                       OSDMap::Incremental& pending_inc) {
    int pgs_per_chunk = 256;
    ThreadPool tp(cct, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num);
    ParallelPGMapper mapper(cct, &tp);
    vector<pg_t> pgs_to_check;
    om.get_upmap_pgs(&pgs_to_check);
    OSDMonitor::CleanUpmapJob job(cct, om, pending_inc);
    mapper.queue(&job, pgs_per_chunk, pgs_to_check);
  void set_primary_affinity_all(float pa) {
    for (uint i = 0 ; i < get_num_osds() ; i++) {
      osdmap.set_primary_affinity(i, int(pa * CEPH_OSD_MAX_PRIMARY_AFFINITY));
  bool score_in_range(float score, uint nosds = 0) {
      nosds = get_num_osds();
    return score >= 1.0 && score <= float(nosds);
TEST_F(OSDMapTest, Create) {
  ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd());
  ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds());

TEST_F(OSDMapTest, Features) {
  uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // clients have a slightly different view
  features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // remove the EC pool, but leave the rule. add primary affinity.
  OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
  new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
  new_pool_inc.new_primary_affinity[0] = 0x8000;
  osdmap.apply_incremental(new_pool_inc);

  features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
  ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // FIXME: test tiering feature bits
TEST_F(OSDMapTest, MapPG) {
  std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl;
  pg_t rawpg(0, my_rep_pool);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;
  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  vector<int> old_up_osds, old_acting_osds;
  osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
  ASSERT_EQ(old_up_osds, up_osds);
  ASSERT_EQ(old_acting_osds, acting_osds);
  ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size());

TEST_F(OSDMapTest, MapFunctionsMatch) {
  // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
  pg_t rawpg(0, my_rep_pool);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;
  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  vector<int> up_osds_two, acting_osds_two;
  osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);
  ASSERT_EQ(up_osds, up_osds_two);
  ASSERT_EQ(acting_osds, acting_osds_two);
  int acting_primary_two;
  osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
  EXPECT_EQ(acting_osds, acting_osds_two);
  EXPECT_EQ(acting_primary, acting_primary_two);
  osdmap.pg_to_acting_osds(pgid, acting_osds_two);
  EXPECT_EQ(acting_osds, acting_osds_two);
/** This test must be removed or modified appropriately when we allow
 * other ways to specify a primary. */
TEST_F(OSDMapTest, PrimaryIsFirst) {
  pg_t rawpg(0, my_rep_pool);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;
  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  EXPECT_EQ(up_osds[0], up_primary);
  EXPECT_EQ(acting_osds[0], acting_primary);

TEST_F(OSDMapTest, PGTempRespected) {
  pg_t rawpg(0, my_rep_pool);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;
  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  // copy and swap first and last element in acting_osds
  vector<int> new_acting_osds(acting_osds);
  int first = new_acting_osds[0];
  new_acting_osds[0] = *new_acting_osds.rbegin();
  *new_acting_osds.rbegin() = first;

  // apply pg_temp to osdmap
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
    new_acting_osds.begin(), new_acting_osds.end());
  osdmap.apply_incremental(pgtemp_map);

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  EXPECT_EQ(new_acting_osds, acting_osds);

TEST_F(OSDMapTest, PrimaryTempRespected) {
  pg_t rawpg(0, my_rep_pool);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> acting_osds;
  int up_primary, acting_primary;
  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  // make second OSD primary via incremental
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  pgtemp_map.new_primary_temp[pgid] = acting_osds[1];
  osdmap.apply_incremental(pgtemp_map);

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  EXPECT_EQ(acting_primary, acting_osds[1]);
TEST_F(OSDMapTest, CleanTemps) {
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
  pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
    vector<int> up_osds, acting_osds;
    int up_primary, acting_primary;
    osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary,
                                &acting_osds, &acting_primary);
    pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>(
      up_osds.begin(), up_osds.end());
    pgtemp_map.new_primary_temp[pga] = up_primary;
  pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool));
    vector<int> up_osds, acting_osds;
    int up_primary, acting_primary;
    osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary,
                                &acting_osds, &acting_primary);
    pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
      up_osds.begin(), up_osds.end());
    pending_inc.new_primary_temp[pgb] = up_primary;

  osdmap.apply_incremental(pgtemp_map);

  tmpmap.deepish_copy_from(osdmap);
  tmpmap.apply_incremental(pending_inc);
  OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc);
  EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
              pending_inc.new_pg_temp[pga].size() == 0);
  EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);

  EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) &&
              !pending_inc.new_primary_temp.count(pgb));

TEST_F(OSDMapTest, KeepsNecessaryTemps) {
  pg_t rawpg(0, my_rep_pool);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;
  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  // find unused OSD and stick it in there
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  // find an unused osd and put it in place of the first one
  for (; i != (int)get_num_osds(); ++i) {
    for (vector<int>::iterator osd_it = up_osds.begin();
         osd_it != up_osds.end();
  if (i == (int)get_num_osds())
    FAIL() << "did not find unused OSD for temp mapping";

  pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
    up_osds.begin(), up_osds.end());
  pgtemp_map.new_primary_temp[pgid] = up_osds[1];
  osdmap.apply_incremental(pgtemp_map);

  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
  tmpmap.deepish_copy_from(osdmap);
  tmpmap.apply_incremental(pending_inc);
  OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc);
  EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
  EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
TEST_F(OSDMapTest, PrimaryAffinity) {
  int n = get_num_osds();
  for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
       p != osdmap.get_pools().end();
    int expect_primary = 10000 / n;
    cout << "pool " << pool << " size " << (int)p->second.size
         << " expect_primary " << expect_primary << std::endl;
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      for (int i=0; i<n; ++i) {
        ASSERT_LT(0, any[i]);
        ASSERT_LT(0, first[i]);
        ASSERT_LT(0, primary[i]);

    osdmap.set_primary_affinity(0, 0);
    osdmap.set_primary_affinity(1, 0);
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      for (int i=0; i<n; ++i) {
        ASSERT_LT(0, any[i]);
          ASSERT_LT(0, first[i]);
          ASSERT_LT(0, primary[i]);
          if (p->second.is_replicated()) {
            ASSERT_EQ(0, first[i]);
          ASSERT_EQ(0, primary[i]);

    osdmap.set_primary_affinity(0, 0x8000);
    osdmap.set_primary_affinity(1, 0);
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      int expect = (10000 / (n-2)) / 2; // half weight
      cout << "expect " << expect << std::endl;
      for (int i=0; i<n; ++i) {
        ASSERT_LT(0, any[i]);
          ASSERT_LT(0, first[i]);
          ASSERT_LT(0, primary[i]);
          if (p->second.is_replicated()) {
            ASSERT_EQ(0, first[i]);
          ASSERT_EQ(0, primary[i]);
      ASSERT_LT(expect*2/3, primary[0]);
      ASSERT_GT(expect*4/3, primary[0]);

    osdmap.set_primary_affinity(0, 0x10000);
    osdmap.set_primary_affinity(1, 0x10000);
TEST_F(OSDMapTest, get_osd_crush_node_flags) {
  for (unsigned i=0; i<get_num_osds(); ++i) {
    ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i));

  OSDMap::Incremental inc(osdmap.get_epoch() + 1);
  inc.new_crush_node_flags[-1] = 123u;
  osdmap.apply_incremental(inc);
  for (unsigned i=0; i<get_num_osds(); ++i) {
    ASSERT_EQ(123u, osdmap.get_osd_crush_node_flags(i));
  ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000));

  OSDMap::Incremental inc3(osdmap.get_epoch() + 1);
  inc3.new_crush_node_flags[-1] = 456u;
  osdmap.apply_incremental(inc3);
  for (unsigned i=0; i<get_num_osds(); ++i) {
    ASSERT_EQ(456u, osdmap.get_osd_crush_node_flags(i));
  ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000));

  OSDMap::Incremental inc2(osdmap.get_epoch() + 1);
  inc2.new_crush_node_flags[-1] = 0;
  osdmap.apply_incremental(inc2);
  for (unsigned i=0; i<get_num_osds(); ++i) {
    ASSERT_EQ(0u, osdmap.get_crush_node_flags(i));
TEST_F(OSDMapTest, parse_osd_id_list) {
  osdmap.get_all_osds(all);

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout));
  ASSERT_EQ(1u, out.size());
  ASSERT_EQ(0, *out.begin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout));
  ASSERT_EQ(1u, out.size());
  ASSERT_EQ(1, *out.begin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout));
  ASSERT_EQ(2u, out.size());
  ASSERT_EQ(0, *out.begin());
  ASSERT_EQ(1, *out.rbegin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout));
  ASSERT_EQ(2u, out.size());
  ASSERT_EQ(0, *out.begin());
  ASSERT_EQ(1, *out.rbegin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout));
  ASSERT_EQ(all.size(), out.size());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout));

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout));

  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout));
  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout));
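// Exercises clean_pg_upmaps() on a hand-built host-based topology, covering
// stale upmap removal and the cases from trackers 37493, 37501 and 37968.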
TEST_F(OSDMapTest, CleanPGUpmaps) {
  // build a crush rule of type host
  const int expected_host_num = 3;
  int osd_per_host = get_num_osds() / expected_host_num;
  ASSERT_GE(2, osd_per_host);
  for (int i = 0; i < (int)get_num_osds(); i++) {
    if (i && i % osd_per_host == 0) {
    stringstream osd_name;
    stringstream host_name;
    vector<string> move_to;
    osd_name << "osd." << i;
    host_name << "host-" << index;
    move_to.push_back("root=default");
    string host_loc = "host=" + host_name.str();
    move_to.push_back(host_loc);
    int r = crush_move(osdmap, osd_name.str(), move_to);
  const string upmap_rule = "upmap";
  int upmap_rule_no = crush_rule_create_replicated(
    upmap_rule, "default", "host");
  ASSERT_LT(0, upmap_rule_no);

  // create a replicated pool which references the above rule
  OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
  new_pool_inc.new_pool_max = osdmap.get_pool_max();
  new_pool_inc.fsid = osdmap.get_fsid();
  uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max;
  pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty);
  p->type = pg_pool_t::TYPE_REPLICATED;
  p->crush_rule = upmap_rule_no;
  p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
  new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool";
  osdmap.apply_incremental(new_pool_inc);

  pg_t rawpg(0, upmap_pool_id);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  osdmap.pg_to_raw_up(pgid, &up, &up_primary);
  ASSERT_LT(1U, up.size());
    // validate we won't have two OSDs from a same host
    int parent_0 = osdmap.crush->get_parent_of_type(up[0],
      osdmap.crush->get_type_id("host"));
    int parent_1 = osdmap.crush->get_parent_of_type(up[1],
      osdmap.crush->get_type_id("host"));
    ASSERT_TRUE(parent_0 != parent_1);

    // cancel stale upmaps
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (std::find(up.begin(), up.end(), i) == up.end()) {
    ASSERT_TRUE(from >= 0);
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (std::find(up.begin(), up.end(), i) == up.end() && i != from) {
    ASSERT_TRUE(to >= 0);
    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
    new_pg_upmap_items.push_back(make_pair(from, to));
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pg_upmap_items[pgid] =
      mempool::osdmap::vector<pair<int32_t,int32_t>>(
        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    nextmap.deepish_copy_from(osdmap);
    nextmap.apply_incremental(pending_inc);
    ASSERT_TRUE(nextmap.have_pg_upmaps(pgid));
    OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, nextmap, new_pending_inc);
    nextmap.apply_incremental(new_pending_inc);
    ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));

    // https://tracker.ceph.com/issues/37493
    pg_t ec_pg(0, my_ec_pool);
    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
    OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
      // insert a valid pg_upmap_item
      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
      ASSERT_TRUE(!ec_up.empty());
      from = *(ec_up.begin());
      ASSERT_TRUE(from >= 0);
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[ec_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmpmap.deepish_copy_from(osdmap);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
      // mark one of the target OSDs of the above pg_upmap_item as down
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      pending_inc.new_state[to] = CEPH_OSD_UP;
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(!tmpmap.is_up(to));
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
      // confirm *clean_pg_upmaps* won't do anything bad
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));

    // http://tracker.ceph.com/issues/37501
    pg_t ec_pg(0, my_ec_pool);
    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
    OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
      // insert a valid pg_upmap_item
      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
      ASSERT_TRUE(!ec_up.empty());
      from = *(ec_up.begin());
      ASSERT_TRUE(from >= 0);
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[ec_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmpmap.deepish_copy_from(osdmap);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
      // mark one of the target OSDs of the above pg_upmap_item as out
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      pending_inc.new_weight[to] = CEPH_OSD_OUT;
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.is_out(to));
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
      // *clean_pg_upmaps* should be able to remove the above *bad* mapping
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid));

    // http://tracker.ceph.com/issues/37968
    // build a temporary crush topology of 2 hosts, 3 osds per host
    OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
    tmp.deepish_copy_from(osdmap);
    const int expected_host_num = 2;
    int osd_per_host = get_num_osds() / expected_host_num;
    ASSERT_GE(osd_per_host, 3);
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (i && i % osd_per_host == 0) {
      stringstream osd_name;
      stringstream host_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      host_name << "host-" << index;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name.str();
      move_to.push_back(host_loc);
      auto r = crush_move(tmp, osd_name.str(), move_to);
    get_crush(tmp, crush);
    string rule_name = "rule_37968";
    int rule_type = pg_pool_t::TYPE_ERASURE;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno))
    string root_name = "default";
    int root = crush.get_item_id(root_name);
    crush_rule *rule = crush_make_rule(steps, rule_type);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    crush.set_rule_name(rno, rule_name);
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.crush.clear();
    crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    tmp.apply_incremental(pending_inc);
    // create an erasure-coded pool referencing the above rule
    OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
    new_pool_inc.new_pool_max = tmp.get_pool_max();
    new_pool_inc.fsid = tmp.get_fsid();
    pool_37968 = ++new_pool_inc.new_pool_max;
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty);
    p->type = pg_pool_t::TYPE_ERASURE;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_37968] = "pool_37968";
    tmp.apply_incremental(new_pool_inc);

    pg_t ec_pg(0, pool_37968);
    pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg);
      // insert a valid pg_upmap_item
      tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
      ASSERT_TRUE(ec_up.size() == 4);
      from = *(ec_up.begin());
      ASSERT_TRUE(from >= 0);
      auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno);
      ASSERT_TRUE(parent < 0);
      // pick an osd with the same parent as *from*
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
          auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno);
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[ec_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
      // *clean_pg_upmaps* should not remove the above upmap_item
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));

    // STEP-1: enumerate all children of up[0]'s parent,
    // replace up[1] with one of them (other than up[0])
    int parent = osdmap.crush->get_parent_of_type(up[0],
      osdmap.crush->get_type_id("host"));
    osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates);
    ASSERT_LT(1U, candidates.size());
    int replaced_by = -1;
    for (auto c : candidates) {
      // Check we can handle a negative pg_upmap value
      vector<int32_t> new_pg_upmap;
      new_pg_upmap.push_back(up[0]);
      new_pg_upmap.push_back(-823648512);
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
        new_pg_upmap.begin(), new_pg_upmap.end());
      osdmap.apply_incremental(pending_inc);
      // crucial call - _apply_upmap should ignore the negative value
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
    ASSERT_NE(-1, replaced_by);
    // generate a new pg_upmap item and apply
    vector<int32_t> new_pg_upmap;
    new_pg_upmap.push_back(up[0]);
    new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
      new_pg_upmap.begin(), new_pg_upmap.end());
    osdmap.apply_incremental(pending_inc);
      // validate pg_upmap is there
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      ASSERT_EQ(new_up.size(), up.size());
      ASSERT_EQ(new_up[0], new_pg_upmap[0]);
      ASSERT_EQ(new_up[1], new_pg_upmap[1]);
      // and we shall have two OSDs from a same host now..
      int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
        osdmap.crush->get_type_id("host"));
      int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
        osdmap.crush->get_type_id("host"));
      ASSERT_EQ(parent_0, parent_1);
      // STEP-2: apply cure
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
      osdmap.apply_incremental(pending_inc);
      // validate pg_upmap is gone (reverted)
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      ASSERT_EQ(new_up, up);
      ASSERT_EQ(new_up_primary, up_primary);

    // TEST pg_upmap_items
    // enumerate all used hosts first
      int parent = osdmap.crush->get_parent_of_type(u,
        osdmap.crush->get_type_id("host"));
      ASSERT_GT(0, parent);
      parents.insert(parent);
    int candidate_parent = 0;
    set<int> candidate_children;
    vector<int> up_after_out;
      // STEP-1: try mark out up[1] and all other OSDs from the same host
      int parent = osdmap.crush->get_parent_of_type(up[1],
        osdmap.crush->get_type_id("host"));
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      for (auto c : children) {
        pending_inc.new_weight[c] = CEPH_OSD_OUT;
      tmpmap.deepish_copy_from(osdmap);
      tmpmap.apply_incremental(pending_inc);
      tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      // verify that we'll have OSDs from a different host..
      int will_choose = -1;
      for (auto o : new_up) {
        int parent = tmpmap.crush->get_parent_of_type(o,
          osdmap.crush->get_type_id("host"));
        if (!parents.count(parent)) {
          candidate_parent = parent; // record
      ASSERT_LT(-1, will_choose); // it is an OSD!
      ASSERT_NE(candidate_parent, 0);
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent),
                               &candidate_children);
      ASSERT_TRUE(candidate_children.count(will_choose));
      candidate_children.erase(will_choose);
      ASSERT_FALSE(candidate_children.empty());
      up_after_out = new_up; // needed for verification..

    // Make sure we can handle a negative pg_upmap_item
      int replaced_by = -823648512;
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      // crucial call - _apply_upmap should ignore the negative value
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);

    // STEP-2: generating a new pg_upmap_items entry by
    // replacing up[0] with one coming from candidate_children
      int replaced_by = *candidate_children.begin();
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      // validate pg_upmap_items is there
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      ASSERT_EQ(new_up.size(), up.size());
      ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) !=
      ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) !=
      // STEP-3: mark out up[1] and all other OSDs from the same host
      int parent = osdmap.crush->get_parent_of_type(up[1],
        osdmap.crush->get_type_id("host"));
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      for (auto c : children) {
        pending_inc.new_weight[c] = CEPH_OSD_OUT;
      osdmap.apply_incremental(pending_inc);
      // validate we have two OSDs from the same host now..
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      ASSERT_EQ(up.size(), new_up.size());
      int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
        osdmap.crush->get_type_id("host"));
      int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
        osdmap.crush->get_type_id("host"));
      ASSERT_EQ(parent_0, parent_1);
      // STEP-4: apply cure
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
      osdmap.apply_incremental(pending_inc);
      // validate pg_upmap_items is gone (reverted)
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      ASSERT_EQ(new_up, up_after_out);
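// BUG_38897: two pools with hand-crafted CRUSH rules and pre-existing
// upmaps; calc_pg_upmaps is then run restricted to pool1 only.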
TEST_F(OSDMapTest, BUG_38897) {
  // http://tracker.ceph.com/issues/38897
  // build a fresh map with 12 OSDs, without any default pools
  set_up_map(12, true);
  const string pool_1("pool1");
  const string pool_2("pool2");
  int64_t pool_1_id = -1;

    // build customized crush rule for "pool1"
    string host_name = "host_for_pool_1";
    // build a customized host to capture osd.1~5
    for (int i = 1; i < 5; i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0);
    string rule_name = "rule_for_pool1";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno))
    crush_rule *rule = crush_make_rule(steps, rule_type);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick any other random osds
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.crush.clear();
    crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    osdmap.apply_incremental(pending_inc);

    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pool_1_id = pool_id;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_1;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1);
    for (unsigned i = 0; i < 3; i++) {
      pg_t rawpg(i, pool_id);
      pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 3);
      ASSERT_TRUE(up[0] == 0);

      // insert a new pg_upmap
      vector<int32_t> new_up;
      // and remap 1.x to osd.1 only
      // this way osd.0 is deemed to be *underfull*
      // and osd.1 is deemed to be *overfull*
      new_up.push_back(1);
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
        new_up.begin(), new_up.end());
      osdmap.apply_incremental(pending_inc);
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 1);
      ASSERT_TRUE(up[0] == 1);

    // build customized crush rule for "pool2"
    string host_name = "host_for_pool_2";
    // build a customized host to capture osd.6~11
    for (int i = 6; i < (int)get_num_osds(); i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0);
    string rule_name = "rule_for_pool2";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno))
    crush_rule *rule = crush_make_rule(steps, rule_type);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick any other random osds
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.crush.clear();
    crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    osdmap.apply_incremental(pending_inc);

    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    // include a single PG
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_2;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2);
    pg_t rawpg(0, pool_id);
    pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
    EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid));
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    ASSERT_TRUE(up.size() == 3);
    ASSERT_TRUE(up[0] == 0);

    // build a pg_upmap_item that will
    // remap pg out from *underfull* osd.0
    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
    new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pg_upmap_items[pgid] =
      mempool::osdmap::vector<pair<int32_t,int32_t>>(
        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_upmaps(pgid));
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    ASSERT_TRUE(up.size() == 3);
    ASSERT_TRUE(up[0] == 10);

    set<int64_t> only_pools;
    ASSERT_TRUE(pool_1_id >= 0);
    only_pools.insert(pool_1_id);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    // require perfect distribution! (max deviation 0)
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0, // so we can force optimizing
    osdmap.apply_incremental(pending_inc);
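// BUG_40104: stress clean_pg_upmaps on a large map (5000 OSDs, a 10000-PG
// pool with one pg_upmap_item per PG) and print the measured latency.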
TEST_F(OSDMapTest, BUG_40104) {
  // http://tracker.ceph.com/issues/40104
  int big_osd_num = 5000;
  int big_pg_num = 10000;
  set_up_map(big_osd_num, true);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    pool_id = ++pending_inc.new_pool_max;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->set_pg_num(big_pg_num);
    p->set_pgp_num(big_pg_num);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = "big_pool";
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == "big_pool");
    // generate pg_upmap_items for each pg
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    for (int i = 0; i < big_pg_num; i++) {
      pg_t rawpg(i, pool_id);
      pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 3);
      int replaced_by = random() % big_osd_num;
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      // note that it might or might not be valid, we don't care
      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    osdmap.apply_incremental(pending_inc);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    auto start = mono_clock::now();
    clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
    auto latency = mono_clock::now() - start;
    std::cout << "clean_pg_upmaps (~" << big_pg_num
              << " pg_upmap_items) latency:" << timespan_str(latency)
, BUG_42052
) {
1431 // https://tracker.ceph.com/issues/42052
1432 set_up_map(6, true);
1433 const string
pool_name("pool");
1434 // build customized crush rule for "pool"
1436 get_crush(osdmap
, crush
);
1437 string rule_name
= "rule";
1438 int rule_type
= pg_pool_t::TYPE_REPLICATED
;
1439 ASSERT_TRUE(!crush
.rule_exists(rule_name
));
1441 for (rno
= 0; rno
< crush
.get_max_rules(); rno
++) {
1442 if (!crush
.rule_exists(rno
))
1446 crush_rule
*rule
= crush_make_rule(steps
, rule_type
);
1448 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
1449 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
1450 // always choose osd.0, osd.1, osd.2
1451 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 0);
1452 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1453 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 1);
1454 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1455 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 2);
1456 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1457 ASSERT_TRUE(step
== steps
);
1458 auto r
= crush_add_rule(crush
.get_crush_map(), rule
, rno
);
1459 ASSERT_TRUE(r
>= 0);
1460 crush
.set_rule_name(rno
, rule_name
);
1462 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1463 pending_inc
.crush
.clear();
1464 crush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
1465 osdmap
.apply_incremental(pending_inc
);
1469 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1470 pending_inc
.new_pool_max
= osdmap
.get_pool_max();
1471 auto pool_id
= ++pending_inc
.new_pool_max
;
1473 auto p
= pending_inc
.get_new_pool(pool_id
, &empty
);
1478 p
->type
= pg_pool_t::TYPE_REPLICATED
;
1479 p
->crush_rule
= rno
;
1480 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
1481 pending_inc
.new_pool_names
[pool_id
] = pool_name
;
1482 osdmap
.apply_incremental(pending_inc
);
1483 ASSERT_TRUE(osdmap
.have_pg_pool(pool_id
));
1484 ASSERT_TRUE(osdmap
.get_pool_name(pool_id
) == pool_name
);
1485 pg_t
rawpg(0, pool_id
);
1486 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
1488 // pg_upmap 1.0 [2,3,5]
1489 vector
<int32_t> new_up
{2,3,5};
1490 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1491 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
1492 new_up
.begin(), new_up
.end());
1493 osdmap
.apply_incremental(pending_inc
);
1496 // pg_upmap_items 1.0 [0,3,4,5]
1497 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1498 new_pg_upmap_items
.push_back(make_pair(0, 3));
1499 new_pg_upmap_items
.push_back(make_pair(4, 5));
1500 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1501 pending_inc
.new_pg_upmap_items
[pgid
] =
1502 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1503 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1504 osdmap
.apply_incremental(pending_inc
);
1507 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1508 clean_pg_upmaps(g_ceph_context
, osdmap
, pending_inc
);
1509 osdmap
.apply_incremental(pending_inc
);
1510 ASSERT_FALSE(osdmap
.have_pg_upmaps(pgid
));
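// BUG_42485: 2-datacenter / 3-racks-per-dc topology with a rule that picks
// two racks per datacenter; clean_pg_upmaps is expected to drop the upmap
// items inserted below.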
TEST_F(OSDMapTest, BUG_42485) {
  // build a temporary crush topology of 2 datacenters, 3 racks per dc,
  // 1 host per rack, 10 osds per host
  OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
  tmp.deepish_copy_from(osdmap);
  const int expected_host_num = 6;
  int osd_per_host = (int)get_num_osds() / expected_host_num;
  ASSERT_GE(osd_per_host, 10);
  int host_per_dc = 3;
  for (int i = 0; i < (int)get_num_osds(); i++) {
    if (i && i % osd_per_host == 0) {
    if (i && i % (host_per_dc * osd_per_host) == 0) {
    stringstream osd_name;
    stringstream host_name;
    stringstream rack_name;
    stringstream dc_name;
    vector<string> move_to;
    osd_name << "osd." << i;
    host_name << "host-" << index;
    rack_name << "rack-" << index;
    dc_name << "dc-" << dc_index;
    move_to.push_back("root=default");
    string dc_loc = "datacenter=" + dc_name.str();
    move_to.push_back(dc_loc);
    string rack_loc = "rack=" + rack_name.str();
    move_to.push_back(rack_loc);
    string host_loc = "host=" + host_name.str();
    move_to.push_back(host_loc);
    auto r = crush_move(tmp, osd_name.str(), move_to);
  get_crush(tmp, crush);
  string rule_name = "rule_xeus_993_1";
  int rule_type = pg_pool_t::TYPE_REPLICATED;
  ASSERT_TRUE(!crush.rule_exists(rule_name));
  for (rno = 0; rno < crush.get_max_rules(); rno++) {
    if (!crush.rule_exists(rno))
  string root_name = "default";
  string dc_1 = "dc-0";
  int dc1 = crush.get_item_id(dc_1);
  string dc_2 = "dc-1";
  int dc2 = crush.get_item_id(dc_2);
  crush_rule *rule = crush_make_rule(steps, rule_type);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc1, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc2, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  ASSERT_TRUE(step == steps);
  auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
  ASSERT_TRUE(r >= 0);
  crush.set_rule_name(rno, rule_name);
  OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
  pending_inc.crush.clear();
  crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
  tmp.apply_incremental(pending_inc);
  // create a replicated pool referencing the above rule
  int64_t pool_xeus_993;
    OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
    new_pool_inc.new_pool_max = tmp.get_pool_max();
    new_pool_inc.fsid = tmp.get_fsid();
    pool_xeus_993 = ++new_pool_inc.new_pool_max;
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_xeus_993, &empty);
    p->set_pg_num(4096);
    p->set_pgp_num(4096);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_xeus_993] = "pool_xeus_993";
    tmp.apply_incremental(new_pool_inc);

  pg_t rep_pg(0, pool_xeus_993);
  pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
    tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary);
    std::cout << "pgid " << rep_pgid << " up " << rep_up << std::endl;
    ASSERT_TRUE(rep_up.size() == 4);
    from = *(rep_up.begin());
    ASSERT_TRUE(from >= 0);
    auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno);
    if (dc_parent == dc1)
    auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno);
    ASSERT_TRUE(dc_parent < 0);
    ASSERT_TRUE(rack_parent < 0);
    set<int> rack_parents;
    for (auto &i : rep_up) {
      if (i == from) continue;
      auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
      rack_parents.insert(rack_parent);
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) {
        auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno);
        auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
        if (dc_p == dc_parent &&
            rack_parents.find(rack_p) == rack_parents.end()) {
    ASSERT_TRUE(to >= 0);
    ASSERT_TRUE(from != to);
    std::cout << "from " << from << " to " << to << std::endl;
    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
    new_pg_upmap_items.push_back(make_pair(from, to));
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.new_pg_upmap_items[rep_pgid] =
      mempool::osdmap::vector<pair<int32_t,int32_t>>(
        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    tmp.apply_incremental(pending_inc);
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));

  pg_t rep_pg2(2, pool_xeus_993);
  pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2);
    pg_t rep_pgid = rep_pgid2;
    vector<int> from_osds{-1, -1};
    tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary);
    ASSERT_TRUE(rep_up.size() == 4);
    from_osds[0] = *(rep_up.begin());
    from_osds[1] = *(rep_up.rbegin());
    std::cout << "pgid " << rep_pgid2 << " up " << rep_up << std::endl;
    ASSERT_TRUE(*(from_osds.begin()) >= 0);
    ASSERT_TRUE(*(from_osds.rbegin()) >= 0);
    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
    for (auto &from : from_osds) {
      auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno);
      if (dc_parent == dc1)
      auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno);
      ASSERT_TRUE(dc_parent < 0);
      ASSERT_TRUE(rack_parent < 0);
      set<int> rack_parents;
      for (auto &i : rep_up) {
        if (i == from) continue;
        auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
        rack_parents.insert(rack_parent);
      for (auto &i : new_pg_upmap_items) {
        auto rack_from = tmp.crush->get_parent_of_type(i.first, 3, rno);
        auto rack_to = tmp.crush->get_parent_of_type(i.second, 3, rno);
        rack_parents.insert(rack_from);
        rack_parents.insert(rack_to);
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) {
          auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno);
          auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
          if (dc_p == dc_parent &&
              rack_parents.find(rack_p) == rack_parents.end()) {
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      std::cout << "from " << from << " to " << to << std::endl;
      new_pg_upmap_items.push_back(make_pair(from, to));
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.new_pg_upmap_items[rep_pgid] =
      mempool::osdmap::vector<pair<int32_t,int32_t>>(
        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    tmp.apply_incremental(pending_inc);
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));

    // *maybe_remove_pg_upmaps* should remove the above upmap_item
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
    tmp.apply_incremental(pending_inc);
    ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid));
    ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid2));
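// Basic sanity checks for the PGTempMap container.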
TEST(PGTempMap, basic)
  for (auto i=3; i<1000; ++i) {
    m.set(x, {static_cast<int>(i)});
  ASSERT_NE(m.find(a), m.end());
  ASSERT_EQ(m.find(a), m.begin());
  ASSERT_EQ(m.find(b), m.end());
  ASSERT_EQ(998u, m.size());
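// BUG_43124: rack/host topology (5 racks, 4 hosts each) with a 12-shard EC
// rule choosing 4 racks and 3 hosts per rack.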
TEST_F(OSDMapTest, BUG_43124) {
  // https://tracker.ceph.com/issues/43124

  // build a temporary crush topology of 5 racks,
  // 4 hosts per rack, 10 osds per host
  OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
  tmp.deepish_copy_from(osdmap);
  const int expected_host_num = 20;
  int osd_per_host = (int)get_num_osds() / expected_host_num;
  ASSERT_GE(osd_per_host, 10);
  int host_per_rack = 4;
  for (int i = 0; i < (int)get_num_osds(); i++) {
    if (i && i % osd_per_host == 0) {
    if (i && i % (host_per_rack * osd_per_host) == 0) {
    stringstream osd_name;
    stringstream host_name;
    stringstream rack_name;
    vector<string> move_to;
    osd_name << "osd." << i;
    host_name << "host-" << index;
    rack_name << "rack-" << rack_index;
    move_to.push_back("root=default");
    string rack_loc = "rack=" + rack_name.str();
    move_to.push_back(rack_loc);
    string host_loc = "host=" + host_name.str();
    move_to.push_back(host_loc);
    auto r = crush_move(tmp, osd_name.str(), move_to);
  get_crush(tmp, crush);
  string rule_name = "rule_angel_1944";
  int rule_type = pg_pool_t::TYPE_ERASURE;
  ASSERT_TRUE(!crush.rule_exists(rule_name));
  for (rno = 0; rno < crush.get_max_rules(); rno++) {
    if (!crush.rule_exists(rno))
  string root_name = "default";
  int root = crush.get_item_id(root_name);
  crush_rule *rule = crush_make_rule(steps, rule_type);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_FIRSTN, 4, 3 /* rack */);
  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_INDEP, 3, 1 /* host */);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  ASSERT_TRUE(step == steps);
  auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
  ASSERT_TRUE(r >= 0);
  crush.set_rule_name(rno, rule_name);
  OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
  pending_inc.crush.clear();
  crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
  tmp.apply_incremental(pending_inc);

  crush.dump_tree(&oss, NULL);
  std::cout << oss.str() << std::endl;
  Formatter *f = Formatter::create("json-pretty");
  f->open_object_section("crush_rules");
  crush.dump_rules(f);
1827 // create a erasuce-coded pool referencing the above rule
1828 int64_t pool_angel_1944
;
1830 OSDMap::Incremental
new_pool_inc(tmp
.get_epoch() + 1);
1831 new_pool_inc
.new_pool_max
= tmp
.get_pool_max();
1832 new_pool_inc
.fsid
= tmp
.get_fsid();
1834 pool_angel_1944
= ++new_pool_inc
.new_pool_max
;
1835 pg_pool_t
*p
= new_pool_inc
.get_new_pool(pool_angel_1944
, &empty
);
1837 p
->set_pg_num(4096);
1838 p
->set_pgp_num(4096);
1839 p
->type
= pg_pool_t::TYPE_ERASURE
;
1840 p
->crush_rule
= rno
;
1841 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
1842 new_pool_inc
.new_pool_names
[pool_angel_1944
] = "pool_angel_1944";
1843 tmp
.apply_incremental(new_pool_inc
);
1846 pg_t
rep_pg(0, pool_angel_1944
);
1847 pg_t rep_pgid
= tmp
.raw_pg_to_pg(rep_pg
);
1849 // insert a pg_upmap_item
1854 tmp
.pg_to_raw_up(rep_pgid
, &rep_up
, &rep_up_primary
);
1855 std::cout
<< "pgid " << rep_pgid
<< " up " << rep_up
<< std::endl
;
1856 ASSERT_TRUE(rep_up
.size() == 12);
1857 from
= *(rep_up
.begin());
1858 ASSERT_TRUE(from
>= 0);
1859 auto from_rack
= tmp
.crush
->get_parent_of_type(from
, 3 /* rack */, rno
);
1860 set
<int> failure_domains
;
1861 for (auto &osd
: rep_up
) {
1862 failure_domains
.insert(tmp
.crush
->get_parent_of_type(osd
, 1 /* host */, rno
));
1864 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
1865 if (std::find(rep_up
.begin(), rep_up
.end(), i
) == rep_up
.end()) {
1866 auto to_rack
= tmp
.crush
->get_parent_of_type(i
, 3 /* rack */, rno
);
1867 auto to_host
= tmp
.crush
->get_parent_of_type(i
, 1 /* host */, rno
);
1868 if (to_rack
!= from_rack
&& failure_domains
.count(to_host
) == 0) {
1874 ASSERT_TRUE(to
>= 0);
1875 ASSERT_TRUE(from
!= to
);
1876 std::cout
<< "from " << from
<< " to " << to
<< std::endl
;
1877 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1878 new_pg_upmap_items
.push_back(make_pair(from
, to
));
1879 OSDMap::Incremental
pending_inc(tmp
.get_epoch() + 1);
1880 pending_inc
.new_pg_upmap_items
[rep_pgid
] =
1881 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1882 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1883 tmp
.apply_incremental(pending_inc
);
1884 ASSERT_TRUE(tmp
.have_pg_upmaps(rep_pgid
));
1887 // *maybe_remove_pg_upmaps* should not remove the above upmap_item
1888 OSDMap::Incremental
pending_inc(tmp
.get_epoch() + 1);
1889 clean_pg_upmaps(g_ceph_context
, tmp
, pending_inc
);
1890 tmp
.apply_incremental(pending_inc
);
1891 ASSERT_TRUE(tmp
.have_pg_upmaps(rep_pgid
));
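
// Regression test for bug 48884: the OSD utilization dump
// (print_osd_utilization) must report correct rack-level totals when some
// OSDs report all-zero stats.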
TEST_F(OSDMapTest, BUG_48884)
{
  set_up_map(12);

  unsigned int host_index = 1;
  for (unsigned int x=0; x < get_num_osds();) {
    // Create three hosts with four osds each
    for (unsigned int y=0; y < 4; y++) {
      stringstream osd_name;
      stringstream host_name;
      vector<string> move_to;
      osd_name << "osd." << x;
      host_name << "host-" << host_index;
      move_to.push_back("root=default");
      move_to.push_back("rack=localrack");
      string host_loc = "host=" + host_name.str();
      move_to.push_back(host_loc);
      int r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
      x++;
    }
    host_index++;
  }

  CrushWrapper crush;
  get_crush(osdmap, crush);
  auto host_id = crush.get_item_id("localhost");
  crush.remove_item(g_ceph_context, host_id, false);
  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
  pending_inc.crush.clear();
  crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
  osdmap.apply_incremental(pending_inc);

  PGMap pgmap;
  osd_stat_t stats, stats_null;
  stats.statfs.total = 500000;
  stats.statfs.available = 50000;
  stats.statfs.omap_allocated = 50000;
  stats.statfs.internal_metadata = 50000;
  stats_null.statfs.total = 0;
  stats_null.statfs.available = 0;
  stats_null.statfs.omap_allocated = 0;
  stats_null.statfs.internal_metadata = 0;
  for (unsigned int x=0; x < get_num_osds(); x++) {
    if (x > 3 && x < 8) {
      pgmap.osd_stat.insert({x,stats_null});
    } else {
      pgmap.osd_stat.insert({x,stats});
    }
  }

  stringstream ss;
  boost::scoped_ptr<Formatter> f(Formatter::create("json-pretty"));
  print_osd_utilization(osdmap, pgmap, ss, f.get(), true, "root");
  JSONParser parser;
  parser.parse(ss.str().c_str(), static_cast<int>(ss.str().size()));
  auto iter = parser.find_first();
  for (const auto& bucket : (*iter)->get_array_elements()) {
    JSONParser parser2;
    parser2.parse(bucket.c_str(), static_cast<int>(bucket.size()));
    auto* obj = parser2.find_obj("name");
    if (obj->get_data().compare("localrack") == 0) {
      obj = parser2.find_obj("kb");
      ASSERT_EQ(obj->get_data(), "3904");
      obj = parser2.find_obj("kb_used");
      ASSERT_EQ(obj->get_data(), "3512");
      obj = parser2.find_obj("kb_used_omap");
      ASSERT_EQ(obj->get_data(), "384");
      obj = parser2.find_obj("kb_used_meta");
      ASSERT_EQ(obj->get_data(), "384");
      obj = parser2.find_obj("kb_avail");
      ASSERT_EQ(obj->get_data(), "384");
    }
  }
}
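
// Regression test for bug 51842, parameterized over the chooseleaf step
// (see INSTANTIATE_TEST_SUITE_P at the bottom of the file): explicit
// pg_upmap entries must be dropped by clean_pg_upmaps() once the pool size
// no longer matches them.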
TEST_P(OSDMapTest, BUG_51842) {
  set_up_map(3, true);
  OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
  tmp.deepish_copy_from(osdmap);
  for (int i = 0; i < (int)get_num_osds(); i++) {
    stringstream osd_name;
    stringstream host_name;
    vector<string> move_to;
    osd_name << "osd." << i;
    host_name << "host=host-" << i;
    move_to.push_back("root=infra-1706");
    move_to.push_back(host_name.str());
    auto r = crush_move(tmp, osd_name.str(), move_to);
    ASSERT_EQ(0, r);
  }

  // build a crush rule under root "infra-1706"
  CrushWrapper crush;
  get_crush(tmp, crush);
  string rule_name = "infra-1706";
  int rule_type = pg_pool_t::TYPE_REPLICATED;
  ASSERT_TRUE(!crush.rule_exists(rule_name));
  int rno;
  for (rno = 0; rno < crush.get_max_rules(); rno++) {
    if (!crush.rule_exists(rno))
      break;
  }
  string root_bucket = "infra-1706";
  int root = crush.get_item_id(root_bucket);
  int steps = 5;
  crush_rule *rule = crush_make_rule(steps, rule_type);
  int step = 0;
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
  // note: it's ok to set like 'step chooseleaf_firstn 0 host'
  std::pair<int, int> param = GetParam();
  int rep_num = std::get<0>(param);
  int domain = std::get<1>(param);
  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, rep_num, domain);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  ASSERT_TRUE(step == steps);
  auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
  ASSERT_TRUE(r >= 0);
  crush.set_rule_name(rno, rule_name);
  {
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.crush.clear();
    crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    tmp.apply_incremental(pending_inc);
  }
  {
    stringstream oss;
    crush.dump_tree(&oss, NULL);
    std::cout << oss.str() << std::endl;
    Formatter *f = Formatter::create("json-pretty");
    f->open_object_section("crush_rules");
    crush.dump_rules(f);
    f->close_section();
    f->flush(cout);
    delete f;
  }

  // create a replicated pool referencing the above rule
  int64_t pool_infra_1706;
  {
    OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
    new_pool_inc.new_pool_max = tmp.get_pool_max();
    new_pool_inc.fsid = tmp.get_fsid();
    pg_pool_t empty;
    pool_infra_1706 = ++new_pool_inc.new_pool_max;
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_infra_1706, &empty);
    p->size = 3;
    p->set_pg_num(256);
    p->set_pgp_num(256);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_infra_1706] = "pool_infra_1706";
    tmp.apply_incremental(new_pool_inc);
  }

  pg_t rep_pg(3, pool_infra_1706);
  pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
  pg_t rep_pg2(4, pool_infra_1706);
  pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2);
  pg_t rep_pg3(6, pool_infra_1706);
  pg_t rep_pgid3 = tmp.raw_pg_to_pg(rep_pg3);
  {
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.new_pg_upmap[rep_pgid] = mempool::osdmap::vector<int32_t>({1,0,2});
    pending_inc.new_pg_upmap[rep_pgid2] = mempool::osdmap::vector<int32_t>({1,2,0});
    pending_inc.new_pg_upmap[rep_pgid3] = mempool::osdmap::vector<int32_t>({1,2,0});
    tmp.apply_incremental(pending_inc);
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid2));
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid3));
  }
  {
    // now, set pool size to 1
    OSDMap tmpmap;
    tmpmap.deepish_copy_from(tmp);
    OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1);
    pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706);
    p.size = 1;
    p.last_change = new_pool_inc.epoch;
    new_pool_inc.new_pools[pool_infra_1706] = p;
    tmpmap.apply_incremental(new_pool_inc);

    OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc);
    tmpmap.apply_incremental(new_pending_inc);
    // upmaps should be removed
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3));
  }
  {
    // now, set pool size to 4
    OSDMap tmpmap;
    tmpmap.deepish_copy_from(tmp);
    OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1);
    pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706);
    p.size = 4;
    p.last_change = new_pool_inc.epoch;
    new_pool_inc.new_pools[pool_infra_1706] = p;
    tmpmap.apply_incremental(new_pool_inc);

    OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc);
    tmpmap.apply_incremental(new_pending_inc);
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3));
  }
}
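
// Definitions of the blocklist test data declared in the fixture: CIDR
// ranges, addresses that fall inside those ranges, and addresses that fall
// just outside them.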
const string OSDMapTest::range_addrs[] = {"198.51.100.0/22", "10.2.5.102/32", "2001:db8::/48",
  "3001:db8::/72", "4001:db8::/30", "5001:db8::/64", "6001:db8::/128", "7001:db8::/127"};
const string OSDMapTest::ip_addrs[] = {"198.51.100.14", "198.51.100.0", "198.51.103.255",
  "2001:db8:0:0:0:0:0:0", "2001:db8:0:0:0:0001:ffff:ffff",
  "2001:db8:0:ffff:ffff:ffff:ffff:ffff",
  "3001:db8:0:0:0:0:0:0", "3001:db8:0:0:0:0001:ffff:ffff",
  "3001:db8:0:0:00ff:ffff:ffff:ffff",
  "4001:db8::", "4001:db8:0:0:0:0001:ffff:ffff",
  "4001:dbb:ffff:ffff:ffff:ffff:ffff:ffff",
  "5001:db8:0:0:0:0:0:0", "5001:db8:0:0:0:0:ffff:ffff",
  "5001:db8:0:0:ffff:ffff:ffff:ffff",
  "6001:db8:0:0:0:0:0:0",
  "7001:db8:0:0:0:0:0:0", "7001:db8:0:0:0:0:0:0001"
};
const string OSDMapTest::unblocked_ip_addrs[] = { "0.0.0.0", "1.1.1.1", "192.168.1.1",
  "198.51.99.255", "198.51.104.0",
  "10.2.5.101", "10.2.5.103",
  "2001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "2001:db8:0001::",
  "3001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "3001:db8:0:0:0100::",
  "4001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "4001:dbc::",
  "5001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "5001:db8:0:0001:0:0:0:0",
  "6001:db8:0:0:0:0:0:0001",
  "7001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "7001:db8:0:0:0:0:0:0002"
};
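
// Blocklist individual addresses, verify is_blocklisted() for both the
// blocked and the unblocked sets, then remove the entries and re-check.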
TEST_F(OSDMapTest, blocklisting_ips) {
  set_up_map(6); //whatever

  OSDMap::Incremental new_blocklist_inc(osdmap.get_epoch() + 1);
  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    new_blocklist_inc.new_blocklist[addr] = ceph_clock_now();
  }
  osdmap.apply_incremental(new_blocklist_inc);

  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    ASSERT_TRUE(osdmap.is_blocklisted(addr, g_ceph_context));
  }
  for (const auto& a : unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context));
  }

  OSDMap::Incremental rm_blocklist_inc(osdmap.get_epoch() + 1);
  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    rm_blocklist_inc.old_blocklist.push_back(addr);
  }
  osdmap.apply_incremental(rm_blocklist_inc);
  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context));
  }
  for (const auto& a : unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (blocklisted) {
      cout << "erroneously blocklisted " << addr << std::endl;
    }
    EXPECT_FALSE(blocklisted);
  }
}
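
// Same as above, but blocklisting CIDR ranges (TYPE_CIDR) instead of
// individual addresses.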
TEST_F(OSDMapTest, blocklisting_ranges) {
  set_up_map(6); //whatever
  OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1);
  for (const auto& a : range_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.type = entity_addr_t::TYPE_CIDR;
    range_blocklist_inc.new_range_blocklist[addr] = ceph_clock_now();
  }
  osdmap.apply_incremental(range_blocklist_inc);

  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (!blocklisted) {
      cout << "erroneously not blocklisted " << addr << std::endl;
    }
    ASSERT_TRUE(blocklisted);
  }
  for (const auto& a : unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (blocklisted) {
      cout << "erroneously blocklisted " << addr << std::endl;
    }
    EXPECT_FALSE(blocklisted);
  }

  OSDMap::Incremental rm_range_blocklist(osdmap.get_epoch() + 1);
  for (const auto& a : range_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.type = entity_addr_t::TYPE_CIDR;
    rm_range_blocklist.old_range_blocklist.push_back(addr);
  }
  osdmap.apply_incremental(rm_range_blocklist);

  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context));
  }
  for (const auto& a : unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (blocklisted) {
      cout << "erroneously blocklisted " << addr << std::endl;
    }
    EXPECT_FALSE(blocklisted);
  }
}
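
// Blocklist an all-covering range (prefix length 0), first for IPv6 and
// then swapped to IPv4, and verify every address of that family matches.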
TEST_F(OSDMapTest, blocklisting_everything) {
  set_up_map(6); //whatever
  OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1);
  entity_addr_t baddr;
  baddr.parse("2001:db8::/0");
  baddr.type = entity_addr_t::TYPE_CIDR;
  range_blocklist_inc.new_range_blocklist[baddr] = ceph_clock_now();
  osdmap.apply_incremental(range_blocklist_inc);

  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    if (addr.is_ipv4()) continue;
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (!blocklisted) {
      cout << "erroneously not blocklisted " << addr << std::endl;
    }
    ASSERT_TRUE(blocklisted);
  }
  for (const auto& a : unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    if (addr.is_ipv4()) continue;
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (!blocklisted) {
      cout << "erroneously not blocklisted " << addr << std::endl;
    }
    ASSERT_TRUE(blocklisted);
  }

  OSDMap::Incremental swap_blocklist_inc(osdmap.get_epoch()+1);
  swap_blocklist_inc.old_range_blocklist.push_back(baddr);

  entity_addr_t caddr;
  caddr.parse("1.1.1.1/0");
  caddr.type = entity_addr_t::TYPE_CIDR;
  swap_blocklist_inc.new_range_blocklist[caddr] = ceph_clock_now();
  osdmap.apply_incremental(swap_blocklist_inc);

  for (const auto& a : ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    if (!addr.is_ipv4()) continue;
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (!blocklisted) {
      cout << "erroneously not blocklisted " << addr << std::endl;
    }
    ASSERT_TRUE(blocklisted);
  }
  for (const auto& a : unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    if (!addr.is_ipv4()) continue;
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (!blocklisted) {
      cout << "erroneously not blocklisted " << addr << std::endl;
    }
    ASSERT_TRUE(blocklisted);
  }
}
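
// Basic calc_read_balance_score() behaviour on replicated pools: a normal
// score lies between 1 and the number of OSDs, and zeroed primary affinity
// turns the score into an error.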
TEST_F(OSDMapTest, ReadBalanceScore1) {
  std::srand ( unsigned ( std::time(0) ) );
  uint osd_rand = rand() % 13;
  set_up_map(6 + osd_rand); //whatever
  auto pools = osdmap.get_pools();
  for (auto &[pid, pg_pool] : pools) {
    const pg_pool_t *pi = osdmap.get_pg_pool(pid);
    if (pi->is_replicated()) {
      //cout << "pool " << pid << " " << pg_pool << std::endl;
      auto replica_count = pi->get_size();
      OSDMap::read_balance_info_t rbi;
      auto rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi);

      // "Normal" score is between 1 and num_osds
      ASSERT_TRUE(rc == 0);
      ASSERT_TRUE(score_in_range(rbi.adjusted_score));
      ASSERT_TRUE(score_in_range(rbi.acting_adj_score));
      ASSERT_TRUE(rbi.err_msg.empty());

      // When all OSDs have primary_affinity 0, score should be 0
      auto num_osds = get_num_osds();
      set_primary_affinity_all(0.);

      rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi);
      ASSERT_TRUE(rc < 0);
      ASSERT_TRUE(rbi.adjusted_score == 0.);
      ASSERT_TRUE(rbi.acting_adj_score == 0.);
      ASSERT_FALSE(rbi.err_msg.empty());

      std::vector<uint> osds;
      for (uint i = 0 ; i < num_osds ; i++) {
        osds.push_back(i);
      }

      // Change primary_affinity of some OSDs to 1 others are 0
      float fratio = 1. / (float)replica_count;
      for (int iter = 0 ; iter < 100 ; iter++) { // run the test 100 times
        // Create random shuffle of OSDs
        std::random_shuffle (osds.begin(), osds.end());
        for (uint i = 0 ; i < num_osds ; i++) {
          if ((float(i + 1) / float(num_osds)) < fratio) {
            ASSERT_TRUE(osds[i] < num_osds);
            osdmap.set_primary_affinity(osds[i], CEPH_OSD_MAX_PRIMARY_AFFINITY);
            rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi);

            ASSERT_TRUE(rc < 0);
            ASSERT_TRUE(rbi.adjusted_score == 0.);
            ASSERT_TRUE(rbi.acting_adj_score == 0.);
            ASSERT_FALSE(rbi.err_msg.empty());
          } else {
            if (rc < 0) {
              ASSERT_TRUE(rbi.adjusted_score == 0.);
              ASSERT_TRUE(rbi.acting_adj_score == 0.);
              ASSERT_FALSE(rbi.err_msg.empty());
            } else {
              ASSERT_TRUE(score_in_range(rbi.acting_adj_score, i + 1));
              ASSERT_TRUE(rbi.err_msg.empty());
            }
          }
        }
        set_primary_affinity_all(0.);
      }
    }
  }
}
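
// calc_read_balance_score() with randomized primary-affinity values: when
// too little total affinity is available for the replica count the call
// must fail, otherwise the score must stay in range.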
TEST_F(OSDMapTest, ReadBalanceScore2) {
  std::srand ( unsigned ( std::time(0) ) );
  uint osd_num = 6 + rand() % 13;
  set_up_map(osd_num, true);
  for (int i = 0 ; i < 100 ; i++) { //running 100 random tests
    uint num_pa_osds = 0;
    float pa_sum = 0.;
    OSDMap::read_balance_info_t rbi;

    // set pa for all osds
    for (uint j = 0 ; j < osd_num ; j++) {
      uint pa = 1 + rand() % 100;
      if (pa > 80)
        pa = 100;
      if (pa < 20)
        pa = 0;
      float fpa = (float)pa / 100.;
      if (pa > 0) {
        num_pa_osds++;
        pa_sum += fpa;
      }
      osdmap.set_primary_affinity(j, int(fpa * CEPH_OSD_MAX_PRIMARY_AFFINITY));
    }
    float pa_ratio = pa_sum / (float) osd_num;

    // create a pool with the current osdmap configuration
    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.new_pool_max = osdmap.get_pool_max();
    new_pool_inc.fsid = osdmap.get_fsid();
    string pool_name = "rep_pool" + stringify(i);
    uint64_t new_pid = set_rep_pool(pool_name, new_pool_inc, false);
    ASSERT_TRUE(new_pid > 0);
    osdmap.apply_incremental(new_pool_inc);

    // now run the test on the pool.
    const pg_pool_t *pi = osdmap.get_pg_pool(new_pid);
    ASSERT_NE(pi, nullptr);
    ASSERT_TRUE(pi->is_replicated());
    float fratio = 1. / (float)pi->get_size();
    auto rc = osdmap.calc_read_balance_score(g_ceph_context, new_pid, &rbi);
    if (pa_ratio < fratio) {
      ASSERT_TRUE(rc < 0);
      ASSERT_FALSE(rbi.err_msg.empty());
      ASSERT_TRUE(rbi.acting_adj_score == 0.);
      ASSERT_TRUE(rbi.adjusted_score == 0.);
    } else {
      if (rc < 0) {
        ASSERT_TRUE(rbi.adjusted_score == 0.);
        ASSERT_TRUE(rbi.acting_adj_score == 0.);
        ASSERT_FALSE(rbi.err_msg.empty());
      } else {
        if (rbi.err_msg.empty()) {
          ASSERT_TRUE(score_in_range(rbi.acting_adj_score, num_pa_osds));
        }
      }
    }
  }
  //TODO add ReadBalanceScore3 - with weighted osds.
}
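
// balance_primaries() on a small (4 OSD) map, with and without a
// primary-affinity-0 OSD: the read balance score must not get worse and
// per-OSD primary counts must stay near the desired distribution.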
TEST_F(OSDMapTest, read_balance_small_map) {
  // Set up a map with 4 OSDs and default pools
  set_up_map(4);

  const vector<string> test_cases = {"basic", "prim_affinity"};
  for (const auto & test : test_cases) {
    if (test == "prim_affinity") {
      // Make osd.0 off-limits for primaries by giving it prim affinity 0
      OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1);
      pending_inc0.new_primary_affinity[0] = 0;
      osdmap.apply_incremental(pending_inc0);

      // Ensure osd.0 has no primaries assigned to it
      map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd);
      ASSERT_TRUE(prim_pgs_by_osd[0].size() == 0);
      ASSERT_TRUE(acting_prims_by_osd[0].size() == 0);
    }

    // Make sure capacity is balanced first
    set<int64_t> only_pools;
    only_pools.insert(my_rep_pool);
    OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0,
                          100,
                          only_pools,
                          &pending_inc);
    osdmap.apply_incremental(pending_inc);

    // Get read balance score before balancing
    OSDMap::read_balance_info_t rb_info;
    auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info);
    ASSERT_TRUE(rc >= 0);
    float read_balance_score_before = rb_info.adjusted_score;

    // Calculate desired prim distributions to verify later
    map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2;
    osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2);
    vector<uint64_t> osds_to_check;
    for (const auto & [osd, pgs] : prim_pgs_by_osd_2) {
      osds_to_check.push_back(osd);
    }
    map<uint64_t,float> desired_prim_dist;
    rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool,
                                                  osds_to_check, desired_prim_dist);
    ASSERT_TRUE(rc >= 0);

    // Balance primaries
    OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1);
    int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap);
    osdmap.apply_incremental(pending_inc_2);

    if (test == "prim_affinity") {
      // Ensure osd.0 still has no primaries assigned to it
      map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3);
      ASSERT_TRUE(prim_pgs_by_osd_3[0].size() == 0);
      ASSERT_TRUE(acting_prims_by_osd_3[0].size() == 0);
    }

    // Get read balance score after balancing
    rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info);
    ASSERT_TRUE(rc >= 0);
    float read_balance_score_after = rb_info.adjusted_score;

    // Ensure the score hasn't gotten worse
    ASSERT_TRUE(read_balance_score_after <= read_balance_score_before);

    // Check for improvements
    if (num_changes > 0) {
      ASSERT_TRUE(read_balance_score_after < read_balance_score_before);

      // Check num primaries for each OSD is within range
      map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4);
      for (const auto & [osd, primaries] : prim_pgs_by_osd_4) {
        ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1));
        ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1));
      }
    }
  }
}
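
// Same scenario as read_balance_small_map, but on a 60 OSD map.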
TEST_F(OSDMapTest, read_balance_large_map) {
  // Set up a map with 60 OSDs and default pools
  set_up_map(60);

  const vector<string> test_cases = {"basic", "prim_affinity"};
  for (const auto & test : test_cases) {
    if (test == "prim_affinity") {
      // Make osd.0 off-limits for primaries by giving it prim affinity 0
      OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1);
      pending_inc0.new_primary_affinity[0] = 0;
      osdmap.apply_incremental(pending_inc0);

      // Ensure osd.0 has no primaries assigned to it
      map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd);
      ASSERT_TRUE(prim_pgs_by_osd[0].size() == 0);
      ASSERT_TRUE(acting_prims_by_osd[0].size() == 0);
    }

    // Make sure capacity is balanced first
    set<int64_t> only_pools;
    only_pools.insert(my_rep_pool);
    OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0,
                          100,
                          only_pools,
                          &pending_inc);
    osdmap.apply_incremental(pending_inc);

    // Get read balance score before balancing
    OSDMap::read_balance_info_t rb_info;
    auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info);
    ASSERT_TRUE(rc >= 0);
    float read_balance_score_before = rb_info.adjusted_score;

    // Calculate desired prim distributions to verify later
    map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2;
    osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2);
    vector<uint64_t> osds_to_check;
    for (auto [osd, pgs] : prim_pgs_by_osd_2) {
      osds_to_check.push_back(osd);
    }
    map<uint64_t,float> desired_prim_dist;
    rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool,
                                                  osds_to_check, desired_prim_dist);
    ASSERT_TRUE(rc >= 0);

    // Balance primaries
    OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1);
    int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap);
    osdmap.apply_incremental(pending_inc_2);

    if (test == "prim_affinity") {
      // Ensure osd.0 still has no primaries assigned to it
      map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3);
      ASSERT_TRUE(prim_pgs_by_osd_3[0].size() == 0);
      ASSERT_TRUE(acting_prims_by_osd_3[0].size() == 0);
    }

    // Get read balance score after balancing
    rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info);
    ASSERT_TRUE(rc >= 0);
    float read_balance_score_after = rb_info.adjusted_score;

    // Ensure the score hasn't gotten worse
    ASSERT_TRUE(read_balance_score_after <= read_balance_score_before);

    // Check for improvements
    if (num_changes > 0) {
      ASSERT_TRUE(read_balance_score_after < read_balance_score_before);

      // Check num primaries for each OSD is within range
      map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4);
      for (const auto & [osd, primaries] : prim_pgs_by_osd_4) {
        ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1));
        ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1));
      }
    }
  }
}
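
// Same scenario again, with a random OSD count and a randomly chosen
// primary-affinity-0 OSD.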
TEST_F(OSDMapTest, read_balance_random_map) {
  // Set up map with random number of OSDs
  std::srand ( unsigned ( std::time(0) ) );
  uint num_osds = 3 + (rand() % 10);
  ASSERT_TRUE(num_osds >= 3);
  set_up_map(num_osds);

  const vector<string> test_cases = {"basic", "prim_affinity"};
  for (const auto & test : test_cases) {
    uint rand_osd = rand() % num_osds;
    if (test == "prim_affinity") {
      // Make a random OSD off-limits for primaries by giving it prim affinity 0
      ASSERT_TRUE(rand_osd < num_osds);
      OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1);
      pending_inc0.new_primary_affinity[rand_osd] = 0;
      osdmap.apply_incremental(pending_inc0);

      // Ensure the random OSD has no primaries assigned to it
      map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd);
      ASSERT_TRUE(prim_pgs_by_osd[rand_osd].size() == 0);
      ASSERT_TRUE(acting_prims_by_osd[rand_osd].size() == 0);
    }

    // Make sure capacity is balanced first
    set<int64_t> only_pools;
    only_pools.insert(my_rep_pool);
    OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0,
                          100,
                          only_pools,
                          &pending_inc);
    osdmap.apply_incremental(pending_inc);

    // Get read balance score before balancing
    OSDMap::read_balance_info_t rb_info;
    auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info);
    ASSERT_TRUE(rc >= 0);
    float read_balance_score_before = rb_info.adjusted_score;

    // Calculate desired prim distributions to verify later
    map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2;
    osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2);
    vector<uint64_t> osds_to_check;
    for (const auto & [osd, pgs] : prim_pgs_by_osd_2) {
      osds_to_check.push_back(osd);
    }
    map<uint64_t,float> desired_prim_dist;
    rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool,
                                                  osds_to_check, desired_prim_dist);
    ASSERT_TRUE(rc >= 0);

    // Balance primaries
    OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1);
    int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap);
    osdmap.apply_incremental(pending_inc_2);

    if (test == "prim_affinity") {
      // Ensure the random OSD still has no primaries assigned to it
      map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3);
      ASSERT_TRUE(prim_pgs_by_osd_3[rand_osd].size() == 0);
      ASSERT_TRUE(acting_prims_by_osd_3[rand_osd].size() == 0);
    }

    // Get read balance score after balancing
    rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info);
    ASSERT_TRUE(rc >= 0);
    float read_balance_score_after = rb_info.adjusted_score;

    // Ensure the score hasn't gotten worse
    ASSERT_TRUE(read_balance_score_after <= read_balance_score_before);

    // Check for improvements
    if (num_changes > 0) {
      ASSERT_TRUE(read_balance_score_after < read_balance_score_before);

      // Check num primaries for each OSD is within range
      map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4;
      osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4);
      for (auto [osd, primaries] : prim_pgs_by_osd_4) {
        ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1));
        ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1));
      }
      for (auto [osd, primaries] : prim_pgs_by_osd_4) {
        ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1));
        ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1));
      }
    }
  }
}
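
// Parameters for the TEST_P cases above (e.g. BUG_51842): each pair is the
// 'firstn' count and the CRUSH failure-domain type (1 = host, 0 = osd)
// used in the chooseleaf step.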
INSTANTIATE_TEST_SUITE_P(
  OSDMap,
  OSDMapTest,
  ::testing::Values(
    std::make_pair<int, int>(0, 1), // chooseleaf firstn 0 host
    std::make_pair<int, int>(3, 1), // chooseleaf firstn 3 host
    std::make_pair<int, int>(0, 0), // chooseleaf firstn 0 osd
    std::make_pair<int, int>(3, 0)  // chooseleaf firstn 3 osd