1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #include "gtest/gtest.h"
3 #include "osd/OSDMap.h"
4 #include "osd/OSDMapMapping.h"
6 #include "global/global_context.h"
7 #include "global/global_init.h"
8 #include "common/common_init.h"
9 #include "common/ceph_argparse.h"
15 int main(int argc
, char **argv
) {
16 std::vector
<const char*> args(argv
, argv
+argc
);
18 auto cct
= global_init(nullptr, args
, CEPH_ENTITY_TYPE_CLIENT
,
19 CODE_ENVIRONMENT_UTILITY
,
20 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
21 common_init_finish(g_ceph_context
);
22 // make sure we have 3 copies, or some tests won't work
23 g_ceph_context
->_conf
->set_val("osd_pool_default_size", "3", false);
24 // our map is flat, so just try and split across OSDs, not hosts or whatever
25 g_ceph_context
->_conf
->set_val("osd_crush_chooseleaf_type", "0", false);
26 ::testing::InitGoogleTest(&argc
, argv
);
27 return RUN_ALL_TESTS();
30 class OSDMapTest
: public testing::Test
{
31 const static int num_osds
= 6;
34 OSDMapMapping mapping
;
35 const uint64_t my_ec_pool
= 1;
36 const uint64_t my_rep_pool
= 2;
43 osdmap
.build_simple(g_ceph_context
, 0, fsid
, num_osds
);
44 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
45 pending_inc
.fsid
= osdmap
.get_fsid();
46 entity_addr_t sample_addr
;
48 for (int i
= 0; i
< num_osds
; ++i
) {
49 sample_uuid
.generate_random();
50 sample_addr
.nonce
= i
;
51 pending_inc
.new_state
[i
] = CEPH_OSD_EXISTS
| CEPH_OSD_NEW
;
52 pending_inc
.new_up_client
[i
] = sample_addr
;
53 pending_inc
.new_up_cluster
[i
] = sample_addr
;
54 pending_inc
.new_hb_back_up
[i
] = sample_addr
;
55 pending_inc
.new_hb_front_up
[i
] = sample_addr
;
56 pending_inc
.new_weight
[i
] = CEPH_OSD_IN
;
57 pending_inc
.new_uuid
[i
] = sample_uuid
;
59 osdmap
.apply_incremental(pending_inc
);
61 // Create an EC ruleset and a pool using it
62 int r
= osdmap
.crush
->add_simple_rule(
63 "erasure", "default", "osd", "",
64 "indep", pg_pool_t::TYPE_ERASURE
,
67 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
68 new_pool_inc
.new_pool_max
= osdmap
.get_pool_max();
69 new_pool_inc
.fsid
= osdmap
.get_fsid();
72 uint64_t pool_id
= ++new_pool_inc
.new_pool_max
;
73 assert(pool_id
== my_ec_pool
);
74 pg_pool_t
*p
= new_pool_inc
.get_new_pool(pool_id
, &empty
);
78 p
->type
= pg_pool_t::TYPE_ERASURE
;
80 new_pool_inc
.new_pool_names
[pool_id
] = "ec";
81 // and a replicated pool
82 pool_id
= ++new_pool_inc
.new_pool_max
;
83 assert(pool_id
== my_rep_pool
);
84 p
= new_pool_inc
.get_new_pool(pool_id
, &empty
);
88 p
->type
= pg_pool_t::TYPE_REPLICATED
;
90 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
91 new_pool_inc
.new_pool_names
[pool_id
] = "reppool";
92 osdmap
.apply_incremental(new_pool_inc
);
94 unsigned int get_num_osds() { return num_osds
; }
95 void get_crush(CrushWrapper
& newcrush
) {
97 osdmap
.crush
->encode(bl
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
98 bufferlist::iterator p
= bl
.begin();
101 int crush_move(const string
&name
, const vector
<string
> &argvec
) {
102 map
<string
,string
> loc
;
103 CrushWrapper::parse_loc_map(argvec
, &loc
);
104 CrushWrapper newcrush
;
106 if (!newcrush
.name_exists(name
)) {
109 int id
= newcrush
.get_item_id(name
);
111 if (!newcrush
.check_item_loc(g_ceph_context
, id
, loc
, (int *)NULL
)) {
113 err
= newcrush
.create_or_move_item(g_ceph_context
, id
, 0, name
, loc
);
115 err
= newcrush
.move_bucket(g_ceph_context
, id
, loc
);
118 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
119 pending_inc
.crush
.clear();
120 newcrush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
121 osdmap
.apply_incremental(pending_inc
);
130 int crush_rule_create_replicated(const string
&name
,
132 const string
&type
) {
133 if (osdmap
.crush
->rule_exists(name
)) {
134 return osdmap
.crush
->get_rule_id(name
);
136 CrushWrapper newcrush
;
140 int ruleno
= newcrush
.add_simple_rule(
141 name
, root
, type
, device_class
,
142 "firstn", pg_pool_t::TYPE_REPLICATED
, &ss
);
144 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
145 pending_inc
.crush
.clear();
146 newcrush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
147 osdmap
.apply_incremental(pending_inc
);
151 void test_mappings(int pool
,
155 vector
<int> *primary
) {
156 mapping
.update(osdmap
);
157 for (int i
=0; i
<num
; ++i
) {
158 vector
<int> up
, acting
;
159 int up_primary
, acting_primary
;
161 osdmap
.pg_to_up_acting_osds(pgid
,
162 &up
, &up_primary
, &acting
, &acting_primary
);
163 for (unsigned j
=0; j
<acting
.size(); ++j
)
166 (*first
)[acting
[0]]++;
167 if (acting_primary
>= 0)
168 (*primary
)[acting_primary
]++;
170 // compare to precalc mapping
171 vector
<int> up2
, acting2
;
172 int up_primary2
, acting_primary2
;
173 pgid
= osdmap
.raw_pg_to_pg(pgid
);
174 mapping
.get(pgid
, &up2
, &up_primary2
, &acting2
, &acting_primary2
);
176 ASSERT_EQ(up_primary
, up_primary2
);
177 ASSERT_EQ(acting
, acting2
);
178 ASSERT_EQ(acting_primary
, acting_primary2
);
180 cout
<< "any: " << *any
<< std::endl
;;
181 cout
<< "first: " << *first
<< std::endl
;;
182 cout
<< "primary: " << *primary
<< std::endl
;;
186 TEST_F(OSDMapTest
, Create
) {
188 ASSERT_EQ(get_num_osds(), (unsigned)osdmap
.get_max_osd());
189 ASSERT_EQ(get_num_osds(), osdmap
.get_num_in_osds());
192 TEST_F(OSDMapTest
, Features
) {
195 uint64_t features
= osdmap
.get_features(CEPH_ENTITY_TYPE_OSD
, NULL
);
196 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
197 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
198 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
);
199 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_V2
);
200 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_ERASURE_CODES
);
201 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
202 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
204 // clients have a slightly different view
205 features
= osdmap
.get_features(CEPH_ENTITY_TYPE_CLIENT
, NULL
);
206 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
207 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
208 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
);
209 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_V2
);
210 ASSERT_FALSE(features
& CEPH_FEATURE_OSD_ERASURE_CODES
); // clients don't need this
211 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
212 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
214 // remove the EC pool, but leave the rule. add primary affinity.
216 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
217 new_pool_inc
.old_pools
.insert(osdmap
.lookup_pg_pool_name("ec"));
218 new_pool_inc
.new_primary_affinity
[0] = 0x8000;
219 osdmap
.apply_incremental(new_pool_inc
);
222 features
= osdmap
.get_features(CEPH_ENTITY_TYPE_MON
, NULL
);
223 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
224 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
225 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
); // shared bit with primary affinity
226 ASSERT_FALSE(features
& CEPH_FEATURE_CRUSH_V2
);
227 ASSERT_FALSE(features
& CEPH_FEATURE_OSD_ERASURE_CODES
);
228 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
229 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
231 // FIXME: test tiering feature bits
234 TEST_F(OSDMapTest
, MapPG
) {
237 std::cerr
<< " osdmap.pool_max==" << osdmap
.get_pool_max() << std::endl
;
238 pg_t
rawpg(0, my_rep_pool
, -1);
239 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
240 vector
<int> up_osds
, acting_osds
;
241 int up_primary
, acting_primary
;
243 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
244 &acting_osds
, &acting_primary
);
246 vector
<int> old_up_osds
, old_acting_osds
;
247 osdmap
.pg_to_up_acting_osds(pgid
, old_up_osds
, old_acting_osds
);
248 ASSERT_EQ(old_up_osds
, up_osds
);
249 ASSERT_EQ(old_acting_osds
, acting_osds
);
251 ASSERT_EQ(osdmap
.get_pg_pool(my_rep_pool
)->get_size(), up_osds
.size());
254 TEST_F(OSDMapTest
, MapFunctionsMatch
) {
255 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
257 pg_t
rawpg(0, my_rep_pool
, -1);
258 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
259 vector
<int> up_osds
, acting_osds
;
260 int up_primary
, acting_primary
;
262 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
263 &acting_osds
, &acting_primary
);
265 vector
<int> up_osds_two
, acting_osds_two
;
267 osdmap
.pg_to_up_acting_osds(pgid
, up_osds_two
, acting_osds_two
);
269 ASSERT_EQ(up_osds
, up_osds_two
);
270 ASSERT_EQ(acting_osds
, acting_osds_two
);
272 int acting_primary_two
;
273 osdmap
.pg_to_acting_osds(pgid
, &acting_osds_two
, &acting_primary_two
);
274 EXPECT_EQ(acting_osds
, acting_osds_two
);
275 EXPECT_EQ(acting_primary
, acting_primary_two
);
276 osdmap
.pg_to_acting_osds(pgid
, acting_osds_two
);
277 EXPECT_EQ(acting_osds
, acting_osds_two
);
280 /** This test must be removed or modified appropriately when we allow
281 * other ways to specify a primary. */
282 TEST_F(OSDMapTest
, PrimaryIsFirst
) {
285 pg_t
rawpg(0, my_rep_pool
, -1);
286 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
287 vector
<int> up_osds
, acting_osds
;
288 int up_primary
, acting_primary
;
290 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
291 &acting_osds
, &acting_primary
);
292 EXPECT_EQ(up_osds
[0], up_primary
);
293 EXPECT_EQ(acting_osds
[0], acting_primary
);
296 TEST_F(OSDMapTest
, PGTempRespected
) {
299 pg_t
rawpg(0, my_rep_pool
, -1);
300 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
301 vector
<int> up_osds
, acting_osds
;
302 int up_primary
, acting_primary
;
304 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
305 &acting_osds
, &acting_primary
);
307 // copy and swap first and last element in acting_osds
308 vector
<int> new_acting_osds(acting_osds
);
309 int first
= new_acting_osds
[0];
310 new_acting_osds
[0] = *new_acting_osds
.rbegin();
311 *new_acting_osds
.rbegin() = first
;
313 // apply pg_temp to osdmap
314 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
315 pgtemp_map
.new_pg_temp
[pgid
] = mempool::osdmap::vector
<int>(
316 new_acting_osds
.begin(), new_acting_osds
.end());
317 osdmap
.apply_incremental(pgtemp_map
);
319 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
320 &acting_osds
, &acting_primary
);
321 EXPECT_EQ(new_acting_osds
, acting_osds
);
324 TEST_F(OSDMapTest
, PrimaryTempRespected
) {
327 pg_t
rawpg(0, my_rep_pool
, -1);
328 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
330 vector
<int> acting_osds
;
331 int up_primary
, acting_primary
;
333 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
334 &acting_osds
, &acting_primary
);
336 // make second OSD primary via incremental
337 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
338 pgtemp_map
.new_primary_temp
[pgid
] = acting_osds
[1];
339 osdmap
.apply_incremental(pgtemp_map
);
341 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
342 &acting_osds
, &acting_primary
);
343 EXPECT_EQ(acting_primary
, acting_osds
[1]);
346 TEST_F(OSDMapTest
, CleanTemps
) {
349 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
350 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 2);
351 pg_t pga
= osdmap
.raw_pg_to_pg(pg_t(0, my_rep_pool
));
353 vector
<int> up_osds
, acting_osds
;
354 int up_primary
, acting_primary
;
355 osdmap
.pg_to_up_acting_osds(pga
, &up_osds
, &up_primary
,
356 &acting_osds
, &acting_primary
);
357 pgtemp_map
.new_pg_temp
[pga
] = mempool::osdmap::vector
<int>(
358 up_osds
.begin(), up_osds
.end());
359 pgtemp_map
.new_primary_temp
[pga
] = up_primary
;
361 pg_t pgb
= osdmap
.raw_pg_to_pg(pg_t(1, my_rep_pool
));
363 vector
<int> up_osds
, acting_osds
;
364 int up_primary
, acting_primary
;
365 osdmap
.pg_to_up_acting_osds(pgb
, &up_osds
, &up_primary
,
366 &acting_osds
, &acting_primary
);
367 pending_inc
.new_pg_temp
[pgb
] = mempool::osdmap::vector
<int>(
368 up_osds
.begin(), up_osds
.end());
369 pending_inc
.new_primary_temp
[pgb
] = up_primary
;
372 osdmap
.apply_incremental(pgtemp_map
);
374 OSDMap::clean_temps(g_ceph_context
, osdmap
, &pending_inc
);
376 EXPECT_TRUE(pending_inc
.new_pg_temp
.count(pga
) &&
377 pending_inc
.new_pg_temp
[pga
].size() == 0);
378 EXPECT_EQ(-1, pending_inc
.new_primary_temp
[pga
]);
380 EXPECT_TRUE(!pending_inc
.new_pg_temp
.count(pgb
) &&
381 !pending_inc
.new_primary_temp
.count(pgb
));
384 TEST_F(OSDMapTest
, KeepsNecessaryTemps
) {
387 pg_t
rawpg(0, my_rep_pool
, -1);
388 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
389 vector
<int> up_osds
, acting_osds
;
390 int up_primary
, acting_primary
;
392 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
393 &acting_osds
, &acting_primary
);
395 // find unused OSD and stick it in there
396 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
397 // find an unused osd and put it in place of the first one
399 for(; i
!= (int)get_num_osds(); ++i
) {
401 for (vector
<int>::iterator osd_it
= up_osds
.begin();
402 osd_it
!= up_osds
.end();
414 if (i
== (int)get_num_osds())
415 FAIL() << "did not find unused OSD for temp mapping";
417 pgtemp_map
.new_pg_temp
[pgid
] = mempool::osdmap::vector
<int>(
418 up_osds
.begin(), up_osds
.end());
419 pgtemp_map
.new_primary_temp
[pgid
] = up_osds
[1];
420 osdmap
.apply_incremental(pgtemp_map
);
422 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
424 OSDMap::clean_temps(g_ceph_context
, osdmap
, &pending_inc
);
425 EXPECT_FALSE(pending_inc
.new_pg_temp
.count(pgid
));
426 EXPECT_FALSE(pending_inc
.new_primary_temp
.count(pgid
));
429 TEST_F(OSDMapTest
, PrimaryAffinity
) {
432 int n
= get_num_osds();
433 for (map
<int64_t,pg_pool_t
>::const_iterator p
= osdmap
.get_pools().begin();
434 p
!= osdmap
.get_pools().end();
437 int expect_primary
= 10000 / n
;
438 cout
<< "pool " << pool
<< " size " << (int)p
->second
.size
439 << " expect_primary " << expect_primary
<< std::endl
;
441 vector
<int> any(n
, 0);
442 vector
<int> first(n
, 0);
443 vector
<int> primary(n
, 0);
444 test_mappings(pool
, 10000, &any
, &first
, &primary
);
445 for (int i
=0; i
<n
; ++i
) {
446 ASSERT_LT(0, any
[i
]);
447 ASSERT_LT(0, first
[i
]);
448 ASSERT_LT(0, primary
[i
]);
452 osdmap
.set_primary_affinity(0, 0);
453 osdmap
.set_primary_affinity(1, 0);
455 vector
<int> any(n
, 0);
456 vector
<int> first(n
, 0);
457 vector
<int> primary(n
, 0);
458 test_mappings(pool
, 10000, &any
, &first
, &primary
);
459 for (int i
=0; i
<n
; ++i
) {
460 ASSERT_LT(0, any
[i
]);
462 ASSERT_LT(0, first
[i
]);
463 ASSERT_LT(0, primary
[i
]);
465 if (p
->second
.is_replicated()) {
466 ASSERT_EQ(0, first
[i
]);
468 ASSERT_EQ(0, primary
[i
]);
473 osdmap
.set_primary_affinity(0, 0x8000);
474 osdmap
.set_primary_affinity(1, 0);
476 vector
<int> any(n
, 0);
477 vector
<int> first(n
, 0);
478 vector
<int> primary(n
, 0);
479 test_mappings(pool
, 10000, &any
, &first
, &primary
);
480 int expect
= (10000 / (n
-2)) / 2; // half weight
481 cout
<< "expect " << expect
<< std::endl
;
482 for (int i
=0; i
<n
; ++i
) {
483 ASSERT_LT(0, any
[i
]);
485 ASSERT_LT(0, first
[i
]);
486 ASSERT_LT(0, primary
[i
]);
488 if (p
->second
.is_replicated()) {
489 ASSERT_EQ(0, first
[i
]);
491 ASSERT_EQ(0, primary
[i
]);
493 ASSERT_LT(expect
*2/3, primary
[0]);
494 ASSERT_GT(expect
*4/3, primary
[0]);
499 osdmap
.set_primary_affinity(0, 0x10000);
500 osdmap
.set_primary_affinity(1, 0x10000);
504 TEST_F(OSDMapTest
, parse_osd_id_list
) {
508 osdmap
.get_all_osds(all
);
510 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"osd.0"}, &out
, &cout
));
511 ASSERT_EQ(1, out
.size());
512 ASSERT_EQ(0, *out
.begin());
514 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"1"}, &out
, &cout
));
515 ASSERT_EQ(1, out
.size());
516 ASSERT_EQ(1, *out
.begin());
518 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"osd.0","osd.1"}, &out
, &cout
));
519 ASSERT_EQ(2, out
.size());
520 ASSERT_EQ(0, *out
.begin());
521 ASSERT_EQ(1, *out
.rbegin());
523 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"osd.0","1"}, &out
, &cout
));
524 ASSERT_EQ(2, out
.size());
525 ASSERT_EQ(0, *out
.begin());
526 ASSERT_EQ(1, *out
.rbegin());
528 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"*"}, &out
, &cout
));
529 ASSERT_EQ(all
.size(), out
.size());
532 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"all"}, &out
, &cout
));
535 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"any"}, &out
, &cout
));
538 ASSERT_EQ(-EINVAL
, osdmap
.parse_osd_id_list({"foo"}, &out
, &cout
));
539 ASSERT_EQ(-EINVAL
, osdmap
.parse_osd_id_list({"-12"}, &out
, &cout
));
542 TEST_F(OSDMapTest
, CleanPGUpmaps
) {
545 // build a crush rule of type host
546 const int expected_host_num
= 3;
547 int osd_per_host
= get_num_osds() / expected_host_num
;
548 ASSERT_GE(2, osd_per_host
);
550 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
551 if (i
&& i
% osd_per_host
== 0) {
554 stringstream osd_name
;
555 stringstream host_name
;
556 vector
<string
> move_to
;
557 osd_name
<< "osd." << i
;
558 host_name
<< "host-" << index
;
559 move_to
.push_back("root=default");
560 string host_loc
= "host=" + host_name
.str();
561 move_to
.push_back(host_loc
);
562 int r
= crush_move(osd_name
.str(), move_to
);
565 const string upmap_rule
= "upmap";
566 int upmap_rule_no
= crush_rule_create_replicated(
567 upmap_rule
, "default", "host");
568 ASSERT_LT(0, upmap_rule_no
);
570 // create a replicated pool which references the above rule
571 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
572 new_pool_inc
.new_pool_max
= osdmap
.get_pool_max();
573 new_pool_inc
.fsid
= osdmap
.get_fsid();
575 uint64_t upmap_pool_id
= ++new_pool_inc
.new_pool_max
;
576 pg_pool_t
*p
= new_pool_inc
.get_new_pool(upmap_pool_id
, &empty
);
580 p
->type
= pg_pool_t::TYPE_REPLICATED
;
581 p
->crush_rule
= upmap_rule_no
;
582 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
583 new_pool_inc
.new_pool_names
[upmap_pool_id
] = "upmap_pool";
584 osdmap
.apply_incremental(new_pool_inc
);
586 pg_t
rawpg(0, upmap_pool_id
);
587 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
590 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
591 ASSERT_LT(1U, up
.size());
593 // validate we won't have two OSDs from the same host
594 int parent_0
= osdmap
.crush
->get_parent_of_type(up
[0],
595 osdmap
.crush
->get_type_id("host"));
596 int parent_1
= osdmap
.crush
->get_parent_of_type(up
[1],
597 osdmap
.crush
->get_type_id("host"));
598 ASSERT_TRUE(parent_0
!= parent_1
);
602 // cancel stale upmaps
603 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
605 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
606 if (std::find(up
.begin(), up
.end(), i
) == up
.end()) {
611 ASSERT_TRUE(from
>= 0);
613 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
614 if (std::find(up
.begin(), up
.end(), i
) == up
.end() && i
!= from
) {
619 ASSERT_TRUE(to
>= 0);
620 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
621 new_pg_upmap_items
.push_back(make_pair(from
, to
));
622 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
623 pending_inc
.new_pg_upmap_items
[pgid
] =
624 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
625 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
627 nextmap
.deepish_copy_from(osdmap
);
628 nextmap
.apply_incremental(pending_inc
);
629 ASSERT_TRUE(nextmap
.have_pg_upmaps(pgid
));
630 OSDMap::Incremental
new_pending_inc(nextmap
.get_epoch() + 1);
631 nextmap
.clean_pg_upmaps(g_ceph_context
, &new_pending_inc
);
632 nextmap
.apply_incremental(new_pending_inc
);
633 ASSERT_TRUE(!nextmap
.have_pg_upmaps(pgid
));
637 // https://tracker.ceph.com/issues/37493
638 pg_t
ec_pg(0, my_ec_pool
);
639 pg_t ec_pgid
= osdmap
.raw_pg_to_pg(ec_pg
);
640 OSDMap tmpmap
; // use a tmpmap here, so we do not dirty origin map..
644 // insert a valid pg_upmap_item
647 osdmap
.pg_to_raw_up(ec_pgid
, &ec_up
, &ec_up_primary
);
648 ASSERT_TRUE(!ec_up
.empty());
649 from
= *(ec_up
.begin());
650 ASSERT_TRUE(from
>= 0);
651 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
652 if (std::find(ec_up
.begin(), ec_up
.end(), i
) == ec_up
.end()) {
657 ASSERT_TRUE(to
>= 0);
658 ASSERT_TRUE(from
!= to
);
659 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
660 new_pg_upmap_items
.push_back(make_pair(from
, to
));
661 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
662 pending_inc
.new_pg_upmap_items
[ec_pgid
] =
663 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
664 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
665 tmpmap
.deepish_copy_from(osdmap
);
666 tmpmap
.apply_incremental(pending_inc
);
667 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
670 // mark one of the target OSDs of the above pg_upmap_item as down
671 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
672 pending_inc
.new_state
[to
] = CEPH_OSD_UP
;
673 tmpmap
.apply_incremental(pending_inc
);
674 ASSERT_TRUE(!tmpmap
.is_up(to
));
675 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
678 // confirm *maybe_remove_pg_upmaps* won't do anything bad
679 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
680 tmpmap
.maybe_remove_pg_upmaps(g_ceph_context
, tmpmap
, &pending_inc
);
681 tmpmap
.apply_incremental(pending_inc
);
682 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
687 // http://tracker.ceph.com/issues/37501
688 pg_t
ec_pg(0, my_ec_pool
);
689 pg_t ec_pgid
= osdmap
.raw_pg_to_pg(ec_pg
);
690 OSDMap tmpmap
; // use a tmpmap here, so we do not dirty origin map..
694 // insert a valid pg_upmap_item
697 osdmap
.pg_to_raw_up(ec_pgid
, &ec_up
, &ec_up_primary
);
698 ASSERT_TRUE(!ec_up
.empty());
699 from
= *(ec_up
.begin());
700 ASSERT_TRUE(from
>= 0);
701 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
702 if (std::find(ec_up
.begin(), ec_up
.end(), i
) == ec_up
.end()) {
707 ASSERT_TRUE(to
>= 0);
708 ASSERT_TRUE(from
!= to
);
709 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
710 new_pg_upmap_items
.push_back(make_pair(from
, to
));
711 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
712 pending_inc
.new_pg_upmap_items
[ec_pgid
] =
713 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
714 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
715 tmpmap
.deepish_copy_from(osdmap
);
716 tmpmap
.apply_incremental(pending_inc
);
717 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
720 // mark one of the target OSDs of the above pg_upmap_item as out
721 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
722 pending_inc
.new_weight
[to
] = CEPH_OSD_OUT
;
723 tmpmap
.apply_incremental(pending_inc
);
724 ASSERT_TRUE(tmpmap
.is_out(to
));
725 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
728 // *maybe_remove_pg_upmaps* should be able to remove the above *bad* mapping
729 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
731 nextmap
.deepish_copy_from(tmpmap
);
732 nextmap
.maybe_remove_pg_upmaps(g_ceph_context
, nextmap
, &pending_inc
);
733 tmpmap
.apply_incremental(pending_inc
);
734 ASSERT_TRUE(!tmpmap
.have_pg_upmaps(ec_pgid
));
741 // STEP-1: enumerate all children of up[0]'s parent,
742 // replace up[1] with one of them (other than up[0])
743 int parent
= osdmap
.crush
->get_parent_of_type(up
[0],
744 osdmap
.crush
->get_type_id("host"));
746 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(parent
), &candidates
);
747 ASSERT_LT(1U, candidates
.size());
748 int replaced_by
= -1;
749 for (auto c
: candidates
) {
756 // Check we can handle a negative pg_upmap value
757 vector
<int32_t> new_pg_upmap
;
758 new_pg_upmap
.push_back(up
[0]);
759 new_pg_upmap
.push_back(-823648512);
760 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
761 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
762 new_pg_upmap
.begin(), new_pg_upmap
.end());
763 osdmap
.apply_incremental(pending_inc
);
766 // crucial call - _apply_upmap should ignore the negative value
767 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
769 ASSERT_NE(-1, replaced_by
);
770 // generate a new pg_upmap item and apply
771 vector
<int32_t> new_pg_upmap
;
772 new_pg_upmap
.push_back(up
[0]);
773 new_pg_upmap
.push_back(replaced_by
); // up[1] -> replaced_by
774 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
775 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
776 new_pg_upmap
.begin(), new_pg_upmap
.end());
777 osdmap
.apply_incremental(pending_inc
);
779 // validate pg_upmap is there
782 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
783 ASSERT_TRUE(up
.size() == new_up
.size());
784 ASSERT_TRUE(new_up
[0] == new_pg_upmap
[0]);
785 ASSERT_TRUE(new_up
[1] == new_pg_upmap
[1]);
786 // and we should now have two OSDs from the same host
787 int parent_0
= osdmap
.crush
->get_parent_of_type(new_up
[0],
788 osdmap
.crush
->get_type_id("host"));
789 int parent_1
= osdmap
.crush
->get_parent_of_type(new_up
[1],
790 osdmap
.crush
->get_type_id("host"));
791 ASSERT_TRUE(parent_0
== parent_1
);
795 // STEP-2: apply cure
796 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
797 osdmap
.maybe_remove_pg_upmaps(g_ceph_context
, osdmap
, &pending_inc
);
798 osdmap
.apply_incremental(pending_inc
);
800 // validate pg_upmap is gone (reverted)
803 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
804 ASSERT_TRUE(new_up
== up
);
805 ASSERT_TRUE(new_up_primary
== up_primary
); // compare, don't assign: '=' overwrote new_up_primary and only tested that up_primary was non-zero
811 // TEST pg_upmap_items
812 // enumerate all used hosts first
815 int parent
= osdmap
.crush
->get_parent_of_type(u
,
816 osdmap
.crush
->get_type_id("host"));
817 ASSERT_GT(0, parent
);
818 parents
.insert(parent
);
820 int candidate_parent
= 0;
821 set
<int> candidate_children
;
822 vector
<int> up_after_out
;
824 // STEP-1: try mark out up[1] and all other OSDs from the same host
825 int parent
= osdmap
.crush
->get_parent_of_type(up
[1],
826 osdmap
.crush
->get_type_id("host"));
828 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(parent
),
830 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
831 for (auto c
: children
) {
832 pending_inc
.new_weight
[c
] = CEPH_OSD_OUT
;
835 tmpmap
.deepish_copy_from(osdmap
);
836 tmpmap
.apply_incremental(pending_inc
);
839 tmpmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
840 // verify that we'll have OSDs from a different host..
841 int will_choose
= -1;
842 for (auto o
: new_up
) {
843 int parent
= tmpmap
.crush
->get_parent_of_type(o
,
844 osdmap
.crush
->get_type_id("host"));
845 if (!parents
.count(parent
)) {
847 candidate_parent
= parent
; // record
851 ASSERT_LT(-1, will_choose
); // it is an OSD!
852 ASSERT_TRUE(candidate_parent
!= 0);
853 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(candidate_parent
),
854 &candidate_children
);
855 ASSERT_TRUE(candidate_children
.count(will_choose
));
856 candidate_children
.erase(will_choose
);
857 ASSERT_TRUE(!candidate_children
.empty());
858 up_after_out
= new_up
; // needed for verification..
861 // Make sure we can handle a negative pg_upmap_item
863 int replaced_by
= -823648512;
864 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
865 new_pg_upmap_items
.push_back(make_pair(victim
, replaced_by
));
867 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
868 pending_inc
.new_pg_upmap_items
[pgid
] =
869 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
870 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
871 osdmap
.apply_incremental(pending_inc
);
874 // crucial call - _apply_upmap should ignore the negative value
875 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
878 // STEP-2: generating a new pg_upmap_items entry by
879 // replacing up[0] with one coming from candidate_children
881 int replaced_by
= *candidate_children
.begin();
882 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
883 new_pg_upmap_items
.push_back(make_pair(victim
, replaced_by
));
885 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
886 pending_inc
.new_pg_upmap_items
[pgid
] =
887 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
888 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
889 osdmap
.apply_incremental(pending_inc
);
891 // validate pg_upmap_items is there
894 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
895 ASSERT_TRUE(up
.size() == new_up
.size());
896 ASSERT_TRUE(std::find(new_up
.begin(), new_up
.end(), replaced_by
) !=
899 ASSERT_TRUE(std::find(new_up
.begin(), new_up
.end(), up
[1]) !=
904 // STEP-3: mark out up[1] and all other OSDs from the same host
905 int parent
= osdmap
.crush
->get_parent_of_type(up
[1],
906 osdmap
.crush
->get_type_id("host"));
908 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(parent
),
910 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
911 for (auto c
: children
) {
912 pending_inc
.new_weight
[c
] = CEPH_OSD_OUT
;
914 osdmap
.apply_incremental(pending_inc
);
916 // validate we have two OSDs from the same host now..
919 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
920 ASSERT_TRUE(up
.size() == new_up
.size());
921 int parent_0
= osdmap
.crush
->get_parent_of_type(new_up
[0],
922 osdmap
.crush
->get_type_id("host"));
923 int parent_1
= osdmap
.crush
->get_parent_of_type(new_up
[1],
924 osdmap
.crush
->get_type_id("host"));
925 ASSERT_TRUE(parent_0
== parent_1
);
929 // STEP-4: apply cure
930 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
931 osdmap
.maybe_remove_pg_upmaps(g_ceph_context
, osdmap
, &pending_inc
);
932 osdmap
.apply_incremental(pending_inc
);
934 // validate pg_upmap_items is gone (reverted)
937 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
938 ASSERT_TRUE(new_up
== up_after_out
);
944 TEST(PGTempMap
, basic
)
948 for (auto i
=3; i
<1000; ++i
) {
950 m
.set(x
, {static_cast<int>(i
)});
954 ASSERT_NE(m
.find(a
), m
.end());
955 ASSERT_EQ(m
.find(a
), m
.begin());
956 ASSERT_EQ(m
.find(b
), m
.end());
957 ASSERT_EQ(998u, m
.size());