1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #include "gtest/gtest.h"
3 #include "osd/OSDMap.h"
4 #include "osd/OSDMapMapping.h"
5 #include "mon/OSDMonitor.h"
7 #include "global/global_context.h"
8 #include "global/global_init.h"
9 #include "common/common_init.h"
10 #include "common/ceph_argparse.h"
// NOTE(review): this extract is badly word-wrapped and several original source
// lines are elided (the embedded numbers at fragment starts are the original
// file's line numbers; gaps such as 21->23 mean lines are missing here).
// Code fragments below are kept byte-identical; only comments are added.
//
// Test harness entry point: seeds ceph config defaults (3 replicas, flat
// CRUSH splitting across OSDs), initializes the global ceph context, then
// hands control to GoogleTest.
16 int main(int argc
, char **argv
) {
17 map
<string
,string
> defaults
= {
18 // make sure we have 3 copies, or some tests won't work
19 { "osd_pool_default_size", "3" },
20 // our map is flat, so just try and split across OSDs, not hosts or whatever
21 { "osd_crush_chooseleaf_type", "0" },
// NOTE(review): the closing "};" of the defaults map (original line 22) is
// elided from this extract.
23 std::vector
<const char*> args(argv
, argv
+argc
);
// global_init() builds the CephContext from the defaults + CLI args; no
// default config file is read (CINIT_FLAG_NO_DEFAULT_CONFIG_FILE).
24 auto cct
= global_init(&defaults
, args
, CEPH_ENTITY_TYPE_CLIENT
,
25 CODE_ENVIRONMENT_UTILITY
,
26 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
27 common_init_finish(g_ceph_context
);
28 ::testing::InitGoogleTest(&argc
, argv
);
29 return RUN_ALL_TESTS();
// Test fixture shared by every OSDMapTest case below.
// set_up_map() builds a simple flat osdmap with num_osds OSDs, marks them all
// up/in via one Incremental, then (unless no_default_pools) creates an
// "erasure" CRUSH rule plus two pools: "ec" (id my_ec_pool == 1) and
// "reppool" (id my_rep_pool == 2).
// NOTE(review): several member declarations (e.g. num_osds, osdmap, fsid —
// original lines 33-35, 39-42) and various closing braces are elided from
// this extract; code fragments are kept byte-identical.
32 class OSDMapTest
: public testing::Test
{
36 OSDMapMapping mapping
;
37 const uint64_t my_ec_pool
= 1;
38 const uint64_t my_rep_pool
= 2;
// Build the test osdmap: new_num_osds OSDs, all existing/new/up/in, each
// with a fresh uuid and a distinct nonce in its (single-entry) addrvec.
43 void set_up_map(int new_num_osds
= 6, bool no_default_pools
= false) {
44 num_osds
= new_num_osds
;
46 osdmap
.build_simple(g_ceph_context
, 0, fsid
, num_osds
);
47 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
48 pending_inc
.fsid
= osdmap
.get_fsid();
49 entity_addrvec_t sample_addrs
;
50 sample_addrs
.v
.push_back(entity_addr_t());
52 for (int i
= 0; i
< num_osds
; ++i
) {
53 sample_uuid
.generate_random();
// Reuse the same addrvec for every OSD, distinguished only by nonce.
54 sample_addrs
.v
[0].nonce
= i
;
55 pending_inc
.new_state
[i
] = CEPH_OSD_EXISTS
| CEPH_OSD_NEW
;
56 pending_inc
.new_up_client
[i
] = sample_addrs
;
57 pending_inc
.new_up_cluster
[i
] = sample_addrs
;
58 pending_inc
.new_hb_back_up
[i
] = sample_addrs
;
59 pending_inc
.new_hb_front_up
[i
] = sample_addrs
;
60 pending_inc
.new_weight
[i
] = CEPH_OSD_IN
;
61 pending_inc
.new_uuid
[i
] = sample_uuid
;
63 osdmap
.apply_incremental(pending_inc
);
64 if (no_default_pools
) // do not create any default pool(s)
67 // Create an EC ruleset and a pool using it
68 int r
= osdmap
.crush
->add_simple_rule(
69 "erasure", "default", "osd", "",
70 "indep", pg_pool_t::TYPE_ERASURE
,
73 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
74 new_pool_inc
.new_pool_max
= osdmap
.get_pool_max();
75 new_pool_inc
.fsid
= osdmap
.get_fsid();
// First pool id must match my_ec_pool (asserted below).
78 uint64_t pool_id
= ++new_pool_inc
.new_pool_max
;
79 ceph_assert(pool_id
== my_ec_pool
);
80 pg_pool_t
*p
= new_pool_inc
.get_new_pool(pool_id
, &empty
);
84 p
->type
= pg_pool_t::TYPE_ERASURE
;
86 new_pool_inc
.new_pool_names
[pool_id
] = "ec";
87 // and a replicated pool
88 pool_id
= ++new_pool_inc
.new_pool_max
;
89 ceph_assert(pool_id
== my_rep_pool
);
90 p
= new_pool_inc
.get_new_pool(pool_id
, &empty
);
94 p
->type
= pg_pool_t::TYPE_REPLICATED
;
96 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
97 new_pool_inc
.new_pool_names
[pool_id
] = "reppool";
98 osdmap
.apply_incremental(new_pool_inc
);
// Accessor for the OSD count chosen in set_up_map().
100 unsigned int get_num_osds() { return num_osds
; }
// Snapshot tmap's CRUSH map into newcrush via an encode/decode round-trip
// (the decode — original lines 105-106 — is elided from this extract).
101 void get_crush(const OSDMap
& tmap
, CrushWrapper
& newcrush
) {
103 tmap
.crush
->encode(bl
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
104 auto p
= bl
.cbegin();
// Move an existing CRUSH item (named `name`) to the location described by
// argvec (e.g. {"root=default", "host=host-0"}), then apply the modified
// CRUSH map to tmap via an Incremental.
107 int crush_move(OSDMap
& tmap
, const string
&name
, const vector
<string
> &argvec
) {
108 map
<string
,string
> loc
;
109 CrushWrapper::parse_loc_map(argvec
, &loc
);
110 CrushWrapper newcrush
;
111 get_crush(tmap
, newcrush
);
112 if (!newcrush
.name_exists(name
)) {
115 int id
= newcrush
.get_item_id(name
);
// Only rewrite CRUSH if the item is not already at the requested location.
117 if (!newcrush
.check_item_loc(g_ceph_context
, id
, loc
, (int *)NULL
)) {
// Devices (id >= 0) are created-or-moved; buckets (id < 0) are moved.
// The surrounding if/else (original lines 118/120) is elided here.
119 err
= newcrush
.create_or_move_item(g_ceph_context
, id
, 0, name
, loc
);
121 err
= newcrush
.move_bucket(g_ceph_context
, id
, loc
);
124 OSDMap::Incremental
pending_inc(tmap
.get_epoch() + 1);
125 pending_inc
.crush
.clear();
126 newcrush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
127 tmap
.apply_incremental(pending_inc
);
// Create (or return the existing id of) a replicated "firstn" CRUSH rule
// rooted at `root`, choosing leaves of bucket type `type`.
136 int crush_rule_create_replicated(const string
&name
,
138 const string
&type
) {
// Idempotent: reuse an already-existing rule of the same name.
139 if (osdmap
.crush
->rule_exists(name
)) {
140 return osdmap
.crush
->get_rule_id(name
);
142 CrushWrapper newcrush
;
143 get_crush(osdmap
, newcrush
);
146 int ruleno
= newcrush
.add_simple_rule(
147 name
, root
, type
, device_class
,
148 "firstn", pg_pool_t::TYPE_REPLICATED
, &ss
);
150 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
151 pending_inc
.crush
.clear();
152 newcrush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
153 osdmap
.apply_incremental(pending_inc
);
// Map `num` pgs of `pool`, tally per-OSD hit counts into *any / *first /
// *primary, and cross-check the live mapping against the precalculated
// OSDMapMapping. Output-parameter declarations (original lines 158-160)
// are elided from this extract.
157 void test_mappings(int pool
,
161 vector
<int> *primary
) {
162 mapping
.update(osdmap
);
163 for (int i
=0; i
<num
; ++i
) {
164 vector
<int> up
, acting
;
165 int up_primary
, acting_primary
;
167 osdmap
.pg_to_up_acting_osds(pgid
,
168 &up
, &up_primary
, &acting
, &acting_primary
);
169 for (unsigned j
=0; j
<acting
.size(); ++j
)
172 (*first
)[acting
[0]]++;
173 if (acting_primary
>= 0)
174 (*primary
)[acting_primary
]++;
176 // compare to precalc mapping
177 vector
<int> up2
, acting2
;
178 int up_primary2
, acting_primary2
;
179 pgid
= osdmap
.raw_pg_to_pg(pgid
);
180 mapping
.get(pgid
, &up2
, &up_primary2
, &acting2
, &acting_primary2
);
182 ASSERT_EQ(up_primary
, up_primary2
);
183 ASSERT_EQ(acting
, acting2
);
184 ASSERT_EQ(acting_primary
, acting_primary2
);
186 cout
<< "any: " << *any
<< std::endl
;;
187 cout
<< "first: " << *first
<< std::endl
;;
188 cout
<< "primary: " << *primary
<< std::endl
;;
// Run OSDMonitor's CleanUpmapJob over all pgs that carry upmaps, using a
// ThreadPool + ParallelPGMapper; stale/invalid upmaps are queued for
// removal in pending_inc. (The `om` OSDMap parameter declaration —
// original line 191 — is elided from this extract.)
190 void clean_pg_upmaps(CephContext
*cct
,
192 OSDMap::Incremental
& pending_inc
) {
194 int pgs_per_chunk
= 256;
195 ThreadPool
tp(cct
, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num
);
197 ParallelPGMapper
mapper(cct
, &tp
);
198 vector
<pg_t
> pgs_to_check
;
199 om
.get_upmap_pgs(&pgs_to_check
);
200 OSDMonitor::CleanUpmapJob
job(cct
, om
, pending_inc
);
201 mapper
.queue(&job
, pgs_per_chunk
, pgs_to_check
);
// Sanity check: after set_up_map() (elided original line 208), max_osd and
// the in-OSD count both equal the number of OSDs created by the fixture.
207 TEST_F(OSDMapTest
, Create
) {
209 ASSERT_EQ(get_num_osds(), (unsigned)osdmap
.get_max_osd());
210 ASSERT_EQ(get_num_osds(), osdmap
.get_num_in_osds());
// Verify the feature bits the map advertises per entity type, before and
// after removing the EC pool / setting a primary affinity: OSD and client
// views require CRUSH_V2 (the EC rule uses it), while the MON view after
// the EC pool is deleted must NOT require CRUSH_V2 even though the rule
// remains.
213 TEST_F(OSDMapTest
, Features
) {
216 uint64_t features
= osdmap
.get_features(CEPH_ENTITY_TYPE_OSD
, NULL
);
217 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
218 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
219 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
);
220 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_V2
);
221 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
222 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
224 // clients have a slightly different view
225 features
= osdmap
.get_features(CEPH_ENTITY_TYPE_CLIENT
, NULL
);
226 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
227 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
228 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
);
229 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_V2
);
230 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
231 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
233 // remove the EC pool, but leave the rule. add primary affinity.
235 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
236 new_pool_inc
.old_pools
.insert(osdmap
.lookup_pg_pool_name("ec"));
237 new_pool_inc
.new_primary_affinity
[0] = 0x8000;
238 osdmap
.apply_incremental(new_pool_inc
);
241 features
= osdmap
.get_features(CEPH_ENTITY_TYPE_MON
, NULL
);
242 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
243 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
244 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
); // shared bit with primary affinity
245 ASSERT_FALSE(features
& CEPH_FEATURE_CRUSH_V2
);
246 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
247 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
249 // FIXME: test tiering feature bits
// Map one pg of the replicated pool and check that the pointer-style and
// reference-style pg_to_up_acting_osds overloads agree, and that the up set
// size matches the pool's replica count.
252 TEST_F(OSDMapTest
, MapPG
) {
255 std::cerr
<< " osdmap.pool_max==" << osdmap
.get_pool_max() << std::endl
;
256 pg_t
rawpg(0, my_rep_pool
);
257 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
258 vector
<int> up_osds
, acting_osds
;
259 int up_primary
, acting_primary
;
261 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
262 &acting_osds
, &acting_primary
);
264 vector
<int> old_up_osds
, old_acting_osds
;
265 osdmap
.pg_to_up_acting_osds(pgid
, old_up_osds
, old_acting_osds
);
266 ASSERT_EQ(old_up_osds
, up_osds
);
267 ASSERT_EQ(old_acting_osds
, acting_osds
);
269 ASSERT_EQ(osdmap
.get_pg_pool(my_rep_pool
)->get_size(), up_osds
.size());
// Cross-check the various mapping entry points: pg_to_up_acting_osds
// (pointer and reference forms) and pg_to_acting_osds (with and without a
// primary out-param) must all agree for the same pg.
272 TEST_F(OSDMapTest
, MapFunctionsMatch
) {
273 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
275 pg_t
rawpg(0, my_rep_pool
);
276 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
277 vector
<int> up_osds
, acting_osds
;
278 int up_primary
, acting_primary
;
280 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
281 &acting_osds
, &acting_primary
);
283 vector
<int> up_osds_two
, acting_osds_two
;
285 osdmap
.pg_to_up_acting_osds(pgid
, up_osds_two
, acting_osds_two
);
287 ASSERT_EQ(up_osds
, up_osds_two
);
288 ASSERT_EQ(acting_osds
, acting_osds_two
);
290 int acting_primary_two
;
291 osdmap
.pg_to_acting_osds(pgid
, &acting_osds_two
, &acting_primary_two
);
292 EXPECT_EQ(acting_osds
, acting_osds_two
);
293 EXPECT_EQ(acting_primary
, acting_primary_two
);
294 osdmap
.pg_to_acting_osds(pgid
, acting_osds_two
);
295 EXPECT_EQ(acting_osds
, acting_osds_two
);
298 /** This test must be removed or modified appropriately when we allow
299 * other ways to specify a primary. */
// With default settings the primary is simply the first element of the up
// and acting sets.
300 TEST_F(OSDMapTest
, PrimaryIsFirst
) {
303 pg_t
rawpg(0, my_rep_pool
);
304 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
305 vector
<int> up_osds
, acting_osds
;
306 int up_primary
, acting_primary
;
308 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
309 &acting_osds
, &acting_primary
);
310 EXPECT_EQ(up_osds
[0], up_primary
);
311 EXPECT_EQ(acting_osds
[0], acting_primary
);
// Install a pg_temp (acting set with first/last elements swapped) via an
// Incremental and verify the map reports the overridden acting set.
314 TEST_F(OSDMapTest
, PGTempRespected
) {
317 pg_t
rawpg(0, my_rep_pool
);
318 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
319 vector
<int> up_osds
, acting_osds
;
320 int up_primary
, acting_primary
;
322 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
323 &acting_osds
, &acting_primary
);
325 // copy and swap first and last element in acting_osds
326 vector
<int> new_acting_osds(acting_osds
);
327 int first
= new_acting_osds
[0];
328 new_acting_osds
[0] = *new_acting_osds
.rbegin();
329 *new_acting_osds
.rbegin() = first
;
331 // apply pg_temp to osdmap
332 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
333 pgtemp_map
.new_pg_temp
[pgid
] = mempool::osdmap::vector
<int>(
334 new_acting_osds
.begin(), new_acting_osds
.end());
335 osdmap
.apply_incremental(pgtemp_map
);
337 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
338 &acting_osds
, &acting_primary
);
339 EXPECT_EQ(new_acting_osds
, acting_osds
);
// Install a primary_temp pointing at the second acting OSD and verify the
// map reports it as acting_primary.
// NOTE(review): the `up_osds` declaration (original line ~347) is elided
// from this extract.
342 TEST_F(OSDMapTest
, PrimaryTempRespected
) {
345 pg_t
rawpg(0, my_rep_pool
);
346 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
348 vector
<int> acting_osds
;
349 int up_primary
, acting_primary
;
351 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
352 &acting_osds
, &acting_primary
);
354 // make second OSD primary via incremental
355 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
356 pgtemp_map
.new_primary_temp
[pgid
] = acting_osds
[1];
357 osdmap
.apply_incremental(pgtemp_map
);
359 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
360 &acting_osds
, &acting_primary
);
361 EXPECT_EQ(acting_primary
, acting_osds
[1]);
// OSDMap::clean_temps() behavior: a temp that matches the raw mapping (pga,
// committed into osdmap) gets an explicit removal queued in pending_inc
// (empty pg_temp vector, primary_temp = -1), while a redundant temp that is
// only pending (pgb) is simply dropped from pending_inc.
364 TEST_F(OSDMapTest
, CleanTemps
) {
367 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
368 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 2);
369 pg_t pga
= osdmap
.raw_pg_to_pg(pg_t(0, my_rep_pool
));
371 vector
<int> up_osds
, acting_osds
;
372 int up_primary
, acting_primary
;
373 osdmap
.pg_to_up_acting_osds(pga
, &up_osds
, &up_primary
,
374 &acting_osds
, &acting_primary
);
// Temps identical to the current up mapping — i.e. useless temps.
375 pgtemp_map
.new_pg_temp
[pga
] = mempool::osdmap::vector
<int>(
376 up_osds
.begin(), up_osds
.end());
377 pgtemp_map
.new_primary_temp
[pga
] = up_primary
;
379 pg_t pgb
= osdmap
.raw_pg_to_pg(pg_t(1, my_rep_pool
));
381 vector
<int> up_osds
, acting_osds
;
382 int up_primary
, acting_primary
;
383 osdmap
.pg_to_up_acting_osds(pgb
, &up_osds
, &up_primary
,
384 &acting_osds
, &acting_primary
);
385 pending_inc
.new_pg_temp
[pgb
] = mempool::osdmap::vector
<int>(
386 up_osds
.begin(), up_osds
.end());
387 pending_inc
.new_primary_temp
[pgb
] = up_primary
;
// pga's temps are committed; pgb's stay pending-only.
390 osdmap
.apply_incremental(pgtemp_map
);
393 tmpmap
.deepish_copy_from(osdmap
);
394 tmpmap
.apply_incremental(pending_inc
);
395 OSDMap::clean_temps(g_ceph_context
, osdmap
, tmpmap
, &pending_inc
);
397 EXPECT_TRUE(pending_inc
.new_pg_temp
.count(pga
) &&
398 pending_inc
.new_pg_temp
[pga
].size() == 0);
399 EXPECT_EQ(-1, pending_inc
.new_primary_temp
[pga
]);
401 EXPECT_TRUE(!pending_inc
.new_pg_temp
.count(pgb
) &&
402 !pending_inc
.new_primary_temp
.count(pgb
));
// clean_temps() must NOT remove temps that still change the mapping: an
// unused OSD is substituted into the up set (inner search loop bodies —
// original lines 419, 424-434 — are elided from this extract), installed
// as pg_temp/primary_temp, and must survive the cleanup pass.
405 TEST_F(OSDMapTest
, KeepsNecessaryTemps
) {
408 pg_t
rawpg(0, my_rep_pool
);
409 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
410 vector
<int> up_osds
, acting_osds
;
411 int up_primary
, acting_primary
;
413 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
414 &acting_osds
, &acting_primary
);
416 // find unused OSD and stick it in there
417 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
418 // find an unused osd and put it in place of the first one
420 for(; i
!= (int)get_num_osds(); ++i
) {
422 for (vector
<int>::iterator osd_it
= up_osds
.begin();
423 osd_it
!= up_osds
.end();
435 if (i
== (int)get_num_osds())
436 FAIL() << "did not find unused OSD for temp mapping";
438 pgtemp_map
.new_pg_temp
[pgid
] = mempool::osdmap::vector
<int>(
439 up_osds
.begin(), up_osds
.end());
440 pgtemp_map
.new_primary_temp
[pgid
] = up_osds
[1];
441 osdmap
.apply_incremental(pgtemp_map
);
443 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
446 tmpmap
.deepish_copy_from(osdmap
);
447 tmpmap
.apply_incremental(pending_inc
);
448 OSDMap::clean_temps(g_ceph_context
, osdmap
, tmpmap
, &pending_inc
);
// Still-meaningful temps must not be queued for removal.
449 EXPECT_FALSE(pending_inc
.new_pg_temp
.count(pgid
));
450 EXPECT_FALSE(pending_inc
.new_primary_temp
.count(pgid
));
// Statistical check of primary affinity over 10000 mappings per pool:
// default affinity spreads primaries over all OSDs; affinity 0 on osds 0-1
// removes them as primary (replicated pools) while they still appear in the
// acting sets; affinity 0x8000 (half) on osd.0 gives it roughly half the
// expected primary share (checked within a 2/3..4/3 band); finally affinity
// is restored to full (0x10000).
453 TEST_F(OSDMapTest
, PrimaryAffinity
) {
456 int n
= get_num_osds();
457 for (map
<int64_t,pg_pool_t
>::const_iterator p
= osdmap
.get_pools().begin();
458 p
!= osdmap
.get_pools().end();
461 int expect_primary
= 10000 / n
;
462 cout
<< "pool " << pool
<< " size " << (int)p
->second
.size
463 << " expect_primary " << expect_primary
<< std::endl
;
// Pass 1: default affinity — every OSD shows up in any/first/primary.
465 vector
<int> any(n
, 0);
466 vector
<int> first(n
, 0);
467 vector
<int> primary(n
, 0);
468 test_mappings(pool
, 10000, &any
, &first
, &primary
);
469 for (int i
=0; i
<n
; ++i
) {
470 ASSERT_LT(0, any
[i
]);
471 ASSERT_LT(0, first
[i
]);
472 ASSERT_LT(0, primary
[i
]);
// Pass 2: zero affinity on osd.0 and osd.1.
476 osdmap
.set_primary_affinity(0, 0);
477 osdmap
.set_primary_affinity(1, 0);
479 vector
<int> any(n
, 0);
480 vector
<int> first(n
, 0);
481 vector
<int> primary(n
, 0);
482 test_mappings(pool
, 10000, &any
, &first
, &primary
);
483 for (int i
=0; i
<n
; ++i
) {
484 ASSERT_LT(0, any
[i
]);
486 ASSERT_LT(0, first
[i
]);
487 ASSERT_LT(0, primary
[i
]);
489 if (p
->second
.is_replicated()) {
490 ASSERT_EQ(0, first
[i
]);
492 ASSERT_EQ(0, primary
[i
]);
// Pass 3: half affinity on osd.0, zero on osd.1.
497 osdmap
.set_primary_affinity(0, 0x8000);
498 osdmap
.set_primary_affinity(1, 0);
500 vector
<int> any(n
, 0);
501 vector
<int> first(n
, 0);
502 vector
<int> primary(n
, 0);
503 test_mappings(pool
, 10000, &any
, &first
, &primary
);
504 int expect
= (10000 / (n
-2)) / 2; // half weight
505 cout
<< "expect " << expect
<< std::endl
;
506 for (int i
=0; i
<n
; ++i
) {
507 ASSERT_LT(0, any
[i
]);
509 ASSERT_LT(0, first
[i
]);
510 ASSERT_LT(0, primary
[i
]);
512 if (p
->second
.is_replicated()) {
513 ASSERT_EQ(0, first
[i
]);
515 ASSERT_EQ(0, primary
[i
]);
// osd.0's primary count should land within [2/3, 4/3] of expectation.
517 ASSERT_LT(expect
*2/3, primary
[0]);
518 ASSERT_GT(expect
*4/3, primary
[0]);
// Restore full affinity for subsequent pools/iterations.
523 osdmap
.set_primary_affinity(0, 0x10000);
524 osdmap
.set_primary_affinity(1, 0x10000);
// Flags set on CRUSH node -1 (the root) are inherited by every OSD via
// get_osd_crush_node_flags(); an out-of-range OSD id (1000) reports 0, and
// setting the root's flags back to 0 clears them for all OSDs.
528 TEST_F(OSDMapTest
, get_osd_crush_node_flags
) {
531 for (unsigned i
=0; i
<get_num_osds(); ++i
) {
532 ASSERT_EQ(0u, osdmap
.get_osd_crush_node_flags(i
));
535 OSDMap::Incremental
inc(osdmap
.get_epoch() + 1);
536 inc
.new_crush_node_flags
[-1] = 123u;
537 osdmap
.apply_incremental(inc
);
538 for (unsigned i
=0; i
<get_num_osds(); ++i
) {
539 ASSERT_EQ(123u, osdmap
.get_osd_crush_node_flags(i
));
541 ASSERT_EQ(0u, osdmap
.get_osd_crush_node_flags(1000));
543 OSDMap::Incremental
inc3(osdmap
.get_epoch() + 1);
544 inc3
.new_crush_node_flags
[-1] = 456u;
545 osdmap
.apply_incremental(inc3
);
546 for (unsigned i
=0; i
<get_num_osds(); ++i
) {
547 ASSERT_EQ(456u, osdmap
.get_osd_crush_node_flags(i
));
549 ASSERT_EQ(0u, osdmap
.get_osd_crush_node_flags(1000));
551 OSDMap::Incremental
inc2(osdmap
.get_epoch() + 1);
552 inc2
.new_crush_node_flags
[-1] = 0;
553 osdmap
.apply_incremental(inc2
);
554 for (unsigned i
=0; i
<get_num_osds(); ++i
) {
// NOTE(review): this last loop calls get_crush_node_flags(i) — passing an
// OSD id as a crush node id — unlike the get_osd_crush_node_flags(i) used
// above. Both return 0 here; confirm against upstream whether intentional.
555 ASSERT_EQ(0u, osdmap
.get_crush_node_flags(i
));
// Exercise OSDMap::parse_osd_id_list(): accepts "osd.N" and bare "N" forms
// (including mixed), wildcards "*"/"all"/"any" expand to every OSD, and
// non-numeric or negative ids return -EINVAL.
// (Declarations of `all` and `out` sets — original lines 560-562, 564 —
// are elided from this extract.)
559 TEST_F(OSDMapTest
, parse_osd_id_list
) {
563 osdmap
.get_all_osds(all
);
565 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"osd.0"}, &out
, &cout
));
566 ASSERT_EQ(1u, out
.size());
567 ASSERT_EQ(0, *out
.begin());
569 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"1"}, &out
, &cout
));
570 ASSERT_EQ(1u, out
.size());
571 ASSERT_EQ(1, *out
.begin());
573 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"osd.0","osd.1"}, &out
, &cout
));
574 ASSERT_EQ(2u, out
.size());
575 ASSERT_EQ(0, *out
.begin());
576 ASSERT_EQ(1, *out
.rbegin());
578 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"osd.0","1"}, &out
, &cout
));
579 ASSERT_EQ(2u, out
.size());
580 ASSERT_EQ(0, *out
.begin());
581 ASSERT_EQ(1, *out
.rbegin());
583 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"*"}, &out
, &cout
));
584 ASSERT_EQ(all
.size(), out
.size());
587 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"all"}, &out
, &cout
));
590 ASSERT_EQ(0, osdmap
.parse_osd_id_list({"any"}, &out
, &cout
));
593 ASSERT_EQ(-EINVAL
, osdmap
.parse_osd_id_list({"foo"}, &out
, &cout
));
594 ASSERT_EQ(-EINVAL
, osdmap
.parse_osd_id_list({"-12"}, &out
, &cout
));
597 TEST_F(OSDMapTest
, CleanPGUpmaps
) {
600 // build a crush rule of type host
601 const int expected_host_num
= 3;
602 int osd_per_host
= get_num_osds() / expected_host_num
;
603 ASSERT_GE(2, osd_per_host
);
605 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
606 if (i
&& i
% osd_per_host
== 0) {
609 stringstream osd_name
;
610 stringstream host_name
;
611 vector
<string
> move_to
;
612 osd_name
<< "osd." << i
;
613 host_name
<< "host-" << index
;
614 move_to
.push_back("root=default");
615 string host_loc
= "host=" + host_name
.str();
616 move_to
.push_back(host_loc
);
617 int r
= crush_move(osdmap
, osd_name
.str(), move_to
);
620 const string upmap_rule
= "upmap";
621 int upmap_rule_no
= crush_rule_create_replicated(
622 upmap_rule
, "default", "host");
623 ASSERT_LT(0, upmap_rule_no
);
625 // create a replicated pool which references the above rule
626 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
627 new_pool_inc
.new_pool_max
= osdmap
.get_pool_max();
628 new_pool_inc
.fsid
= osdmap
.get_fsid();
630 uint64_t upmap_pool_id
= ++new_pool_inc
.new_pool_max
;
631 pg_pool_t
*p
= new_pool_inc
.get_new_pool(upmap_pool_id
, &empty
);
635 p
->type
= pg_pool_t::TYPE_REPLICATED
;
636 p
->crush_rule
= upmap_rule_no
;
637 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
638 new_pool_inc
.new_pool_names
[upmap_pool_id
] = "upmap_pool";
639 osdmap
.apply_incremental(new_pool_inc
);
641 pg_t
rawpg(0, upmap_pool_id
);
642 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
645 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
646 ASSERT_LT(1U, up
.size());
648 // validate we won't have two OSDs from a same host
649 int parent_0
= osdmap
.crush
->get_parent_of_type(up
[0],
650 osdmap
.crush
->get_type_id("host"));
651 int parent_1
= osdmap
.crush
->get_parent_of_type(up
[1],
652 osdmap
.crush
->get_type_id("host"));
653 ASSERT_TRUE(parent_0
!= parent_1
);
657 // cancel stale upmaps
658 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
660 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
661 if (std::find(up
.begin(), up
.end(), i
) == up
.end()) {
666 ASSERT_TRUE(from
>= 0);
668 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
669 if (std::find(up
.begin(), up
.end(), i
) == up
.end() && i
!= from
) {
674 ASSERT_TRUE(to
>= 0);
675 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
676 new_pg_upmap_items
.push_back(make_pair(from
, to
));
677 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
678 pending_inc
.new_pg_upmap_items
[pgid
] =
679 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
680 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
682 nextmap
.deepish_copy_from(osdmap
);
683 nextmap
.apply_incremental(pending_inc
);
684 ASSERT_TRUE(nextmap
.have_pg_upmaps(pgid
));
685 OSDMap::Incremental
new_pending_inc(nextmap
.get_epoch() + 1);
686 clean_pg_upmaps(g_ceph_context
, nextmap
, new_pending_inc
);
687 nextmap
.apply_incremental(new_pending_inc
);
688 ASSERT_TRUE(!nextmap
.have_pg_upmaps(pgid
));
692 // https://tracker.ceph.com/issues/37493
693 pg_t
ec_pg(0, my_ec_pool
);
694 pg_t ec_pgid
= osdmap
.raw_pg_to_pg(ec_pg
);
695 OSDMap tmpmap
; // use a tmpmap here, so we do not dirty origin map..
699 // insert a valid pg_upmap_item
702 osdmap
.pg_to_raw_up(ec_pgid
, &ec_up
, &ec_up_primary
);
703 ASSERT_TRUE(!ec_up
.empty());
704 from
= *(ec_up
.begin());
705 ASSERT_TRUE(from
>= 0);
706 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
707 if (std::find(ec_up
.begin(), ec_up
.end(), i
) == ec_up
.end()) {
712 ASSERT_TRUE(to
>= 0);
713 ASSERT_TRUE(from
!= to
);
714 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
715 new_pg_upmap_items
.push_back(make_pair(from
, to
));
716 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
717 pending_inc
.new_pg_upmap_items
[ec_pgid
] =
718 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
719 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
720 tmpmap
.deepish_copy_from(osdmap
);
721 tmpmap
.apply_incremental(pending_inc
);
722 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
725 // mark one of the target OSDs of the above pg_upmap_item as down
726 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
727 pending_inc
.new_state
[to
] = CEPH_OSD_UP
;
728 tmpmap
.apply_incremental(pending_inc
);
729 ASSERT_TRUE(!tmpmap
.is_up(to
));
730 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
733 // confirm *clean_pg_upmaps* won't do anything bad
734 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
735 clean_pg_upmaps(g_ceph_context
, tmpmap
, pending_inc
);
736 tmpmap
.apply_incremental(pending_inc
);
737 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
742 // http://tracker.ceph.com/issues/37501
743 pg_t
ec_pg(0, my_ec_pool
);
744 pg_t ec_pgid
= osdmap
.raw_pg_to_pg(ec_pg
);
745 OSDMap tmpmap
; // use a tmpmap here, so we do not dirty origin map..
749 // insert a valid pg_upmap_item
752 osdmap
.pg_to_raw_up(ec_pgid
, &ec_up
, &ec_up_primary
);
753 ASSERT_TRUE(!ec_up
.empty());
754 from
= *(ec_up
.begin());
755 ASSERT_TRUE(from
>= 0);
756 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
757 if (std::find(ec_up
.begin(), ec_up
.end(), i
) == ec_up
.end()) {
762 ASSERT_TRUE(to
>= 0);
763 ASSERT_TRUE(from
!= to
);
764 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
765 new_pg_upmap_items
.push_back(make_pair(from
, to
));
766 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
767 pending_inc
.new_pg_upmap_items
[ec_pgid
] =
768 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
769 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
770 tmpmap
.deepish_copy_from(osdmap
);
771 tmpmap
.apply_incremental(pending_inc
);
772 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
775 // mark one of the target OSDs of the above pg_upmap_item as out
776 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
777 pending_inc
.new_weight
[to
] = CEPH_OSD_OUT
;
778 tmpmap
.apply_incremental(pending_inc
);
779 ASSERT_TRUE(tmpmap
.is_out(to
));
780 ASSERT_TRUE(tmpmap
.have_pg_upmaps(ec_pgid
));
783 // *clean_pg_upmaps* should be able to remove the above *bad* mapping
784 OSDMap::Incremental
pending_inc(tmpmap
.get_epoch() + 1);
785 clean_pg_upmaps(g_ceph_context
, tmpmap
, pending_inc
);
786 tmpmap
.apply_incremental(pending_inc
);
787 ASSERT_TRUE(!tmpmap
.have_pg_upmaps(ec_pgid
));
792 // http://tracker.ceph.com/issues/37968
794 // build a temporary crush topology of 2 hosts, 3 osds per host
795 OSDMap tmp
; // use a tmpmap here, so we do not dirty origin map..
796 tmp
.deepish_copy_from(osdmap
);
797 const int expected_host_num
= 2;
798 int osd_per_host
= get_num_osds() / expected_host_num
;
799 ASSERT_GE(osd_per_host
, 3);
801 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
802 if (i
&& i
% osd_per_host
== 0) {
805 stringstream osd_name
;
806 stringstream host_name
;
807 vector
<string
> move_to
;
808 osd_name
<< "osd." << i
;
809 host_name
<< "host-" << index
;
810 move_to
.push_back("root=default");
811 string host_loc
= "host=" + host_name
.str();
812 move_to
.push_back(host_loc
);
813 auto r
= crush_move(tmp
, osd_name
.str(), move_to
);
819 get_crush(tmp
, crush
);
820 string rule_name
= "rule_37968";
821 int rule_type
= pg_pool_t::TYPE_ERASURE
;
822 ASSERT_TRUE(!crush
.rule_exists(rule_name
));
824 for (rno
= 0; rno
< crush
.get_max_rules(); rno
++) {
825 if (!crush
.rule_exists(rno
) && !crush
.ruleset_exists(rno
))
828 string root_name
= "default";
829 int root
= crush
.get_item_id(root_name
);
833 crush_rule
*rule
= crush_make_rule(steps
, rno
, rule_type
, min_size
, max_size
);
835 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
836 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
837 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, root
, 0);
838 crush_rule_set_step(rule
, step
++, CRUSH_RULE_CHOOSE_INDEP
, 2, 1 /* host*/);
839 crush_rule_set_step(rule
, step
++, CRUSH_RULE_CHOOSE_INDEP
, 2, 0 /* osd */);
840 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
841 ASSERT_TRUE(step
== steps
);
842 auto r
= crush_add_rule(crush
.get_crush_map(), rule
, rno
);
844 crush
.set_rule_name(rno
, rule_name
);
846 OSDMap::Incremental
pending_inc(tmp
.get_epoch() + 1);
847 pending_inc
.crush
.clear();
848 crush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
849 tmp
.apply_incremental(pending_inc
);
852 // create an erasure-coded pool referencing the above rule
855 OSDMap::Incremental
new_pool_inc(tmp
.get_epoch() + 1);
856 new_pool_inc
.new_pool_max
= tmp
.get_pool_max();
857 new_pool_inc
.fsid
= tmp
.get_fsid();
859 pool_37968
= ++new_pool_inc
.new_pool_max
;
860 pg_pool_t
*p
= new_pool_inc
.get_new_pool(pool_37968
, &empty
);
864 p
->type
= pg_pool_t::TYPE_ERASURE
;
866 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
867 new_pool_inc
.new_pool_names
[pool_37968
] = "pool_37968";
868 tmp
.apply_incremental(new_pool_inc
);
871 pg_t
ec_pg(0, pool_37968
);
872 pg_t ec_pgid
= tmp
.raw_pg_to_pg(ec_pg
);
876 // insert a valid pg_upmap_item
879 tmp
.pg_to_raw_up(ec_pgid
, &ec_up
, &ec_up_primary
);
880 ASSERT_TRUE(ec_up
.size() == 4);
881 from
= *(ec_up
.begin());
882 ASSERT_TRUE(from
>= 0);
883 auto parent
= tmp
.crush
->get_parent_of_type(from
, 1 /* host */, rno
);
884 ASSERT_TRUE(parent
< 0);
885 // pick an osd of the same parent with *from*
886 for (int i
= 0; i
< (int)get_num_osds(); i
++) {
887 if (std::find(ec_up
.begin(), ec_up
.end(), i
) == ec_up
.end()) {
888 auto p
= tmp
.crush
->get_parent_of_type(i
, 1 /* host */, rno
);
895 ASSERT_TRUE(to
>= 0);
896 ASSERT_TRUE(from
!= to
);
897 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
898 new_pg_upmap_items
.push_back(make_pair(from
, to
));
899 OSDMap::Incremental
pending_inc(tmp
.get_epoch() + 1);
900 pending_inc
.new_pg_upmap_items
[ec_pgid
] =
901 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
902 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
903 tmp
.apply_incremental(pending_inc
);
904 ASSERT_TRUE(tmp
.have_pg_upmaps(ec_pgid
));
907 // *clean_pg_upmaps* should not remove the above upmap_item
908 OSDMap::Incremental
pending_inc(tmp
.get_epoch() + 1);
909 clean_pg_upmaps(g_ceph_context
, tmp
, pending_inc
);
910 tmp
.apply_incremental(pending_inc
);
911 ASSERT_TRUE(tmp
.have_pg_upmaps(ec_pgid
));
918 // STEP-1: enumerate all children of up[0]'s parent,
919 // replace up[1] with one of them (other than up[0])
920 int parent
= osdmap
.crush
->get_parent_of_type(up
[0],
921 osdmap
.crush
->get_type_id("host"));
923 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(parent
), &candidates
);
924 ASSERT_LT(1U, candidates
.size());
925 int replaced_by
= -1;
926 for (auto c
: candidates
) {
933 // Check we can handle a negative pg_upmap value
934 vector
<int32_t> new_pg_upmap
;
935 new_pg_upmap
.push_back(up
[0]);
936 new_pg_upmap
.push_back(-823648512);
937 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
938 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
939 new_pg_upmap
.begin(), new_pg_upmap
.end());
940 osdmap
.apply_incremental(pending_inc
);
943 // crucial call - _apply_upmap should ignore the negative value
944 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
946 ASSERT_NE(-1, replaced_by
);
947 // generate a new pg_upmap item and apply
948 vector
<int32_t> new_pg_upmap
;
949 new_pg_upmap
.push_back(up
[0]);
950 new_pg_upmap
.push_back(replaced_by
); // up[1] -> replaced_by
951 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
952 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
953 new_pg_upmap
.begin(), new_pg_upmap
.end());
954 osdmap
.apply_incremental(pending_inc
);
956 // validate pg_upmap is there
959 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
960 ASSERT_TRUE(up
.size() == new_up
.size());
961 ASSERT_TRUE(new_up
[0] == new_pg_upmap
[0]);
962 ASSERT_TRUE(new_up
[1] == new_pg_upmap
[1]);
963 // and we shall have two OSDs from a same host now..
964 int parent_0
= osdmap
.crush
->get_parent_of_type(new_up
[0],
965 osdmap
.crush
->get_type_id("host"));
966 int parent_1
= osdmap
.crush
->get_parent_of_type(new_up
[1],
967 osdmap
.crush
->get_type_id("host"));
968 ASSERT_TRUE(parent_0
== parent_1
);
972 // STEP-2: apply cure
973 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
974 clean_pg_upmaps(g_ceph_context
, osdmap
, pending_inc
);
975 osdmap
.apply_incremental(pending_inc
);
977 // validate pg_upmap is gone (reverted)
980 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
981 ASSERT_TRUE(new_up
== up
);
982 ASSERT_TRUE(new_up_primary
= up_primary
);
988 // TEST pg_upmap_items
989 // enumerate all used hosts first
992 int parent
= osdmap
.crush
->get_parent_of_type(u
,
993 osdmap
.crush
->get_type_id("host"));
994 ASSERT_GT(0, parent
);
995 parents
.insert(parent
);
997 int candidate_parent
= 0;
998 set
<int> candidate_children
;
999 vector
<int> up_after_out
;
1001 // STEP-1: try mark out up[1] and all other OSDs from the same host
1002 int parent
= osdmap
.crush
->get_parent_of_type(up
[1],
1003 osdmap
.crush
->get_type_id("host"));
1005 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(parent
),
1007 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1008 for (auto c
: children
) {
1009 pending_inc
.new_weight
[c
] = CEPH_OSD_OUT
;
1012 tmpmap
.deepish_copy_from(osdmap
);
1013 tmpmap
.apply_incremental(pending_inc
);
1016 tmpmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
1017 // verify that we'll have OSDs from a different host..
1018 int will_choose
= -1;
1019 for (auto o
: new_up
) {
1020 int parent
= tmpmap
.crush
->get_parent_of_type(o
,
1021 osdmap
.crush
->get_type_id("host"));
1022 if (!parents
.count(parent
)) {
1024 candidate_parent
= parent
; // record
1028 ASSERT_LT(-1, will_choose
); // it is an OSD!
1029 ASSERT_TRUE(candidate_parent
!= 0);
1030 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(candidate_parent
),
1031 &candidate_children
);
1032 ASSERT_TRUE(candidate_children
.count(will_choose
));
1033 candidate_children
.erase(will_choose
);
1034 ASSERT_TRUE(!candidate_children
.empty());
1035 up_after_out
= new_up
; // needed for verification..
1038 // Make sure we can handle a negative pg_upmap_item
1040 int replaced_by
= -823648512;
1041 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1042 new_pg_upmap_items
.push_back(make_pair(victim
, replaced_by
));
1044 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1045 pending_inc
.new_pg_upmap_items
[pgid
] =
1046 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1047 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1048 osdmap
.apply_incremental(pending_inc
);
1051 // crucial call - _apply_upmap should ignore the negative value
1052 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
1055 // STEP-2: generating a new pg_upmap_items entry by
1056 // replacing up[0] with one coming from candidate_children
1058 int replaced_by
= *candidate_children
.begin();
1059 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1060 new_pg_upmap_items
.push_back(make_pair(victim
, replaced_by
));
1062 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1063 pending_inc
.new_pg_upmap_items
[pgid
] =
1064 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1065 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1066 osdmap
.apply_incremental(pending_inc
);
1068 // validate pg_upmap_items is there
1071 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
1072 ASSERT_TRUE(up
.size() == new_up
.size());
1073 ASSERT_TRUE(std::find(new_up
.begin(), new_up
.end(), replaced_by
) !=
1076 ASSERT_TRUE(std::find(new_up
.begin(), new_up
.end(), up
[1]) !=
1081 // STEP-3: mark out up[1] and all other OSDs from the same host
1082 int parent
= osdmap
.crush
->get_parent_of_type(up
[1],
1083 osdmap
.crush
->get_type_id("host"));
1085 osdmap
.crush
->get_leaves(osdmap
.crush
->get_item_name(parent
),
1087 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1088 for (auto c
: children
) {
1089 pending_inc
.new_weight
[c
] = CEPH_OSD_OUT
;
1091 osdmap
.apply_incremental(pending_inc
);
1093 // validate we have two OSDs from the same host now..
1096 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
1097 ASSERT_TRUE(up
.size() == new_up
.size());
1098 int parent_0
= osdmap
.crush
->get_parent_of_type(new_up
[0],
1099 osdmap
.crush
->get_type_id("host"));
1100 int parent_1
= osdmap
.crush
->get_parent_of_type(new_up
[1],
1101 osdmap
.crush
->get_type_id("host"));
1102 ASSERT_TRUE(parent_0
== parent_1
);
1106 // STEP-4: apply cure
1107 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1108 clean_pg_upmaps(g_ceph_context
, osdmap
, pending_inc
);
1109 osdmap
.apply_incremental(pending_inc
);
1111 // validate pg_upmap_items is gone (reverted)
1114 osdmap
.pg_to_raw_up(pgid
, &new_up
, &new_up_primary
);
1115 ASSERT_TRUE(new_up
== up_after_out
);
1121 TEST_F(OSDMapTest
, BUG_38897
) {
1122 // http://tracker.ceph.com/issues/38897
1123 // build a fresh map with 12 OSDs, without any default pools
1124 set_up_map(12, true);
1125 const string
pool_1("pool1");
1126 const string
pool_2("pool2");
1127 int64_t pool_1_id
= -1;
1130 // build customized crush rule for "pool1"
1131 string host_name
= "host_for_pool_1";
1132 // build a customized host to capture osd.1~5
1133 for (int i
= 1; i
< 5; i
++) {
1134 stringstream osd_name
;
1135 vector
<string
> move_to
;
1136 osd_name
<< "osd." << i
;
1137 move_to
.push_back("root=default");
1138 string host_loc
= "host=" + host_name
;
1139 move_to
.push_back(host_loc
);
1140 auto r
= crush_move(osdmap
, osd_name
.str(), move_to
);
1144 get_crush(osdmap
, crush
);
1145 auto host_id
= crush
.get_item_id(host_name
);
1146 ASSERT_TRUE(host_id
< 0);
1147 string rule_name
= "rule_for_pool1";
1148 int rule_type
= pg_pool_t::TYPE_REPLICATED
;
1149 ASSERT_TRUE(!crush
.rule_exists(rule_name
));
1151 for (rno
= 0; rno
< crush
.get_max_rules(); rno
++) {
1152 if (!crush
.rule_exists(rno
) && !crush
.ruleset_exists(rno
))
1158 crush_rule
*rule
= crush_make_rule(steps
, rno
, rule_type
, min_size
, max_size
);
1160 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
1161 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
1162 // always choose osd.0
1163 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 0);
1164 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1165 // then pick any other random osds
1166 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, host_id
, 0);
1167 crush_rule_set_step(rule
, step
++, CRUSH_RULE_CHOOSELEAF_FIRSTN
, 2, 0);
1168 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1169 ASSERT_TRUE(step
== steps
);
1170 auto r
= crush_add_rule(crush
.get_crush_map(), rule
, rno
);
1171 ASSERT_TRUE(r
>= 0);
1172 crush
.set_rule_name(rno
, rule_name
);
1174 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1175 pending_inc
.crush
.clear();
1176 crush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
1177 osdmap
.apply_incremental(pending_inc
);
1181 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1182 pending_inc
.new_pool_max
= osdmap
.get_pool_max();
1183 auto pool_id
= ++pending_inc
.new_pool_max
;
1184 pool_1_id
= pool_id
;
1186 auto p
= pending_inc
.get_new_pool(pool_id
, &empty
);
1191 p
->type
= pg_pool_t::TYPE_REPLICATED
;
1192 p
->crush_rule
= rno
;
1193 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
1194 pending_inc
.new_pool_names
[pool_id
] = pool_1
;
1195 osdmap
.apply_incremental(pending_inc
);
1196 ASSERT_TRUE(osdmap
.have_pg_pool(pool_id
));
1197 ASSERT_TRUE(osdmap
.get_pool_name(pool_id
) == pool_1
);
1199 for (unsigned i
= 0; i
< 3; i
++) {
1201 pg_t
rawpg(i
, pool_id
);
1202 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
1205 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
1206 ASSERT_TRUE(up
.size() == 3);
1207 ASSERT_TRUE(up
[0] == 0);
1209 // insert a new pg_upmap
1210 vector
<int32_t> new_up
;
1211 // and remap 1.x to osd.1 only
1212 // this way osd.0 is deemed to be *underfull*
1213 // and osd.1 is deemed to be *overfull*
1214 new_up
.push_back(1);
1216 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1217 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
1218 new_up
.begin(), new_up
.end());
1219 osdmap
.apply_incremental(pending_inc
);
1221 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
1222 ASSERT_TRUE(up
.size() == 1);
1223 ASSERT_TRUE(up
[0] == 1);
1229 // build customized crush rule for "pool2"
1230 string host_name
= "host_for_pool_2";
1231 // build a customized host to capture osd.6~11
1232 for (int i
= 6; i
< (int)get_num_osds(); i
++) {
1233 stringstream osd_name
;
1234 vector
<string
> move_to
;
1235 osd_name
<< "osd." << i
;
1236 move_to
.push_back("root=default");
1237 string host_loc
= "host=" + host_name
;
1238 move_to
.push_back(host_loc
);
1239 auto r
= crush_move(osdmap
, osd_name
.str(), move_to
);
1243 get_crush(osdmap
, crush
);
1244 auto host_id
= crush
.get_item_id(host_name
);
1245 ASSERT_TRUE(host_id
< 0);
1246 string rule_name
= "rule_for_pool2";
1247 int rule_type
= pg_pool_t::TYPE_REPLICATED
;
1248 ASSERT_TRUE(!crush
.rule_exists(rule_name
));
1250 for (rno
= 0; rno
< crush
.get_max_rules(); rno
++) {
1251 if (!crush
.rule_exists(rno
) && !crush
.ruleset_exists(rno
))
1257 crush_rule
*rule
= crush_make_rule(steps
, rno
, rule_type
, min_size
, max_size
);
1259 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
1260 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
1261 // always choose osd.0
1262 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 0);
1263 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1264 // then pick any other random osds
1265 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, host_id
, 0);
1266 crush_rule_set_step(rule
, step
++, CRUSH_RULE_CHOOSELEAF_FIRSTN
, 2, 0);
1267 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1268 ASSERT_TRUE(step
== steps
);
1269 auto r
= crush_add_rule(crush
.get_crush_map(), rule
, rno
);
1270 ASSERT_TRUE(r
>= 0);
1271 crush
.set_rule_name(rno
, rule_name
);
1273 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1274 pending_inc
.crush
.clear();
1275 crush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
1276 osdmap
.apply_incremental(pending_inc
);
1280 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1281 pending_inc
.new_pool_max
= osdmap
.get_pool_max();
1282 auto pool_id
= ++pending_inc
.new_pool_max
;
1284 auto p
= pending_inc
.get_new_pool(pool_id
, &empty
);
1286 // include a single PG
1289 p
->type
= pg_pool_t::TYPE_REPLICATED
;
1290 p
->crush_rule
= rno
;
1291 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
1292 pending_inc
.new_pool_names
[pool_id
] = pool_2
;
1293 osdmap
.apply_incremental(pending_inc
);
1294 ASSERT_TRUE(osdmap
.have_pg_pool(pool_id
));
1295 ASSERT_TRUE(osdmap
.get_pool_name(pool_id
) == pool_2
);
1296 pg_t
rawpg(0, pool_id
);
1297 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
1298 EXPECT_TRUE(!osdmap
.have_pg_upmaps(pgid
));
1301 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
1302 ASSERT_TRUE(up
.size() == 3);
1303 ASSERT_TRUE(up
[0] == 0);
1306 // build a pg_upmap_item that will
1307 // remap pg out from *underfull* osd.0
1308 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1309 new_pg_upmap_items
.push_back(make_pair(0, 10)); // osd.0 -> osd.10
1310 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1311 pending_inc
.new_pg_upmap_items
[pgid
] =
1312 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1313 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1314 osdmap
.apply_incremental(pending_inc
);
1315 ASSERT_TRUE(osdmap
.have_pg_upmaps(pgid
));
1318 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
1319 ASSERT_TRUE(up
.size() == 3);
1320 ASSERT_TRUE(up
[0] == 10);
1326 // require perfect distribution!
1327 auto ret
= g_ceph_context
->_conf
.set_val(
1328 "osd_calc_pg_upmaps_max_stddev", "0");
1330 g_ceph_context
->_conf
.apply_changes(nullptr);
1331 set
<int64_t> only_pools
;
1332 ASSERT_TRUE(pool_1_id
>= 0);
1333 only_pools
.insert(pool_1_id
);
1334 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1335 osdmap
.calc_pg_upmaps(g_ceph_context
,
1336 0, // so we can force optimizing
1340 osdmap
.apply_incremental(pending_inc
);
1344 TEST_F(OSDMapTest
, BUG_40104
) {
1345 // http://tracker.ceph.com/issues/40104
1346 int big_osd_num
= 5000;
1347 int big_pg_num
= 10000;
1348 set_up_map(big_osd_num
, true);
1351 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1352 pending_inc
.new_pool_max
= osdmap
.get_pool_max();
1353 pool_id
= ++pending_inc
.new_pool_max
;
1355 auto p
= pending_inc
.get_new_pool(pool_id
, &empty
);
1358 p
->set_pg_num(big_pg_num
);
1359 p
->set_pgp_num(big_pg_num
);
1360 p
->type
= pg_pool_t::TYPE_REPLICATED
;
1362 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
1363 pending_inc
.new_pool_names
[pool_id
] = "big_pool";
1364 osdmap
.apply_incremental(pending_inc
);
1365 ASSERT_TRUE(osdmap
.have_pg_pool(pool_id
));
1366 ASSERT_TRUE(osdmap
.get_pool_name(pool_id
) == "big_pool");
1369 // generate pg_upmap_items for each pg
1370 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1371 for (int i
= 0; i
< big_pg_num
; i
++) {
1372 pg_t
rawpg(i
, pool_id
);
1373 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
1376 osdmap
.pg_to_raw_up(pgid
, &up
, &up_primary
);
1377 ASSERT_TRUE(up
.size() == 3);
1379 int replaced_by
= random() % big_osd_num
;
1380 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1381 // note that it might or might not be valid, we don't care
1382 new_pg_upmap_items
.push_back(make_pair(victim
, replaced_by
));
1383 pending_inc
.new_pg_upmap_items
[pgid
] =
1384 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1385 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1387 osdmap
.apply_incremental(pending_inc
);
1390 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1391 auto start
= mono_clock::now();
1392 clean_pg_upmaps(g_ceph_context
, osdmap
, pending_inc
);
1393 auto latency
= mono_clock::now() - start
;
1394 std::cout
<< "clean_pg_upmaps (~" << big_pg_num
1395 << " pg_upmap_items) latency:" << timespan_str(latency
)
1400 TEST_F(OSDMapTest
, BUG_42052
) {
1401 // https://tracker.ceph.com/issues/42052
1402 set_up_map(6, true);
1403 const string
pool_name("pool");
1404 // build customized crush rule for "pool"
1406 get_crush(osdmap
, crush
);
1407 string rule_name
= "rule";
1408 int rule_type
= pg_pool_t::TYPE_REPLICATED
;
1409 ASSERT_TRUE(!crush
.rule_exists(rule_name
));
1411 for (rno
= 0; rno
< crush
.get_max_rules(); rno
++) {
1412 if (!crush
.rule_exists(rno
) && !crush
.ruleset_exists(rno
))
1418 crush_rule
*rule
= crush_make_rule(steps
, rno
, rule_type
, min_size
, max_size
);
1420 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
1421 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
1422 // always choose osd.0, osd.1, osd.2
1423 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 0);
1424 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1425 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 1);
1426 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1427 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, 0, 2);
1428 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1429 ASSERT_TRUE(step
== steps
);
1430 auto r
= crush_add_rule(crush
.get_crush_map(), rule
, rno
);
1431 ASSERT_TRUE(r
>= 0);
1432 crush
.set_rule_name(rno
, rule_name
);
1434 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1435 pending_inc
.crush
.clear();
1436 crush
.encode(pending_inc
.crush
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
1437 osdmap
.apply_incremental(pending_inc
);
1441 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1442 pending_inc
.new_pool_max
= osdmap
.get_pool_max();
1443 auto pool_id
= ++pending_inc
.new_pool_max
;
1445 auto p
= pending_inc
.get_new_pool(pool_id
, &empty
);
1450 p
->type
= pg_pool_t::TYPE_REPLICATED
;
1451 p
->crush_rule
= rno
;
1452 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
1453 pending_inc
.new_pool_names
[pool_id
] = pool_name
;
1454 osdmap
.apply_incremental(pending_inc
);
1455 ASSERT_TRUE(osdmap
.have_pg_pool(pool_id
));
1456 ASSERT_TRUE(osdmap
.get_pool_name(pool_id
) == pool_name
);
1457 pg_t
rawpg(0, pool_id
);
1458 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
1460 // pg_upmap 1.0 [2,3,5]
1461 vector
<int32_t> new_up
{2,3,5};
1462 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1463 pending_inc
.new_pg_upmap
[pgid
] = mempool::osdmap::vector
<int32_t>(
1464 new_up
.begin(), new_up
.end());
1465 osdmap
.apply_incremental(pending_inc
);
1468 // pg_upmap_items 1.0 [0,3,4,5]
1469 vector
<pair
<int32_t,int32_t>> new_pg_upmap_items
;
1470 new_pg_upmap_items
.push_back(make_pair(0, 3));
1471 new_pg_upmap_items
.push_back(make_pair(4, 5));
1472 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1473 pending_inc
.new_pg_upmap_items
[pgid
] =
1474 mempool::osdmap::vector
<pair
<int32_t,int32_t>>(
1475 new_pg_upmap_items
.begin(), new_pg_upmap_items
.end());
1476 osdmap
.apply_incremental(pending_inc
);
1479 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
1480 clean_pg_upmaps(g_ceph_context
, osdmap
, pending_inc
);
1481 osdmap
.apply_incremental(pending_inc
);
1482 ASSERT_FALSE(osdmap
.have_pg_upmaps(pgid
));
1486 TEST(PGTempMap
, basic
)
1490 for (auto i
=3; i
<1000; ++i
) {
1492 m
.set(x
, {static_cast<int>(i
)});
1496 ASSERT_NE(m
.find(a
), m
.end());
1497 ASSERT_EQ(m
.find(a
), m
.begin());
1498 ASSERT_EQ(m
.find(b
), m
.end());
1499 ASSERT_EQ(998u, m
.size());