1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #include "gtest/gtest.h"
3 #include "osd/OSDMap.h"
4 #include "osd/OSDMapMapping.h"
6 #include "global/global_context.h"
7 #include "global/global_init.h"
8 #include "common/common_init.h"
9 #include "common/ceph_argparse.h"
// Test entry point: copies argv into an argument vector, initializes the
// global Ceph context as a client-type utility without a default config file,
// overrides two config values the tests rely on, and then runs every
// registered Google Test case, returning its result.
// NOTE(review): the embedded line numbers skip (e.g. 17 and 28), so this
// listing appears to omit some original lines -- confirm against the real file.
15 int main(int argc
, char **argv
) {
16 std::vector
<const char*> args(argv
, argv
+argc
);
18 auto cct
= global_init(nullptr, args
, CEPH_ENTITY_TYPE_CLIENT
,
19 CODE_ENVIRONMENT_UTILITY
,
20 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
21 common_init_finish(g_ceph_context
);
22 // make sure we have 3 copies, or some tests won't work
23 g_ceph_context
->_conf
->set_val("osd_pool_default_size", "3", false);
24 // our map is flat, so just try and split across OSDs, not hosts or whatever
25 g_ceph_context
->_conf
->set_val("osd_crush_chooseleaf_type", "0", false);
// Google Test gets argc/argv last; the result of RUN_ALL_TESTS() is the exit code.
26 ::testing::InitGoogleTest(&argc
, argv
);
27 return RUN_ALL_TESTS();
// Google Test fixture used by the TEST_F cases in this file. It builds an
// OSD map with num_osds (6) OSDs and two pools: an erasure-coded pool named
// "ec" (expected id my_ec_pool == 1) and a replicated pool named "reppool"
// (expected id my_rep_pool == 2).
// NOTE(review): the embedded line numbers skip throughout this class, so
// member declarations, function headers and closing braces appear to be
// missing from this listing -- confirm against the real file before editing.
30 class OSDMapTest
: public testing::Test
{
31 const static int num_osds
= 6;
34 OSDMapMapping mapping
;
35 const uint64_t my_ec_pool
= 1;
36 const uint64_t my_rep_pool
= 2;
// --- map construction ---
// Start from a simple map with num_osds OSDs, then apply an incremental (at
// epoch + 1) that fills in per-OSD state.
43 osdmap
.build_simple(g_ceph_context
, 0, fsid
, num_osds
);
44 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
45 pending_inc
.fsid
= osdmap
.get_fsid();
46 entity_addr_t sample_addr
;
// For every OSD: a fresh random uuid, an address whose nonce is the OSD
// index, state EXISTS|NEW, all four "up" address maps, and weight CEPH_OSD_IN.
48 for (int i
= 0; i
< num_osds
; ++i
) {
49 sample_uuid
.generate_random();
50 sample_addr
.nonce
= i
;
51 pending_inc
.new_state
[i
] = CEPH_OSD_EXISTS
| CEPH_OSD_NEW
;
52 pending_inc
.new_up_client
[i
] = sample_addr
;
53 pending_inc
.new_up_cluster
[i
] = sample_addr
;
54 pending_inc
.new_hb_back_up
[i
] = sample_addr
;
55 pending_inc
.new_hb_front_up
[i
] = sample_addr
;
56 pending_inc
.new_weight
[i
] = CEPH_OSD_IN
;
57 pending_inc
.new_uuid
[i
] = sample_uuid
;
59 osdmap
.apply_incremental(pending_inc
);
61 // Create an EC ruleset and a pool using it
62 int r
= osdmap
.crush
->add_simple_rule(
63 "erasure", "default", "osd", "",
64 "indep", pg_pool_t::TYPE_ERASURE
,
// Both pools are created in one further incremental; pool ids come from
// pre-incrementing new_pool_max, which starts at the map's current pool max.
67 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
68 new_pool_inc
.new_pool_max
= osdmap
.get_pool_max();
69 new_pool_inc
.fsid
= osdmap
.get_fsid();
72 uint64_t pool_id
= ++new_pool_inc
.new_pool_max
;
// The tests address the pools by the fixed ids above, so pin the id here.
73 assert(pool_id
== my_ec_pool
);
74 pg_pool_t
*p
= new_pool_inc
.get_new_pool(pool_id
, &empty
);
78 p
->type
= pg_pool_t::TYPE_ERASURE
;
80 new_pool_inc
.new_pool_names
[pool_id
] = "ec";
81 // and a replicated pool
82 pool_id
= ++new_pool_inc
.new_pool_max
;
83 assert(pool_id
== my_rep_pool
);
84 p
= new_pool_inc
.get_new_pool(pool_id
, &empty
);
88 p
->type
= pg_pool_t::TYPE_REPLICATED
;
90 p
->set_flag(pg_pool_t::FLAG_HASHPSPOOL
);
91 new_pool_inc
.new_pool_names
[pool_id
] = "reppool";
92 osdmap
.apply_incremental(new_pool_inc
);
// Returns the fixture's OSD count (num_osds) as an unsigned value.
94 unsigned int get_num_osds() { return num_osds
; }
// --- mapping statistics helper ---
// Refreshes the precalculated `mapping` from the current osdmap, then loops
// `num` times computing up/acting sets with pg_to_up_acting_osds(), bumping
// the per-OSD counters in *first (first acting OSD) and *primary (acting
// primary, when >= 0), and asserting that the precalculated mapping returns
// the same up primary, acting set and acting primary. Finally prints *any,
// *first and *primary.
// NOTE(review): part of the parameter list and several body lines (embedded
// lines 97-99, 105, 109-110, 114, 120) are not present in this listing; in
// particular the `for (j...)` loop body and the guard before the *first
// update are not shown -- confirm against the real file.
96 void test_mappings(int pool
,
100 vector
<int> *primary
) {
101 mapping
.update(osdmap
);
102 for (int i
=0; i
<num
; ++i
) {
103 vector
<int> up
, acting
;
104 int up_primary
, acting_primary
;
106 osdmap
.pg_to_up_acting_osds(pgid
,
107 &up
, &up_primary
, &acting
, &acting_primary
);
108 for (unsigned j
=0; j
<acting
.size(); ++j
)
111 (*first
)[acting
[0]]++;
112 if (acting_primary
>= 0)
113 (*primary
)[acting_primary
]++;
115 // compare to precalc mapping
116 vector
<int> up2
, acting2
;
117 int up_primary2
, acting_primary2
;
118 pgid
= osdmap
.raw_pg_to_pg(pgid
);
119 mapping
.get(pgid
, &up2
, &up_primary2
, &acting2
, &acting_primary2
);
121 ASSERT_EQ(up_primary
, up_primary2
);
122 ASSERT_EQ(acting
, acting2
);
123 ASSERT_EQ(acting_primary
, acting_primary2
);
125 cout
<< "any: " << *any
<< std::endl
;;
126 cout
<< "first: " << *first
<< std::endl
;;
127 cout
<< "primary: " << *primary
<< std::endl
;;
// Sanity check on the fixture's map: both the map's max OSD count and its
// number of "in" OSDs must equal get_num_osds().
// NOTE(review): embedded line 132 is absent from this listing (presumably the
// fixture set-up call) -- confirm against the real file.
131 TEST_F(OSDMapTest
, Create
) {
133 ASSERT_EQ(get_num_osds(), (unsigned)osdmap
.get_max_osd());
134 ASSERT_EQ(get_num_osds(), osdmap
.get_num_in_osds());
// Checks which feature bits osdmap.get_features() reports for three entity
// types: OSD and CLIENT on the fixture's map, then MON after an incremental
// that removes the "ec" pool and sets a primary affinity on OSD 0.
// NOTE(review): embedded lines 138-139, 160 and 165-166 are absent from this
// listing -- confirm against the real file.
137 TEST_F(OSDMapTest
, Features
) {
// OSD view: every feature checked below is expected to be set.
140 uint64_t features
= osdmap
.get_features(CEPH_ENTITY_TYPE_OSD
, NULL
);
141 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
142 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
143 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
);
144 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_V2
);
145 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_ERASURE_CODES
);
146 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
147 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
149 // clients have a slightly different view
150 features
= osdmap
.get_features(CEPH_ENTITY_TYPE_CLIENT
, NULL
);
151 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
152 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
153 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
);
154 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_V2
);
155 ASSERT_FALSE(features
& CEPH_FEATURE_OSD_ERASURE_CODES
); // don't need this
156 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
157 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
159 // remove the EC pool, but leave the rule. add primary affinity.
161 OSDMap::Incremental
new_pool_inc(osdmap
.get_epoch() + 1);
162 new_pool_inc
.old_pools
.insert(osdmap
.lookup_pg_pool_name("ec"));
163 new_pool_inc
.new_primary_affinity
[0] = 0x8000;
164 osdmap
.apply_incremental(new_pool_inc
);
// MON view after the change: CRUSH_V2 and OSD_ERASURE_CODES are now expected
// to be clear, the rest still set.
167 features
= osdmap
.get_features(CEPH_ENTITY_TYPE_MON
, NULL
);
168 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES
);
169 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES2
);
170 ASSERT_TRUE(features
& CEPH_FEATURE_CRUSH_TUNABLES3
); // shared bit with primary affinity
171 ASSERT_FALSE(features
& CEPH_FEATURE_CRUSH_V2
);
172 ASSERT_FALSE(features
& CEPH_FEATURE_OSD_ERASURE_CODES
);
173 ASSERT_TRUE(features
& CEPH_FEATURE_OSDHASHPSPOOL
);
174 ASSERT_TRUE(features
& CEPH_FEATURE_OSD_PRIMARY_AFFINITY
);
176 // FIXME: test tiering feature bits
// Maps PG 0 of the replicated pool through both pg_to_up_acting_osds()
// overloads used here (pointer out-params with primaries, and reference
// out-params without) and requires identical up and acting sets. Also
// requires the up set to be as large as the pool's size.
// NOTE(review): embedded lines 180-181 are absent from this listing --
// confirm against the real file.
179 TEST_F(OSDMapTest
, MapPG
) {
182 std::cerr
<< " osdmap.pool_max==" << osdmap
.get_pool_max() << std::endl
;
183 pg_t
rawpg(0, my_rep_pool
, -1);
184 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
185 vector
<int> up_osds
, acting_osds
;
186 int up_primary
, acting_primary
;
188 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
189 &acting_osds
, &acting_primary
);
// Same query through the overload that takes the vectors by reference.
191 vector
<int> old_up_osds
, old_acting_osds
;
192 osdmap
.pg_to_up_acting_osds(pgid
, old_up_osds
, old_acting_osds
);
193 ASSERT_EQ(old_up_osds
, up_osds
);
194 ASSERT_EQ(old_acting_osds
, acting_osds
);
196 ASSERT_EQ(osdmap
.get_pg_pool(my_rep_pool
)->get_size(), up_osds
.size());
// Cross-checks the mapping query functions for PG 0 of the replicated pool:
// both pg_to_up_acting_osds() overloads must agree on the up and acting sets
// (hard ASSERTs), and both pg_to_acting_osds() overloads must reproduce the
// acting set and acting primary (soft EXPECTs).
// NOTE(review): embedded line 201 is absent from this listing -- confirm
// against the real file.
199 TEST_F(OSDMapTest
, MapFunctionsMatch
) {
200 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
202 pg_t
rawpg(0, my_rep_pool
, -1);
203 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
204 vector
<int> up_osds
, acting_osds
;
205 int up_primary
, acting_primary
;
207 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
208 &acting_osds
, &acting_primary
);
210 vector
<int> up_osds_two
, acting_osds_two
;
212 osdmap
.pg_to_up_acting_osds(pgid
, up_osds_two
, acting_osds_two
);
214 ASSERT_EQ(up_osds
, up_osds_two
);
215 ASSERT_EQ(acting_osds
, acting_osds_two
);
// acting_osds_two is reused as the output of each pg_to_acting_osds() call.
217 int acting_primary_two
;
218 osdmap
.pg_to_acting_osds(pgid
, &acting_osds_two
, &acting_primary_two
);
219 EXPECT_EQ(acting_osds
, acting_osds_two
);
220 EXPECT_EQ(acting_primary
, acting_primary_two
);
221 osdmap
.pg_to_acting_osds(pgid
, acting_osds_two
);
222 EXPECT_EQ(acting_osds
, acting_osds_two
);
// Expects that, for PG 0 of the replicated pool, the up primary is the first
// entry of the up set and the acting primary is the first entry of the
// acting set.
// NOTE(review): embedded lines 228-229 are absent from this listing --
// confirm against the real file.
225 /** This test must be removed or modified appropriately when we allow
226 * other ways to specify a primary. */
227 TEST_F(OSDMapTest
, PrimaryIsFirst
) {
230 pg_t
rawpg(0, my_rep_pool
, -1);
231 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
232 vector
<int> up_osds
, acting_osds
;
233 int up_primary
, acting_primary
;
235 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
236 &acting_osds
, &acting_primary
);
237 EXPECT_EQ(up_osds
[0], up_primary
);
238 EXPECT_EQ(acting_osds
[0], acting_primary
);
// Installs a pg_temp entry for PG 0 of the replicated pool (the current
// acting set with its first and last elements swapped) via an incremental,
// then expects the reported acting set to equal that pg_temp ordering.
// NOTE(review): embedded lines 242-243 are absent from this listing --
// confirm against the real file.
241 TEST_F(OSDMapTest
, PGTempRespected
) {
244 pg_t
rawpg(0, my_rep_pool
, -1);
245 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
246 vector
<int> up_osds
, acting_osds
;
247 int up_primary
, acting_primary
;
249 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
250 &acting_osds
, &acting_primary
);
252 // copy and swap first and last element in acting_osds
253 vector
<int> new_acting_osds(acting_osds
);
254 int first
= new_acting_osds
[0];
255 new_acting_osds
[0] = *new_acting_osds
.rbegin();
256 *new_acting_osds
.rbegin() = first
;
258 // apply pg_temp to osdmap
259 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
260 pgtemp_map
.new_pg_temp
[pgid
] = mempool::osdmap::vector
<int>(
261 new_acting_osds
.begin(), new_acting_osds
.end());
262 osdmap
.apply_incremental(pgtemp_map
);
// Re-query after the incremental: the acting set must now follow pg_temp.
264 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
265 &acting_osds
, &acting_primary
);
266 EXPECT_EQ(new_acting_osds
, acting_osds
);
// Sets a primary_temp entry for PG 0 of the replicated pool that names the
// second OSD of the acting set, applies it via an incremental, and expects
// the reported acting primary to be that OSD.
// NOTE(review): embedded lines 270-271 and 274 (where up_osds would be
// declared) are absent from this listing -- confirm against the real file.
269 TEST_F(OSDMapTest
, PrimaryTempRespected
) {
272 pg_t
rawpg(0, my_rep_pool
, -1);
273 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
275 vector
<int> acting_osds
;
276 int up_primary
, acting_primary
;
278 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
279 &acting_osds
, &acting_primary
);
281 // make second OSD primary via incremental
282 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
283 pgtemp_map
.new_primary_temp
[pgid
] = acting_osds
[1];
284 osdmap
.apply_incremental(pgtemp_map
);
// Re-query after the incremental and compare against the second acting OSD.
286 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
287 &acting_osds
, &acting_primary
);
288 EXPECT_EQ(acting_primary
, acting_osds
[1]);
// Exercises OSDMap::clean_temps() with temp mappings that merely repeat the
// computed up set / up primary:
//  - pga: such a pg_temp/primary_temp is applied to the osdmap (pgtemp_map);
//    afterwards pending_inc is expected to hold an empty pg_temp vector and a
//    primary_temp of -1 for it.
//  - pgb: such a pg_temp/primary_temp is only queued in pending_inc;
//    afterwards pending_inc is expected to hold no entry for it at all.
// NOTE(review): embedded lines 292-293, 297, 305, 307 and 315-316 are absent
// from this listing (the two declaration groups are presumably in separate
// brace scopes) -- confirm against the real file.
291 TEST_F(OSDMapTest
, CleanTemps
) {
294 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
295 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 2);
296 pg_t pga
= osdmap
.raw_pg_to_pg(pg_t(0, my_rep_pool
));
298 vector
<int> up_osds
, acting_osds
;
299 int up_primary
, acting_primary
;
300 osdmap
.pg_to_up_acting_osds(pga
, &up_osds
, &up_primary
,
301 &acting_osds
, &acting_primary
);
302 pgtemp_map
.new_pg_temp
[pga
] = mempool::osdmap::vector
<int>(
303 up_osds
.begin(), up_osds
.end());
304 pgtemp_map
.new_primary_temp
[pga
] = up_primary
;
306 pg_t pgb
= osdmap
.raw_pg_to_pg(pg_t(1, my_rep_pool
));
308 vector
<int> up_osds
, acting_osds
;
309 int up_primary
, acting_primary
;
310 osdmap
.pg_to_up_acting_osds(pgb
, &up_osds
, &up_primary
,
311 &acting_osds
, &acting_primary
);
312 pending_inc
.new_pg_temp
[pgb
] = mempool::osdmap::vector
<int>(
313 up_osds
.begin(), up_osds
.end());
314 pending_inc
.new_primary_temp
[pgb
] = up_primary
;
// Only pgtemp_map (pga) is applied to the map; pending_inc is handed to
// clean_temps() unapplied.
317 osdmap
.apply_incremental(pgtemp_map
);
319 OSDMap::clean_temps(g_ceph_context
, osdmap
, &pending_inc
);
321 EXPECT_TRUE(pending_inc
.new_pg_temp
.count(pga
) &&
322 pending_inc
.new_pg_temp
[pga
].size() == 0);
323 EXPECT_EQ(-1, pending_inc
.new_primary_temp
[pga
]);
325 EXPECT_TRUE(!pending_inc
.new_pg_temp
.count(pgb
) &&
326 !pending_inc
.new_primary_temp
.count(pgb
));
// Applies a pg_temp (built from up_osds) and a primary_temp (up_osds[1]) for
// PG 0 of the replicated pool, then runs OSDMap::clean_temps() on a fresh
// incremental and expects it to contain no pg_temp or primary_temp entry for
// that PG.
// NOTE(review): embedded lines 330-331, 336, 339, 343, 345, 348-358, 361,
// 366 and 368 are absent from this listing, including the declaration of `i`
// and the body of the search loop that, per the existing comments, looks for
// an OSD not in the up set and substitutes it -- confirm against the real file.
329 TEST_F(OSDMapTest
, KeepsNecessaryTemps
) {
332 pg_t
rawpg(0, my_rep_pool
, -1);
333 pg_t pgid
= osdmap
.raw_pg_to_pg(rawpg
);
334 vector
<int> up_osds
, acting_osds
;
335 int up_primary
, acting_primary
;
337 osdmap
.pg_to_up_acting_osds(pgid
, &up_osds
, &up_primary
,
338 &acting_osds
, &acting_primary
);
340 // find unused OSD and stick it in there
341 OSDMap::Incremental
pgtemp_map(osdmap
.get_epoch() + 1);
342 // find an unused osd and put it in place of the first one
344 for(; i
!= (int)get_num_osds(); ++i
) {
346 for (vector
<int>::iterator osd_it
= up_osds
.begin();
347 osd_it
!= up_osds
.end();
// Reaching the end of the OSD range means the search failed.
359 if (i
== (int)get_num_osds())
360 FAIL() << "did not find unused OSD for temp mapping";
362 pgtemp_map
.new_pg_temp
[pgid
] = mempool::osdmap::vector
<int>(
363 up_osds
.begin(), up_osds
.end());
364 pgtemp_map
.new_primary_temp
[pgid
] = up_osds
[1];
365 osdmap
.apply_incremental(pgtemp_map
);
367 OSDMap::Incremental
pending_inc(osdmap
.get_epoch() + 1);
369 OSDMap::clean_temps(g_ceph_context
, osdmap
, &pending_inc
);
370 EXPECT_FALSE(pending_inc
.new_pg_temp
.count(pgid
));
371 EXPECT_FALSE(pending_inc
.new_primary_temp
.count(pgid
));
// For each pool in the map, runs test_mappings() over 10000 PGs under three
// primary-affinity settings and checks the per-OSD counters it fills in:
//  1. settings as built by the fixture,
//  2. affinity 0 passed for both index 0 and index 1,
//  3. affinity 0x8000 for index 0 ("half weight") and 0 for index 1,
// then sets both back to 0x10000.
// NOTE(review): many embedded lines are absent from this listing (e.g.
// 375-376, 380-381, 385, 394-396, 399, 406, 409, 411-412, 414-417, 429, 432,
// 435, 437, 440-443, 446-447), including the `if`/`else` lines that decide
// which of the adjacent ASSERT_LT / ASSERT_EQ checks applies to which OSD
// index, and the definition of `pool`. Do not read the assertions below as
// unconditional -- confirm against the real file.
374 TEST_F(OSDMapTest
, PrimaryAffinity
) {
377 int n
= get_num_osds();
378 for (map
<int64_t,pg_pool_t
>::const_iterator p
= osdmap
.get_pools().begin();
379 p
!= osdmap
.get_pools().end();
382 int expect_primary
= 10000 / n
;
383 cout
<< "pool " << pool
<< " size " << (int)p
->second
.size
384 << " expect_primary " << expect_primary
<< std::endl
;
// Phase 1: fixture settings -- every OSD is expected to appear in each counter.
386 vector
<int> any(n
, 0);
387 vector
<int> first(n
, 0);
388 vector
<int> primary(n
, 0);
389 test_mappings(pool
, 10000, &any
, &first
, &primary
);
390 for (int i
=0; i
<n
; ++i
) {
391 ASSERT_LT(0, any
[i
]);
392 ASSERT_LT(0, first
[i
]);
393 ASSERT_LT(0, primary
[i
]);
// Phase 2: primary affinity 0 for indexes 0 and 1.
397 osdmap
.set_primary_affinity(0, 0);
398 osdmap
.set_primary_affinity(1, 0);
400 vector
<int> any(n
, 0);
401 vector
<int> first(n
, 0);
402 vector
<int> primary(n
, 0);
403 test_mappings(pool
, 10000, &any
, &first
, &primary
);
404 for (int i
=0; i
<n
; ++i
) {
405 ASSERT_LT(0, any
[i
]);
407 ASSERT_LT(0, first
[i
]);
408 ASSERT_LT(0, primary
[i
]);
410 if (p
->second
.is_replicated()) {
411 ASSERT_EQ(0, first
[i
]);
413 ASSERT_EQ(0, primary
[i
]);
// Phase 3: primary affinity 0x8000 for index 0, 0 for index 1.
418 osdmap
.set_primary_affinity(0, 0x8000);
419 osdmap
.set_primary_affinity(1, 0);
421 vector
<int> any(n
, 0);
422 vector
<int> first(n
, 0);
423 vector
<int> primary(n
, 0);
424 test_mappings(pool
, 10000, &any
, &first
, &primary
);
425 int expect
= (10000 / (n
-2)) / 2; // half weight
426 cout
<< "expect " << expect
<< std::endl
;
427 for (int i
=0; i
<n
; ++i
) {
428 ASSERT_LT(0, any
[i
]);
430 ASSERT_LT(0, first
[i
]);
431 ASSERT_LT(0, primary
[i
]);
433 if (p
->second
.is_replicated()) {
434 ASSERT_EQ(0, first
[i
]);
436 ASSERT_EQ(0, primary
[i
]);
// primary[0] must land strictly between 2/3 and 4/3 of `expect`.
438 ASSERT_LT(expect
*2/3, primary
[0]);
439 ASSERT_GT(expect
*4/3, primary
[0]);
// Set both affinities to 0x10000 before moving on.
444 osdmap
.set_primary_affinity(0, 0x10000);
445 osdmap
.set_primary_affinity(1, 0x10000);
// Standalone (non-fixture) test of a PGTempMap-style container `m`: inserts
// one single-element entry per i in [3, 1000) via m.set(), then checks that
// find(a) succeeds and equals begin(), that find(b) returns end(), and that
// the container holds 998 entries.
// NOTE(review): embedded lines 450-452, 454 and 456-458 are absent from this
// listing, so the declarations of m, a, b and x are not visible. The loop
// alone contributes 997 entries, so one more set() call (presumably for `a`)
// must be among the missing lines -- confirm against the real file.
449 TEST(PGTempMap
, basic
)
453 for (auto i
=3; i
<1000; ++i
) {
455 m
.set(x
, {static_cast<int>(i
)});
459 ASSERT_NE(m
.find(a
), m
.end());
460 ASSERT_EQ(m
.find(a
), m
.begin());
461 ASSERT_EQ(m
.find(b
), m
.end());
462 ASSERT_EQ(998u, m
.size());