]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/osd/TestOSDMap.cc
update sources to v12.1.1
[ceph.git] / ceph / src / test / osd / TestOSDMap.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #include "gtest/gtest.h"
3 #include "osd/OSDMap.h"
4 #include "osd/OSDMapMapping.h"
5
6 #include "global/global_context.h"
7 #include "global/global_init.h"
8 #include "common/common_init.h"
9 #include "common/ceph_argparse.h"
10
11 #include <iostream>
12
13 using namespace std;
14
15 int main(int argc, char **argv) {
16 std::vector<const char*> args(argv, argv+argc);
17 env_to_vec(args);
18 auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
19 CODE_ENVIRONMENT_UTILITY,
20 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
21 common_init_finish(g_ceph_context);
22 // make sure we have 3 copies, or some tests won't work
23 g_ceph_context->_conf->set_val("osd_pool_default_size", "3", false);
24 // our map is flat, so just try and split across OSDs, not hosts or whatever
25 g_ceph_context->_conf->set_val("osd_crush_chooseleaf_type", "0", false);
26 ::testing::InitGoogleTest(&argc, argv);
27 return RUN_ALL_TESTS();
28 }
29
30 class OSDMapTest : public testing::Test {
31 const static int num_osds = 6;
32 public:
33 OSDMap osdmap;
34 OSDMapMapping mapping;
35 const uint64_t my_ec_pool = 1;
36 const uint64_t my_rep_pool = 2;
37
38
39 OSDMapTest() {}
40
41 void set_up_map() {
42 uuid_d fsid;
43 osdmap.build_simple(g_ceph_context, 0, fsid, num_osds);
44 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
45 pending_inc.fsid = osdmap.get_fsid();
46 entity_addr_t sample_addr;
47 uuid_d sample_uuid;
48 for (int i = 0; i < num_osds; ++i) {
49 sample_uuid.generate_random();
50 sample_addr.nonce = i;
51 pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
52 pending_inc.new_up_client[i] = sample_addr;
53 pending_inc.new_up_cluster[i] = sample_addr;
54 pending_inc.new_hb_back_up[i] = sample_addr;
55 pending_inc.new_hb_front_up[i] = sample_addr;
56 pending_inc.new_weight[i] = CEPH_OSD_IN;
57 pending_inc.new_uuid[i] = sample_uuid;
58 }
59 osdmap.apply_incremental(pending_inc);
60
61 // Create an EC ruleset and a pool using it
62 int r = osdmap.crush->add_simple_rule(
63 "erasure", "default", "osd", "",
64 "indep", pg_pool_t::TYPE_ERASURE,
65 &cerr);
66
67 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
68 new_pool_inc.new_pool_max = osdmap.get_pool_max();
69 new_pool_inc.fsid = osdmap.get_fsid();
70 pg_pool_t empty;
71 // make an ec pool
72 uint64_t pool_id = ++new_pool_inc.new_pool_max;
73 assert(pool_id == my_ec_pool);
74 pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
75 p->size = 3;
76 p->set_pg_num(64);
77 p->set_pgp_num(64);
78 p->type = pg_pool_t::TYPE_ERASURE;
79 p->crush_rule = r;
80 new_pool_inc.new_pool_names[pool_id] = "ec";
81 // and a replicated pool
82 pool_id = ++new_pool_inc.new_pool_max;
83 assert(pool_id == my_rep_pool);
84 p = new_pool_inc.get_new_pool(pool_id, &empty);
85 p->size = 3;
86 p->set_pg_num(64);
87 p->set_pgp_num(64);
88 p->type = pg_pool_t::TYPE_REPLICATED;
89 p->crush_rule = 0;
90 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
91 new_pool_inc.new_pool_names[pool_id] = "reppool";
92 osdmap.apply_incremental(new_pool_inc);
93 }
94 unsigned int get_num_osds() { return num_osds; }
95
96 void test_mappings(int pool,
97 int num,
98 vector<int> *any,
99 vector<int> *first,
100 vector<int> *primary) {
101 mapping.update(osdmap);
102 for (int i=0; i<num; ++i) {
103 vector<int> up, acting;
104 int up_primary, acting_primary;
105 pg_t pgid(i, pool);
106 osdmap.pg_to_up_acting_osds(pgid,
107 &up, &up_primary, &acting, &acting_primary);
108 for (unsigned j=0; j<acting.size(); ++j)
109 (*any)[acting[j]]++;
110 if (!acting.empty())
111 (*first)[acting[0]]++;
112 if (acting_primary >= 0)
113 (*primary)[acting_primary]++;
114
115 // compare to precalc mapping
116 vector<int> up2, acting2;
117 int up_primary2, acting_primary2;
118 pgid = osdmap.raw_pg_to_pg(pgid);
119 mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
120 ASSERT_EQ(up, up2);
121 ASSERT_EQ(up_primary, up_primary2);
122 ASSERT_EQ(acting, acting2);
123 ASSERT_EQ(acting_primary, acting_primary2);
124 }
125 cout << "any: " << *any << std::endl;;
126 cout << "first: " << *first << std::endl;;
127 cout << "primary: " << *primary << std::endl;;
128 }
129 };
130
131 TEST_F(OSDMapTest, Create) {
132 set_up_map();
133 ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd());
134 ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds());
135 }
136
137 TEST_F(OSDMapTest, Features) {
138 // with EC pool
139 set_up_map();
140 uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
141 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
142 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
143 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
144 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
145 ASSERT_TRUE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
146 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
147 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
148
149 // clients have a slightly different view
150 features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
151 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
152 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
153 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
154 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
155 ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES); // dont' need this
156 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
157 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
158
159 // remove teh EC pool, but leave the rule. add primary affinity.
160 {
161 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
162 new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
163 new_pool_inc.new_primary_affinity[0] = 0x8000;
164 osdmap.apply_incremental(new_pool_inc);
165 }
166
167 features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
168 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
169 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
170 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
171 ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
172 ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
173 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
174 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
175
176 // FIXME: test tiering feature bits
177 }
178
179 TEST_F(OSDMapTest, MapPG) {
180 set_up_map();
181
182 std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl;
183 pg_t rawpg(0, my_rep_pool, -1);
184 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
185 vector<int> up_osds, acting_osds;
186 int up_primary, acting_primary;
187
188 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
189 &acting_osds, &acting_primary);
190
191 vector<int> old_up_osds, old_acting_osds;
192 osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
193 ASSERT_EQ(old_up_osds, up_osds);
194 ASSERT_EQ(old_acting_osds, acting_osds);
195
196 ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size());
197 }
198
199 TEST_F(OSDMapTest, MapFunctionsMatch) {
200 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
201 set_up_map();
202 pg_t rawpg(0, my_rep_pool, -1);
203 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
204 vector<int> up_osds, acting_osds;
205 int up_primary, acting_primary;
206
207 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
208 &acting_osds, &acting_primary);
209
210 vector<int> up_osds_two, acting_osds_two;
211
212 osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);
213
214 ASSERT_EQ(up_osds, up_osds_two);
215 ASSERT_EQ(acting_osds, acting_osds_two);
216
217 int acting_primary_two;
218 osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
219 EXPECT_EQ(acting_osds, acting_osds_two);
220 EXPECT_EQ(acting_primary, acting_primary_two);
221 osdmap.pg_to_acting_osds(pgid, acting_osds_two);
222 EXPECT_EQ(acting_osds, acting_osds_two);
223 }
224
225 /** This test must be removed or modified appropriately when we allow
226 * other ways to specify a primary. */
227 TEST_F(OSDMapTest, PrimaryIsFirst) {
228 set_up_map();
229
230 pg_t rawpg(0, my_rep_pool, -1);
231 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
232 vector<int> up_osds, acting_osds;
233 int up_primary, acting_primary;
234
235 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
236 &acting_osds, &acting_primary);
237 EXPECT_EQ(up_osds[0], up_primary);
238 EXPECT_EQ(acting_osds[0], acting_primary);
239 }
240
241 TEST_F(OSDMapTest, PGTempRespected) {
242 set_up_map();
243
244 pg_t rawpg(0, my_rep_pool, -1);
245 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
246 vector<int> up_osds, acting_osds;
247 int up_primary, acting_primary;
248
249 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
250 &acting_osds, &acting_primary);
251
252 // copy and swap first and last element in acting_osds
253 vector<int> new_acting_osds(acting_osds);
254 int first = new_acting_osds[0];
255 new_acting_osds[0] = *new_acting_osds.rbegin();
256 *new_acting_osds.rbegin() = first;
257
258 // apply pg_temp to osdmap
259 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
260 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
261 new_acting_osds.begin(), new_acting_osds.end());
262 osdmap.apply_incremental(pgtemp_map);
263
264 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
265 &acting_osds, &acting_primary);
266 EXPECT_EQ(new_acting_osds, acting_osds);
267 }
268
269 TEST_F(OSDMapTest, PrimaryTempRespected) {
270 set_up_map();
271
272 pg_t rawpg(0, my_rep_pool, -1);
273 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
274 vector<int> up_osds;
275 vector<int> acting_osds;
276 int up_primary, acting_primary;
277
278 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
279 &acting_osds, &acting_primary);
280
281 // make second OSD primary via incremental
282 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
283 pgtemp_map.new_primary_temp[pgid] = acting_osds[1];
284 osdmap.apply_incremental(pgtemp_map);
285
286 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
287 &acting_osds, &acting_primary);
288 EXPECT_EQ(acting_primary, acting_osds[1]);
289 }
290
291 TEST_F(OSDMapTest, CleanTemps) {
292 set_up_map();
293
294 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
295 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
296 pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
297 {
298 vector<int> up_osds, acting_osds;
299 int up_primary, acting_primary;
300 osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary,
301 &acting_osds, &acting_primary);
302 pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>(
303 up_osds.begin(), up_osds.end());
304 pgtemp_map.new_primary_temp[pga] = up_primary;
305 }
306 pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool));
307 {
308 vector<int> up_osds, acting_osds;
309 int up_primary, acting_primary;
310 osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary,
311 &acting_osds, &acting_primary);
312 pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
313 up_osds.begin(), up_osds.end());
314 pending_inc.new_primary_temp[pgb] = up_primary;
315 }
316
317 osdmap.apply_incremental(pgtemp_map);
318
319 OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
320
321 EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
322 pending_inc.new_pg_temp[pga].size() == 0);
323 EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);
324
325 EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) &&
326 !pending_inc.new_primary_temp.count(pgb));
327 }
328
329 TEST_F(OSDMapTest, KeepsNecessaryTemps) {
330 set_up_map();
331
332 pg_t rawpg(0, my_rep_pool, -1);
333 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
334 vector<int> up_osds, acting_osds;
335 int up_primary, acting_primary;
336
337 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
338 &acting_osds, &acting_primary);
339
340 // find unused OSD and stick it in there
341 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
342 // find an unused osd and put it in place of the first one
343 int i = 0;
344 for(; i != (int)get_num_osds(); ++i) {
345 bool in_use = false;
346 for (vector<int>::iterator osd_it = up_osds.begin();
347 osd_it != up_osds.end();
348 ++osd_it) {
349 if (i == *osd_it) {
350 in_use = true;
351 break;
352 }
353 }
354 if (!in_use) {
355 up_osds[1] = i;
356 break;
357 }
358 }
359 if (i == (int)get_num_osds())
360 FAIL() << "did not find unused OSD for temp mapping";
361
362 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
363 up_osds.begin(), up_osds.end());
364 pgtemp_map.new_primary_temp[pgid] = up_osds[1];
365 osdmap.apply_incremental(pgtemp_map);
366
367 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
368
369 OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
370 EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
371 EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
372 }
373
374 TEST_F(OSDMapTest, PrimaryAffinity) {
375 set_up_map();
376
377 int n = get_num_osds();
378 for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
379 p != osdmap.get_pools().end();
380 ++p) {
381 int pool = p->first;
382 int expect_primary = 10000 / n;
383 cout << "pool " << pool << " size " << (int)p->second.size
384 << " expect_primary " << expect_primary << std::endl;
385 {
386 vector<int> any(n, 0);
387 vector<int> first(n, 0);
388 vector<int> primary(n, 0);
389 test_mappings(pool, 10000, &any, &first, &primary);
390 for (int i=0; i<n; ++i) {
391 ASSERT_LT(0, any[i]);
392 ASSERT_LT(0, first[i]);
393 ASSERT_LT(0, primary[i]);
394 }
395 }
396
397 osdmap.set_primary_affinity(0, 0);
398 osdmap.set_primary_affinity(1, 0);
399 {
400 vector<int> any(n, 0);
401 vector<int> first(n, 0);
402 vector<int> primary(n, 0);
403 test_mappings(pool, 10000, &any, &first, &primary);
404 for (int i=0; i<n; ++i) {
405 ASSERT_LT(0, any[i]);
406 if (i >= 2) {
407 ASSERT_LT(0, first[i]);
408 ASSERT_LT(0, primary[i]);
409 } else {
410 if (p->second.is_replicated()) {
411 ASSERT_EQ(0, first[i]);
412 }
413 ASSERT_EQ(0, primary[i]);
414 }
415 }
416 }
417
418 osdmap.set_primary_affinity(0, 0x8000);
419 osdmap.set_primary_affinity(1, 0);
420 {
421 vector<int> any(n, 0);
422 vector<int> first(n, 0);
423 vector<int> primary(n, 0);
424 test_mappings(pool, 10000, &any, &first, &primary);
425 int expect = (10000 / (n-2)) / 2; // half weight
426 cout << "expect " << expect << std::endl;
427 for (int i=0; i<n; ++i) {
428 ASSERT_LT(0, any[i]);
429 if (i >= 2) {
430 ASSERT_LT(0, first[i]);
431 ASSERT_LT(0, primary[i]);
432 } else if (i == 1) {
433 if (p->second.is_replicated()) {
434 ASSERT_EQ(0, first[i]);
435 }
436 ASSERT_EQ(0, primary[i]);
437 } else {
438 ASSERT_LT(expect *2/3, primary[0]);
439 ASSERT_GT(expect *4/3, primary[0]);
440 }
441 }
442 }
443
444 osdmap.set_primary_affinity(0, 0x10000);
445 osdmap.set_primary_affinity(1, 0x10000);
446 }
447 }
448
449 TEST(PGTempMap, basic)
450 {
451 PGTempMap m;
452 pg_t a(1,1);
453 for (auto i=3; i<1000; ++i) {
454 pg_t x(i, 1);
455 m.set(x, {static_cast<int>(i)});
456 }
457 pg_t b(2,1);
458 m.set(a, {1, 2});
459 ASSERT_NE(m.find(a), m.end());
460 ASSERT_EQ(m.find(a), m.begin());
461 ASSERT_EQ(m.find(b), m.end());
462 ASSERT_EQ(998u, m.size());
463 }