]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/osd/TestOSDMap.cc
update source to 12.2.11
[ceph.git] / ceph / src / test / osd / TestOSDMap.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #include "gtest/gtest.h"
3 #include "osd/OSDMap.h"
4 #include "osd/OSDMapMapping.h"
5
6 #include "global/global_context.h"
7 #include "global/global_init.h"
8 #include "common/common_init.h"
9 #include "common/ceph_argparse.h"
10
11 #include <iostream>
12
13 using namespace std;
14
15 int main(int argc, char **argv) {
16 std::vector<const char*> args(argv, argv+argc);
17 env_to_vec(args);
18 auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
19 CODE_ENVIRONMENT_UTILITY,
20 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
21 common_init_finish(g_ceph_context);
22 // make sure we have 3 copies, or some tests won't work
23 g_ceph_context->_conf->set_val("osd_pool_default_size", "3", false);
24 // our map is flat, so just try and split across OSDs, not hosts or whatever
25 g_ceph_context->_conf->set_val("osd_crush_chooseleaf_type", "0", false);
26 ::testing::InitGoogleTest(&argc, argv);
27 return RUN_ALL_TESTS();
28 }
29
30 class OSDMapTest : public testing::Test {
31 const static int num_osds = 6;
32 public:
33 OSDMap osdmap;
34 OSDMapMapping mapping;
35 const uint64_t my_ec_pool = 1;
36 const uint64_t my_rep_pool = 2;
37
38
39 OSDMapTest() {}
40
41 void set_up_map() {
42 uuid_d fsid;
43 osdmap.build_simple(g_ceph_context, 0, fsid, num_osds);
44 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
45 pending_inc.fsid = osdmap.get_fsid();
46 entity_addr_t sample_addr;
47 uuid_d sample_uuid;
48 for (int i = 0; i < num_osds; ++i) {
49 sample_uuid.generate_random();
50 sample_addr.nonce = i;
51 pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
52 pending_inc.new_up_client[i] = sample_addr;
53 pending_inc.new_up_cluster[i] = sample_addr;
54 pending_inc.new_hb_back_up[i] = sample_addr;
55 pending_inc.new_hb_front_up[i] = sample_addr;
56 pending_inc.new_weight[i] = CEPH_OSD_IN;
57 pending_inc.new_uuid[i] = sample_uuid;
58 }
59 osdmap.apply_incremental(pending_inc);
60
61 // Create an EC ruleset and a pool using it
62 int r = osdmap.crush->add_simple_rule(
63 "erasure", "default", "osd", "",
64 "indep", pg_pool_t::TYPE_ERASURE,
65 &cerr);
66
67 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
68 new_pool_inc.new_pool_max = osdmap.get_pool_max();
69 new_pool_inc.fsid = osdmap.get_fsid();
70 pg_pool_t empty;
71 // make an ec pool
72 uint64_t pool_id = ++new_pool_inc.new_pool_max;
73 assert(pool_id == my_ec_pool);
74 pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
75 p->size = 3;
76 p->set_pg_num(64);
77 p->set_pgp_num(64);
78 p->type = pg_pool_t::TYPE_ERASURE;
79 p->crush_rule = r;
80 new_pool_inc.new_pool_names[pool_id] = "ec";
81 // and a replicated pool
82 pool_id = ++new_pool_inc.new_pool_max;
83 assert(pool_id == my_rep_pool);
84 p = new_pool_inc.get_new_pool(pool_id, &empty);
85 p->size = 3;
86 p->set_pg_num(64);
87 p->set_pgp_num(64);
88 p->type = pg_pool_t::TYPE_REPLICATED;
89 p->crush_rule = 0;
90 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
91 new_pool_inc.new_pool_names[pool_id] = "reppool";
92 osdmap.apply_incremental(new_pool_inc);
93 }
94 unsigned int get_num_osds() { return num_osds; }
95 void get_crush(CrushWrapper& newcrush) {
96 bufferlist bl;
97 osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
98 bufferlist::iterator p = bl.begin();
99 newcrush.decode(p);
100 }
101 int crush_move(const string &name, const vector<string> &argvec) {
102 map<string,string> loc;
103 CrushWrapper::parse_loc_map(argvec, &loc);
104 CrushWrapper newcrush;
105 get_crush(newcrush);
106 if (!newcrush.name_exists(name)) {
107 return -ENOENT;
108 }
109 int id = newcrush.get_item_id(name);
110 int err;
111 if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
112 if (id >= 0) {
113 err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc);
114 } else {
115 err = newcrush.move_bucket(g_ceph_context, id, loc);
116 }
117 if (err >= 0) {
118 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
119 pending_inc.crush.clear();
120 newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
121 osdmap.apply_incremental(pending_inc);
122 err = 0;
123 }
124 } else {
125 // already there
126 err = 0;
127 }
128 return err;
129 }
130 int crush_rule_create_replicated(const string &name,
131 const string &root,
132 const string &type) {
133 if (osdmap.crush->rule_exists(name)) {
134 return osdmap.crush->get_rule_id(name);
135 }
136 CrushWrapper newcrush;
137 get_crush(newcrush);
138 string device_class;
139 stringstream ss;
140 int ruleno = newcrush.add_simple_rule(
141 name, root, type, device_class,
142 "firstn", pg_pool_t::TYPE_REPLICATED, &ss);
143 if (ruleno >= 0) {
144 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
145 pending_inc.crush.clear();
146 newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
147 osdmap.apply_incremental(pending_inc);
148 }
149 return ruleno;
150 }
151 void test_mappings(int pool,
152 int num,
153 vector<int> *any,
154 vector<int> *first,
155 vector<int> *primary) {
156 mapping.update(osdmap);
157 for (int i=0; i<num; ++i) {
158 vector<int> up, acting;
159 int up_primary, acting_primary;
160 pg_t pgid(i, pool);
161 osdmap.pg_to_up_acting_osds(pgid,
162 &up, &up_primary, &acting, &acting_primary);
163 for (unsigned j=0; j<acting.size(); ++j)
164 (*any)[acting[j]]++;
165 if (!acting.empty())
166 (*first)[acting[0]]++;
167 if (acting_primary >= 0)
168 (*primary)[acting_primary]++;
169
170 // compare to precalc mapping
171 vector<int> up2, acting2;
172 int up_primary2, acting_primary2;
173 pgid = osdmap.raw_pg_to_pg(pgid);
174 mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
175 ASSERT_EQ(up, up2);
176 ASSERT_EQ(up_primary, up_primary2);
177 ASSERT_EQ(acting, acting2);
178 ASSERT_EQ(acting_primary, acting_primary2);
179 }
180 cout << "any: " << *any << std::endl;;
181 cout << "first: " << *first << std::endl;;
182 cout << "primary: " << *primary << std::endl;;
183 }
184 };
185
186 TEST_F(OSDMapTest, Create) {
187 set_up_map();
188 ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd());
189 ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds());
190 }
191
192 TEST_F(OSDMapTest, Features) {
193 // with EC pool
194 set_up_map();
195 uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
196 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
197 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
198 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
199 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
200 ASSERT_TRUE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
201 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
202 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
203
204 // clients have a slightly different view
205 features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
206 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
207 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
208 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
209 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
210 ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES); // dont' need this
211 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
212 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
213
214 // remove teh EC pool, but leave the rule. add primary affinity.
215 {
216 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
217 new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
218 new_pool_inc.new_primary_affinity[0] = 0x8000;
219 osdmap.apply_incremental(new_pool_inc);
220 }
221
222 features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
223 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
224 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
225 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
226 ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
227 ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
228 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
229 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
230
231 // FIXME: test tiering feature bits
232 }
233
234 TEST_F(OSDMapTest, MapPG) {
235 set_up_map();
236
237 std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl;
238 pg_t rawpg(0, my_rep_pool, -1);
239 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
240 vector<int> up_osds, acting_osds;
241 int up_primary, acting_primary;
242
243 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
244 &acting_osds, &acting_primary);
245
246 vector<int> old_up_osds, old_acting_osds;
247 osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
248 ASSERT_EQ(old_up_osds, up_osds);
249 ASSERT_EQ(old_acting_osds, acting_osds);
250
251 ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size());
252 }
253
254 TEST_F(OSDMapTest, MapFunctionsMatch) {
255 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
256 set_up_map();
257 pg_t rawpg(0, my_rep_pool, -1);
258 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
259 vector<int> up_osds, acting_osds;
260 int up_primary, acting_primary;
261
262 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
263 &acting_osds, &acting_primary);
264
265 vector<int> up_osds_two, acting_osds_two;
266
267 osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);
268
269 ASSERT_EQ(up_osds, up_osds_two);
270 ASSERT_EQ(acting_osds, acting_osds_two);
271
272 int acting_primary_two;
273 osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
274 EXPECT_EQ(acting_osds, acting_osds_two);
275 EXPECT_EQ(acting_primary, acting_primary_two);
276 osdmap.pg_to_acting_osds(pgid, acting_osds_two);
277 EXPECT_EQ(acting_osds, acting_osds_two);
278 }
279
280 /** This test must be removed or modified appropriately when we allow
281 * other ways to specify a primary. */
282 TEST_F(OSDMapTest, PrimaryIsFirst) {
283 set_up_map();
284
285 pg_t rawpg(0, my_rep_pool, -1);
286 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
287 vector<int> up_osds, acting_osds;
288 int up_primary, acting_primary;
289
290 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
291 &acting_osds, &acting_primary);
292 EXPECT_EQ(up_osds[0], up_primary);
293 EXPECT_EQ(acting_osds[0], acting_primary);
294 }
295
296 TEST_F(OSDMapTest, PGTempRespected) {
297 set_up_map();
298
299 pg_t rawpg(0, my_rep_pool, -1);
300 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
301 vector<int> up_osds, acting_osds;
302 int up_primary, acting_primary;
303
304 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
305 &acting_osds, &acting_primary);
306
307 // copy and swap first and last element in acting_osds
308 vector<int> new_acting_osds(acting_osds);
309 int first = new_acting_osds[0];
310 new_acting_osds[0] = *new_acting_osds.rbegin();
311 *new_acting_osds.rbegin() = first;
312
313 // apply pg_temp to osdmap
314 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
315 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
316 new_acting_osds.begin(), new_acting_osds.end());
317 osdmap.apply_incremental(pgtemp_map);
318
319 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
320 &acting_osds, &acting_primary);
321 EXPECT_EQ(new_acting_osds, acting_osds);
322 }
323
324 TEST_F(OSDMapTest, PrimaryTempRespected) {
325 set_up_map();
326
327 pg_t rawpg(0, my_rep_pool, -1);
328 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
329 vector<int> up_osds;
330 vector<int> acting_osds;
331 int up_primary, acting_primary;
332
333 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
334 &acting_osds, &acting_primary);
335
336 // make second OSD primary via incremental
337 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
338 pgtemp_map.new_primary_temp[pgid] = acting_osds[1];
339 osdmap.apply_incremental(pgtemp_map);
340
341 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
342 &acting_osds, &acting_primary);
343 EXPECT_EQ(acting_primary, acting_osds[1]);
344 }
345
346 TEST_F(OSDMapTest, CleanTemps) {
347 set_up_map();
348
349 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
350 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
351 pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
352 {
353 vector<int> up_osds, acting_osds;
354 int up_primary, acting_primary;
355 osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary,
356 &acting_osds, &acting_primary);
357 pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>(
358 up_osds.begin(), up_osds.end());
359 pgtemp_map.new_primary_temp[pga] = up_primary;
360 }
361 pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool));
362 {
363 vector<int> up_osds, acting_osds;
364 int up_primary, acting_primary;
365 osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary,
366 &acting_osds, &acting_primary);
367 pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
368 up_osds.begin(), up_osds.end());
369 pending_inc.new_primary_temp[pgb] = up_primary;
370 }
371
372 osdmap.apply_incremental(pgtemp_map);
373
374 OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
375
376 EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
377 pending_inc.new_pg_temp[pga].size() == 0);
378 EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);
379
380 EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) &&
381 !pending_inc.new_primary_temp.count(pgb));
382 }
383
384 TEST_F(OSDMapTest, KeepsNecessaryTemps) {
385 set_up_map();
386
387 pg_t rawpg(0, my_rep_pool, -1);
388 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
389 vector<int> up_osds, acting_osds;
390 int up_primary, acting_primary;
391
392 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
393 &acting_osds, &acting_primary);
394
395 // find unused OSD and stick it in there
396 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
397 // find an unused osd and put it in place of the first one
398 int i = 0;
399 for(; i != (int)get_num_osds(); ++i) {
400 bool in_use = false;
401 for (vector<int>::iterator osd_it = up_osds.begin();
402 osd_it != up_osds.end();
403 ++osd_it) {
404 if (i == *osd_it) {
405 in_use = true;
406 break;
407 }
408 }
409 if (!in_use) {
410 up_osds[1] = i;
411 break;
412 }
413 }
414 if (i == (int)get_num_osds())
415 FAIL() << "did not find unused OSD for temp mapping";
416
417 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
418 up_osds.begin(), up_osds.end());
419 pgtemp_map.new_primary_temp[pgid] = up_osds[1];
420 osdmap.apply_incremental(pgtemp_map);
421
422 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
423
424 OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
425 EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
426 EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
427 }
428
429 TEST_F(OSDMapTest, PrimaryAffinity) {
430 set_up_map();
431
432 int n = get_num_osds();
433 for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
434 p != osdmap.get_pools().end();
435 ++p) {
436 int pool = p->first;
437 int expect_primary = 10000 / n;
438 cout << "pool " << pool << " size " << (int)p->second.size
439 << " expect_primary " << expect_primary << std::endl;
440 {
441 vector<int> any(n, 0);
442 vector<int> first(n, 0);
443 vector<int> primary(n, 0);
444 test_mappings(pool, 10000, &any, &first, &primary);
445 for (int i=0; i<n; ++i) {
446 ASSERT_LT(0, any[i]);
447 ASSERT_LT(0, first[i]);
448 ASSERT_LT(0, primary[i]);
449 }
450 }
451
452 osdmap.set_primary_affinity(0, 0);
453 osdmap.set_primary_affinity(1, 0);
454 {
455 vector<int> any(n, 0);
456 vector<int> first(n, 0);
457 vector<int> primary(n, 0);
458 test_mappings(pool, 10000, &any, &first, &primary);
459 for (int i=0; i<n; ++i) {
460 ASSERT_LT(0, any[i]);
461 if (i >= 2) {
462 ASSERT_LT(0, first[i]);
463 ASSERT_LT(0, primary[i]);
464 } else {
465 if (p->second.is_replicated()) {
466 ASSERT_EQ(0, first[i]);
467 }
468 ASSERT_EQ(0, primary[i]);
469 }
470 }
471 }
472
473 osdmap.set_primary_affinity(0, 0x8000);
474 osdmap.set_primary_affinity(1, 0);
475 {
476 vector<int> any(n, 0);
477 vector<int> first(n, 0);
478 vector<int> primary(n, 0);
479 test_mappings(pool, 10000, &any, &first, &primary);
480 int expect = (10000 / (n-2)) / 2; // half weight
481 cout << "expect " << expect << std::endl;
482 for (int i=0; i<n; ++i) {
483 ASSERT_LT(0, any[i]);
484 if (i >= 2) {
485 ASSERT_LT(0, first[i]);
486 ASSERT_LT(0, primary[i]);
487 } else if (i == 1) {
488 if (p->second.is_replicated()) {
489 ASSERT_EQ(0, first[i]);
490 }
491 ASSERT_EQ(0, primary[i]);
492 } else {
493 ASSERT_LT(expect *2/3, primary[0]);
494 ASSERT_GT(expect *4/3, primary[0]);
495 }
496 }
497 }
498
499 osdmap.set_primary_affinity(0, 0x10000);
500 osdmap.set_primary_affinity(1, 0x10000);
501 }
502 }
503
504 TEST_F(OSDMapTest, parse_osd_id_list) {
505 set_up_map();
506 set<int> out;
507 set<int> all;
508 osdmap.get_all_osds(all);
509
510 ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout));
511 ASSERT_EQ(1, out.size());
512 ASSERT_EQ(0, *out.begin());
513
514 ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout));
515 ASSERT_EQ(1, out.size());
516 ASSERT_EQ(1, *out.begin());
517
518 ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout));
519 ASSERT_EQ(2, out.size());
520 ASSERT_EQ(0, *out.begin());
521 ASSERT_EQ(1, *out.rbegin());
522
523 ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout));
524 ASSERT_EQ(2, out.size());
525 ASSERT_EQ(0, *out.begin());
526 ASSERT_EQ(1, *out.rbegin());
527
528 ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout));
529 ASSERT_EQ(all.size(), out.size());
530 ASSERT_EQ(all, out);
531
532 ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout));
533 ASSERT_EQ(all, out);
534
535 ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout));
536 ASSERT_EQ(all, out);
537
538 ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout));
539 ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout));
540 }
541
542 TEST_F(OSDMapTest, CleanPGUpmaps) {
543 set_up_map();
544
545 // build a crush rule of type host
546 const int expected_host_num = 3;
547 int osd_per_host = get_num_osds() / expected_host_num;
548 ASSERT_GE(2, osd_per_host);
549 int index = 0;
550 for (int i = 0; i < (int)get_num_osds(); i++) {
551 if (i && i % osd_per_host == 0) {
552 ++index;
553 }
554 stringstream osd_name;
555 stringstream host_name;
556 vector<string> move_to;
557 osd_name << "osd." << i;
558 host_name << "host-" << index;
559 move_to.push_back("root=default");
560 string host_loc = "host=" + host_name.str();
561 move_to.push_back(host_loc);
562 int r = crush_move(osd_name.str(), move_to);
563 ASSERT_EQ(0, r);
564 }
565 const string upmap_rule = "upmap";
566 int upmap_rule_no = crush_rule_create_replicated(
567 upmap_rule, "default", "host");
568 ASSERT_LT(0, upmap_rule_no);
569
570 // create a replicated pool which references the above rule
571 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
572 new_pool_inc.new_pool_max = osdmap.get_pool_max();
573 new_pool_inc.fsid = osdmap.get_fsid();
574 pg_pool_t empty;
575 uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max;
576 pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty);
577 p->size = 2;
578 p->set_pg_num(64);
579 p->set_pgp_num(64);
580 p->type = pg_pool_t::TYPE_REPLICATED;
581 p->crush_rule = upmap_rule_no;
582 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
583 new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool";
584 osdmap.apply_incremental(new_pool_inc);
585
586 pg_t rawpg(0, upmap_pool_id);
587 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
588 vector<int> up;
589 int up_primary;
590 osdmap.pg_to_raw_up(pgid, &up, &up_primary);
591 ASSERT_LT(1U, up.size());
592 {
593 // validate we won't have two OSDs from a same host
594 int parent_0 = osdmap.crush->get_parent_of_type(up[0],
595 osdmap.crush->get_type_id("host"));
596 int parent_1 = osdmap.crush->get_parent_of_type(up[1],
597 osdmap.crush->get_type_id("host"));
598 ASSERT_TRUE(parent_0 != parent_1);
599 }
600
601 {
602 // cancel stale upmaps
603 osdmap.pg_to_raw_up(pgid, &up, &up_primary);
604 int from = -1;
605 for (int i = 0; i < (int)get_num_osds(); i++) {
606 if (std::find(up.begin(), up.end(), i) == up.end()) {
607 from = i;
608 break;
609 }
610 }
611 ASSERT_TRUE(from >= 0);
612 int to = -1;
613 for (int i = 0; i < (int)get_num_osds(); i++) {
614 if (std::find(up.begin(), up.end(), i) == up.end() && i != from) {
615 to = i;
616 break;
617 }
618 }
619 ASSERT_TRUE(to >= 0);
620 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
621 new_pg_upmap_items.push_back(make_pair(from, to));
622 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
623 pending_inc.new_pg_upmap_items[pgid] =
624 mempool::osdmap::vector<pair<int32_t,int32_t>>(
625 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
626 OSDMap nextmap;
627 nextmap.deepish_copy_from(osdmap);
628 nextmap.apply_incremental(pending_inc);
629 ASSERT_TRUE(nextmap.have_pg_upmaps(pgid));
630 OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1);
631 nextmap.clean_pg_upmaps(g_ceph_context, &new_pending_inc);
632 nextmap.apply_incremental(new_pending_inc);
633 ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));
634 }
635
636 {
637 // https://tracker.ceph.com/issues/37493
638 pg_t ec_pg(0, my_ec_pool);
639 pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
640 OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
641 int from = -1;
642 int to = -1;
643 {
644 // insert a valid pg_upmap_item
645 vector<int> ec_up;
646 int ec_up_primary;
647 osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
648 ASSERT_TRUE(!ec_up.empty());
649 from = *(ec_up.begin());
650 ASSERT_TRUE(from >= 0);
651 for (int i = 0; i < (int)get_num_osds(); i++) {
652 if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
653 to = i;
654 break;
655 }
656 }
657 ASSERT_TRUE(to >= 0);
658 ASSERT_TRUE(from != to);
659 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
660 new_pg_upmap_items.push_back(make_pair(from, to));
661 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
662 pending_inc.new_pg_upmap_items[ec_pgid] =
663 mempool::osdmap::vector<pair<int32_t,int32_t>>(
664 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
665 tmpmap.deepish_copy_from(osdmap);
666 tmpmap.apply_incremental(pending_inc);
667 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
668 }
669 {
670 // mark one of the target OSDs of the above pg_upmap_item as down
671 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
672 pending_inc.new_state[to] = CEPH_OSD_UP;
673 tmpmap.apply_incremental(pending_inc);
674 ASSERT_TRUE(!tmpmap.is_up(to));
675 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
676 }
677 {
678 // confirm *maybe_remove_pg_upmaps* won't do anything bad
679 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
680 tmpmap.maybe_remove_pg_upmaps(g_ceph_context, tmpmap, &pending_inc);
681 tmpmap.apply_incremental(pending_inc);
682 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
683 }
684 }
685
686 {
687 // http://tracker.ceph.com/issues/37501
688 pg_t ec_pg(0, my_ec_pool);
689 pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
690 OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
691 int from = -1;
692 int to = -1;
693 {
694 // insert a valid pg_upmap_item
695 vector<int> ec_up;
696 int ec_up_primary;
697 osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
698 ASSERT_TRUE(!ec_up.empty());
699 from = *(ec_up.begin());
700 ASSERT_TRUE(from >= 0);
701 for (int i = 0; i < (int)get_num_osds(); i++) {
702 if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
703 to = i;
704 break;
705 }
706 }
707 ASSERT_TRUE(to >= 0);
708 ASSERT_TRUE(from != to);
709 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
710 new_pg_upmap_items.push_back(make_pair(from, to));
711 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
712 pending_inc.new_pg_upmap_items[ec_pgid] =
713 mempool::osdmap::vector<pair<int32_t,int32_t>>(
714 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
715 tmpmap.deepish_copy_from(osdmap);
716 tmpmap.apply_incremental(pending_inc);
717 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
718 }
719 {
720 // mark one of the target OSDs of the above pg_upmap_item as out
721 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
722 pending_inc.new_weight[to] = CEPH_OSD_OUT;
723 tmpmap.apply_incremental(pending_inc);
724 ASSERT_TRUE(tmpmap.is_out(to));
725 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
726 }
727 {
728 // *maybe_remove_pg_upmaps* should be able to remove the above *bad* mapping
729 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
730 OSDMap nextmap;
731 nextmap.deepish_copy_from(tmpmap);
732 nextmap.maybe_remove_pg_upmaps(g_ceph_context, nextmap, &pending_inc);
733 tmpmap.apply_incremental(pending_inc);
734 ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid));
735 }
736 }
737
738 {
739 // TEST pg_upmap
740 {
741 // STEP-1: enumerate all children of up[0]'s parent,
742 // replace up[1] with one of them (other than up[0])
743 int parent = osdmap.crush->get_parent_of_type(up[0],
744 osdmap.crush->get_type_id("host"));
745 set<int> candidates;
746 osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates);
747 ASSERT_LT(1U, candidates.size());
748 int replaced_by = -1;
749 for (auto c: candidates) {
750 if (c != up[0]) {
751 replaced_by = c;
752 break;
753 }
754 }
755 {
756 // Check we can handle a negative pg_upmap value
757 vector<int32_t> new_pg_upmap;
758 new_pg_upmap.push_back(up[0]);
759 new_pg_upmap.push_back(-823648512);
760 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
761 pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
762 new_pg_upmap.begin(), new_pg_upmap.end());
763 osdmap.apply_incremental(pending_inc);
764 vector<int> new_up;
765 int new_up_primary;
766 // crucial call - _apply_upmap should ignore the negative value
767 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
768 }
769 ASSERT_NE(-1, replaced_by);
770 // generate a new pg_upmap item and apply
771 vector<int32_t> new_pg_upmap;
772 new_pg_upmap.push_back(up[0]);
773 new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by
774 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
775 pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
776 new_pg_upmap.begin(), new_pg_upmap.end());
777 osdmap.apply_incremental(pending_inc);
778 {
779 // validate pg_upmap is there
780 vector<int> new_up;
781 int new_up_primary;
782 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
783 ASSERT_TRUE(up.size() == new_up.size());
784 ASSERT_TRUE(new_up[0] == new_pg_upmap[0]);
785 ASSERT_TRUE(new_up[1] == new_pg_upmap[1]);
786 // and we shall have two OSDs from a same host now..
787 int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
788 osdmap.crush->get_type_id("host"));
789 int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
790 osdmap.crush->get_type_id("host"));
791 ASSERT_TRUE(parent_0 == parent_1);
792 }
793 }
794 {
795 // STEP-2: apply cure
796 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
797 osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, &pending_inc);
798 osdmap.apply_incremental(pending_inc);
799 {
800 // validate pg_upmap is gone (reverted)
801 vector<int> new_up;
802 int new_up_primary;
803 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
804 ASSERT_TRUE(new_up == up);
805 ASSERT_TRUE(new_up_primary = up_primary);
806 }
807 }
808 }
809
810 {
811 // TEST pg_upmap_items
812 // enumerate all used hosts first
813 set<int> parents;
814 for (auto u: up) {
815 int parent = osdmap.crush->get_parent_of_type(u,
816 osdmap.crush->get_type_id("host"));
817 ASSERT_GT(0, parent);
818 parents.insert(parent);
819 }
820 int candidate_parent = 0;
821 set<int> candidate_children;
822 vector<int> up_after_out;
823 {
824 // STEP-1: try mark out up[1] and all other OSDs from the same host
825 int parent = osdmap.crush->get_parent_of_type(up[1],
826 osdmap.crush->get_type_id("host"));
827 set<int> children;
828 osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
829 &children);
830 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
831 for (auto c: children) {
832 pending_inc.new_weight[c] = CEPH_OSD_OUT;
833 }
834 OSDMap tmpmap;
835 tmpmap.deepish_copy_from(osdmap);
836 tmpmap.apply_incremental(pending_inc);
837 vector<int> new_up;
838 int new_up_primary;
839 tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
840 // verify that we'll have OSDs from a different host..
841 int will_choose = -1;
842 for (auto o: new_up) {
843 int parent = tmpmap.crush->get_parent_of_type(o,
844 osdmap.crush->get_type_id("host"));
845 if (!parents.count(parent)) {
846 will_choose = o;
847 candidate_parent = parent; // record
848 break;
849 }
850 }
851 ASSERT_LT(-1, will_choose); // it is an OSD!
852 ASSERT_TRUE(candidate_parent != 0);
853 osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent),
854 &candidate_children);
855 ASSERT_TRUE(candidate_children.count(will_choose));
856 candidate_children.erase(will_choose);
857 ASSERT_TRUE(!candidate_children.empty());
858 up_after_out = new_up; // needed for verification..
859 }
860 {
861 // Make sure we can handle a negative pg_upmap_item
862 int victim = up[0];
863 int replaced_by = -823648512;
864 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
865 new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
866 // apply
867 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
868 pending_inc.new_pg_upmap_items[pgid] =
869 mempool::osdmap::vector<pair<int32_t,int32_t>>(
870 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
871 osdmap.apply_incremental(pending_inc);
872 vector<int> new_up;
873 int new_up_primary;
874 // crucial call - _apply_upmap should ignore the negative value
875 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
876 }
877 {
878 // STEP-2: generating a new pg_upmap_items entry by
879 // replacing up[0] with one coming from candidate_children
880 int victim = up[0];
881 int replaced_by = *candidate_children.begin();
882 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
883 new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
884 // apply
885 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
886 pending_inc.new_pg_upmap_items[pgid] =
887 mempool::osdmap::vector<pair<int32_t,int32_t>>(
888 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
889 osdmap.apply_incremental(pending_inc);
890 {
891 // validate pg_upmap_items is there
892 vector<int> new_up;
893 int new_up_primary;
894 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
895 ASSERT_TRUE(up.size() == new_up.size());
896 ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) !=
897 new_up.end());
898 // and up[1] too
899 ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) !=
900 new_up.end());
901 }
902 }
903 {
904 // STEP-3: mark out up[1] and all other OSDs from the same host
905 int parent = osdmap.crush->get_parent_of_type(up[1],
906 osdmap.crush->get_type_id("host"));
907 set<int> children;
908 osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
909 &children);
910 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
911 for (auto c: children) {
912 pending_inc.new_weight[c] = CEPH_OSD_OUT;
913 }
914 osdmap.apply_incremental(pending_inc);
915 {
916 // validate we have two OSDs from the same host now..
917 vector<int> new_up;
918 int new_up_primary;
919 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
920 ASSERT_TRUE(up.size() == new_up.size());
921 int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
922 osdmap.crush->get_type_id("host"));
923 int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
924 osdmap.crush->get_type_id("host"));
925 ASSERT_TRUE(parent_0 == parent_1);
926 }
927 }
928 {
929 // STEP-4: apply cure
930 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
931 osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, &pending_inc);
932 osdmap.apply_incremental(pending_inc);
933 {
934 // validate pg_upmap_items is gone (reverted)
935 vector<int> new_up;
936 int new_up_primary;
937 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
938 ASSERT_TRUE(new_up == up_after_out);
939 }
940 }
941 }
942 }
943
944 TEST(PGTempMap, basic)
945 {
946 PGTempMap m;
947 pg_t a(1,1);
948 for (auto i=3; i<1000; ++i) {
949 pg_t x(i, 1);
950 m.set(x, {static_cast<int>(i)});
951 }
952 pg_t b(2,1);
953 m.set(a, {1, 2});
954 ASSERT_NE(m.find(a), m.end());
955 ASSERT_EQ(m.find(a), m.begin());
956 ASSERT_EQ(m.find(b), m.end());
957 ASSERT_EQ(998u, m.size());
958 }
959