// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
#include "gtest/gtest.h"
#include "osd/OSDMap.h"
#include "osd/OSDMapMapping.h"

#include "global/global_context.h"
#include "global/global_init.h"
#include "common/common_init.h"
#include "common/ceph_argparse.h"

#include <iostream>

using namespace std;

int main(int argc, char **argv) {
  std::vector<const char*> args(argv, argv+argc);
  env_to_vec(args);
  auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
			 CODE_ENVIRONMENT_UTILITY,
			 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
  common_init_finish(g_ceph_context);
  // make sure we have 3 copies, or some tests won't work
  g_ceph_context->_conf->set_val("osd_pool_default_size", "3", false);
  // our map is flat, so just try and split across OSDs, not hosts or whatever
  g_ceph_context->_conf->set_val("osd_crush_chooseleaf_type", "0", false);
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

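// Test fixture: builds a small, flat OSDMap (6 OSDs by default) plus one
// erasure-coded pool ("ec") and one replicated pool ("reppool") that the
// tests below remap and mutate.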
class OSDMapTest : public testing::Test {
  int num_osds = 6;
public:
  OSDMap osdmap;
  OSDMapMapping mapping;
  const uint64_t my_ec_pool = 1;
  const uint64_t my_rep_pool = 2;

  OSDMapTest() {}

  void set_up_map(int new_num_osds = 6, bool no_default_pools = false) {
    num_osds = new_num_osds;
    uuid_d fsid;
    osdmap.build_simple(g_ceph_context, 0, fsid, num_osds);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.fsid = osdmap.get_fsid();
    entity_addr_t sample_addr;
    uuid_d sample_uuid;
    for (int i = 0; i < num_osds; ++i) {
      sample_uuid.generate_random();
      sample_addr.nonce = i;
      pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
      pending_inc.new_up_client[i] = sample_addr;
      pending_inc.new_up_cluster[i] = sample_addr;
      pending_inc.new_hb_back_up[i] = sample_addr;
      pending_inc.new_hb_front_up[i] = sample_addr;
      pending_inc.new_weight[i] = CEPH_OSD_IN;
      pending_inc.new_uuid[i] = sample_uuid;
    }
    osdmap.apply_incremental(pending_inc);
    if (no_default_pools) // do not create any default pool(s)
      return;

    // Create an EC ruleset and a pool using it
    int r = osdmap.crush->add_simple_rule(
      "erasure", "default", "osd", "",
      "indep", pg_pool_t::TYPE_ERASURE,
      &cerr);

    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.new_pool_max = osdmap.get_pool_max();
    new_pool_inc.fsid = osdmap.get_fsid();
    pg_pool_t empty;
    // make an ec pool
    uint64_t pool_id = ++new_pool_inc.new_pool_max;
    assert(pool_id == my_ec_pool);
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->set_pg_num(64);
    p->set_pgp_num(64);
    p->type = pg_pool_t::TYPE_ERASURE;
    p->crush_rule = r;
    new_pool_inc.new_pool_names[pool_id] = "ec";
    // and a replicated pool
    pool_id = ++new_pool_inc.new_pool_max;
    assert(pool_id == my_rep_pool);
    p = new_pool_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->set_pg_num(64);
    p->set_pgp_num(64);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = 0;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_id] = "reppool";
    osdmap.apply_incremental(new_pool_inc);
  }
  unsigned int get_num_osds() { return num_osds; }
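  // The next two helpers clone the map's CRUSH structure (via encode/decode)
  // so a test can edit a private CrushWrapper and then commit it back through
  // an OSDMap::Incremental.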
  void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) {
    bufferlist bl;
    tmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
    bufferlist::iterator p = bl.begin();
    newcrush.decode(p);
  }
  int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) {
    map<string,string> loc;
    CrushWrapper::parse_loc_map(argvec, &loc);
    CrushWrapper newcrush;
    get_crush(tmap, newcrush);
    if (!newcrush.name_exists(name)) {
      return -ENOENT;
    }
    int id = newcrush.get_item_id(name);
    int err;
    if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
      if (id >= 0) {
        err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc);
      } else {
        err = newcrush.move_bucket(g_ceph_context, id, loc);
      }
      if (err >= 0) {
        OSDMap::Incremental pending_inc(tmap.get_epoch() + 1);
        pending_inc.crush.clear();
        newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
        tmap.apply_incremental(pending_inc);
        err = 0;
      }
    } else {
      // already there
      err = 0;
    }
    return err;
  }
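  // Create (or look up) a simple replicated CRUSH rule rooted at `root` that
  // spreads replicas across buckets of the given `type`.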
  int crush_rule_create_replicated(const string &name,
                                   const string &root,
                                   const string &type) {
    if (osdmap.crush->rule_exists(name)) {
      return osdmap.crush->get_rule_id(name);
    }
    CrushWrapper newcrush;
    get_crush(osdmap, newcrush);
    string device_class;
    stringstream ss;
    int ruleno = newcrush.add_simple_rule(
      name, root, type, device_class,
      "firstn", pg_pool_t::TYPE_REPLICATED, &ss);
    if (ruleno >= 0) {
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }
    return ruleno;
  }
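  // Map `num` PGs of `pool` and tally, per OSD, how often it appears anywhere
  // in the acting set, as its first element, and as the acting primary; also
  // cross-check each result against the precomputed OSDMapMapping.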
  void test_mappings(int pool,
                     int num,
                     vector<int> *any,
                     vector<int> *first,
                     vector<int> *primary) {
    mapping.update(osdmap);
    for (int i=0; i<num; ++i) {
      vector<int> up, acting;
      int up_primary, acting_primary;
      pg_t pgid(i, pool);
      osdmap.pg_to_up_acting_osds(pgid,
                                  &up, &up_primary, &acting, &acting_primary);
      for (unsigned j=0; j<acting.size(); ++j)
        (*any)[acting[j]]++;
      if (!acting.empty())
        (*first)[acting[0]]++;
      if (acting_primary >= 0)
        (*primary)[acting_primary]++;

      // compare to precalc mapping
      vector<int> up2, acting2;
      int up_primary2, acting_primary2;
      pgid = osdmap.raw_pg_to_pg(pgid);
      mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
      ASSERT_EQ(up, up2);
      ASSERT_EQ(up_primary, up_primary2);
      ASSERT_EQ(acting, acting2);
      ASSERT_EQ(acting_primary, acting_primary2);
    }
    cout << "any: " << *any << std::endl;
    cout << "first: " << *first << std::endl;
    cout << "primary: " << *primary << std::endl;
  }
};

TEST_F(OSDMapTest, Create) {
  set_up_map();
  ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd());
  ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds());
}

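// get_features() reports the feature bits an entity of the given type needs
// in order to understand this map; EC pools, CRUSH tunables, and primary
// affinity each pin additional bits.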
TEST_F(OSDMapTest, Features) {
  // with EC pool
  set_up_map();
  uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // clients have a slightly different view
  features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES); // don't need this
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // remove the EC pool, but leave the rule; add primary affinity
  {
    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
    new_pool_inc.new_primary_affinity[0] = 0x8000;
    osdmap.apply_incremental(new_pool_inc);
  }

  features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
  ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // FIXME: test tiering feature bits
}

TEST_F(OSDMapTest, MapPG) {
  set_up_map();

  std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl;
  pg_t rawpg(0, my_rep_pool, -1);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  vector<int> old_up_osds, old_acting_osds;
  osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
  ASSERT_EQ(old_up_osds, up_osds);
  ASSERT_EQ(old_acting_osds, acting_osds);

  ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size());
}

TEST_F(OSDMapTest, MapFunctionsMatch) {
  // make sure the pg_to_up_acting_osds and pg_to_acting_osds variants agree
  set_up_map();
  pg_t rawpg(0, my_rep_pool, -1);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  vector<int> up_osds_two, acting_osds_two;

  osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);

  ASSERT_EQ(up_osds, up_osds_two);
  ASSERT_EQ(acting_osds, acting_osds_two);

  int acting_primary_two;
  osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
  EXPECT_EQ(acting_osds, acting_osds_two);
  EXPECT_EQ(acting_primary, acting_primary_two);
  osdmap.pg_to_acting_osds(pgid, acting_osds_two);
  EXPECT_EQ(acting_osds, acting_osds_two);
}

/** This test must be removed or modified appropriately when we allow
 * other ways to specify a primary. */
TEST_F(OSDMapTest, PrimaryIsFirst) {
  set_up_map();

  pg_t rawpg(0, my_rep_pool, -1);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  EXPECT_EQ(up_osds[0], up_primary);
  EXPECT_EQ(acting_osds[0], acting_primary);
}

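// A pg_temp entry overrides the CRUSH-computed acting set until it is
// cleared, which is how backfill keeps a PG serviceable mid-migration.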
TEST_F(OSDMapTest, PGTempRespected) {
  set_up_map();

  pg_t rawpg(0, my_rep_pool, -1);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  // copy and swap first and last element in acting_osds
  vector<int> new_acting_osds(acting_osds);
  int first = new_acting_osds[0];
  new_acting_osds[0] = *new_acting_osds.rbegin();
  *new_acting_osds.rbegin() = first;

  // apply pg_temp to osdmap
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
    new_acting_osds.begin(), new_acting_osds.end());
  osdmap.apply_incremental(pgtemp_map);

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  EXPECT_EQ(new_acting_osds, acting_osds);
}

TEST_F(OSDMapTest, PrimaryTempRespected) {
  set_up_map();

  pg_t rawpg(0, my_rep_pool, -1);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds;
  vector<int> acting_osds;
  int up_primary, acting_primary;

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  // make second OSD primary via incremental
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  pgtemp_map.new_primary_temp[pgid] = acting_osds[1];
  osdmap.apply_incremental(pgtemp_map);

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);
  EXPECT_EQ(acting_primary, acting_osds[1]);
}

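// clean_temps() cancels temp entries that merely restate the raw CRUSH
// mapping: entries already in the map get an explicit removal (empty pg_temp
// vector / -1 primary_temp), while matching entries still pending in the
// incremental are simply dropped.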
TEST_F(OSDMapTest, CleanTemps) {
  set_up_map();

  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
  pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
  {
    vector<int> up_osds, acting_osds;
    int up_primary, acting_primary;
    osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary,
                                &acting_osds, &acting_primary);
    pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>(
      up_osds.begin(), up_osds.end());
    pgtemp_map.new_primary_temp[pga] = up_primary;
  }
  pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool));
  {
    vector<int> up_osds, acting_osds;
    int up_primary, acting_primary;
    osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary,
                                &acting_osds, &acting_primary);
    pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
      up_osds.begin(), up_osds.end());
    pending_inc.new_primary_temp[pgb] = up_primary;
  }

  osdmap.apply_incremental(pgtemp_map);

  OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);

  EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
              pending_inc.new_pg_temp[pga].size() == 0);
  EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);

  EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) &&
              !pending_inc.new_primary_temp.count(pgb));
}

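// Conversely, a pg_temp that still differs from the raw CRUSH mapping is
// genuinely needed and must survive clean_temps().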
TEST_F(OSDMapTest, KeepsNecessaryTemps) {
  set_up_map();

  pg_t rawpg(0, my_rep_pool, -1);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up_osds, acting_osds;
  int up_primary, acting_primary;

  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
                              &acting_osds, &acting_primary);

  // find an unused OSD and put it in place of the second one
  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
  int i = 0;
  for(; i != (int)get_num_osds(); ++i) {
    bool in_use = false;
    for (vector<int>::iterator osd_it = up_osds.begin();
         osd_it != up_osds.end();
         ++osd_it) {
      if (i == *osd_it) {
        in_use = true;
        break;
      }
    }
    if (!in_use) {
      up_osds[1] = i;
      break;
    }
  }
  if (i == (int)get_num_osds())
    FAIL() << "did not find unused OSD for temp mapping";

  pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
    up_osds.begin(), up_osds.end());
  pgtemp_map.new_primary_temp[pgid] = up_osds[1];
  osdmap.apply_incremental(pgtemp_map);

  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);

  OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
  EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
  EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
}

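// Primary affinity biases which member of the up set becomes primary without
// changing data placement; 0x10000 is full (default) weight, 0x8000 half,
// and 0 means the OSD should not be chosen primary if it can be avoided.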
TEST_F(OSDMapTest, PrimaryAffinity) {
  set_up_map();

  int n = get_num_osds();
  for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
       p != osdmap.get_pools().end();
       ++p) {
    int pool = p->first;
    int expect_primary = 10000 / n;
    cout << "pool " << pool << " size " << (int)p->second.size
         << " expect_primary " << expect_primary << std::endl;
    {
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      for (int i=0; i<n; ++i) {
        ASSERT_LT(0, any[i]);
        ASSERT_LT(0, first[i]);
        ASSERT_LT(0, primary[i]);
      }
    }

    osdmap.set_primary_affinity(0, 0);
    osdmap.set_primary_affinity(1, 0);
    {
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      for (int i=0; i<n; ++i) {
        ASSERT_LT(0, any[i]);
        if (i >= 2) {
          ASSERT_LT(0, first[i]);
          ASSERT_LT(0, primary[i]);
        } else {
          if (p->second.is_replicated()) {
            ASSERT_EQ(0, first[i]);
          }
          ASSERT_EQ(0, primary[i]);
        }
      }
    }

    osdmap.set_primary_affinity(0, 0x8000);
    osdmap.set_primary_affinity(1, 0);
    {
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      int expect = (10000 / (n-2)) / 2; // half weight
      cout << "expect " << expect << std::endl;
      for (int i=0; i<n; ++i) {
        ASSERT_LT(0, any[i]);
        if (i >= 2) {
          ASSERT_LT(0, first[i]);
          ASSERT_LT(0, primary[i]);
        } else if (i == 1) {
          if (p->second.is_replicated()) {
            ASSERT_EQ(0, first[i]);
          }
          ASSERT_EQ(0, primary[i]);
        } else {
          ASSERT_LT(expect * 2 / 3, primary[0]);
          ASSERT_GT(expect * 4 / 3, primary[0]);
        }
      }
    }

    osdmap.set_primary_affinity(0, 0x10000);
    osdmap.set_primary_affinity(1, 0x10000);
  }
}
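
// parse_osd_id_list() accepts "osd.N" names, bare numeric ids, and the
// wildcards "*", "all", and "any"; anything else is -EINVAL.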
TEST_F(OSDMapTest, parse_osd_id_list) {
  set_up_map();
  set<int> out;
  set<int> all;
  osdmap.get_all_osds(all);

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout));
  ASSERT_EQ(1u, out.size());
  ASSERT_EQ(0, *out.begin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout));
  ASSERT_EQ(1u, out.size());
  ASSERT_EQ(1, *out.begin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout));
  ASSERT_EQ(2u, out.size());
  ASSERT_EQ(0, *out.begin());
  ASSERT_EQ(1, *out.rbegin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout));
  ASSERT_EQ(2u, out.size());
  ASSERT_EQ(0, *out.begin());
  ASSERT_EQ(1, *out.rbegin());

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout));
  ASSERT_EQ(all.size(), out.size());
  ASSERT_EQ(all, out);

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout));
  ASSERT_EQ(all, out);

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout));
  ASSERT_EQ(all, out);

  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout));
  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout));
}

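// CleanPGUpmaps builds a host-level CRUSH topology and then exercises
// clean_pg_upmaps()/maybe_remove_pg_upmaps() against several kinds of
// pg_upmap / pg_upmap_items entries: stale ones, ones whose target OSD went
// down or out, and ones that would violate the CRUSH rule.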
TEST_F(OSDMapTest, CleanPGUpmaps) {
  set_up_map();

  // build a host-level crush hierarchy: 3 hosts, 2 OSDs each
  const int expected_host_num = 3;
  int osd_per_host = get_num_osds() / expected_host_num;
  ASSERT_GE(2, osd_per_host);
  int index = 0;
  for (int i = 0; i < (int)get_num_osds(); i++) {
    if (i && i % osd_per_host == 0) {
      ++index;
    }
    stringstream osd_name;
    stringstream host_name;
    vector<string> move_to;
    osd_name << "osd." << i;
    host_name << "host-" << index;
    move_to.push_back("root=default");
    string host_loc = "host=" + host_name.str();
    move_to.push_back(host_loc);
    int r = crush_move(osdmap, osd_name.str(), move_to);
    ASSERT_EQ(0, r);
  }
  const string upmap_rule = "upmap";
  int upmap_rule_no = crush_rule_create_replicated(
    upmap_rule, "default", "host");
  ASSERT_LT(0, upmap_rule_no);

  // create a replicated pool which references the above rule
  OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
  new_pool_inc.new_pool_max = osdmap.get_pool_max();
  new_pool_inc.fsid = osdmap.get_fsid();
  pg_pool_t empty;
  uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max;
  pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty);
  p->size = 2;
  p->set_pg_num(64);
  p->set_pgp_num(64);
  p->type = pg_pool_t::TYPE_REPLICATED;
  p->crush_rule = upmap_rule_no;
  p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
  new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool";
  osdmap.apply_incremental(new_pool_inc);

  pg_t rawpg(0, upmap_pool_id);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  vector<int> up;
  int up_primary;
  osdmap.pg_to_raw_up(pgid, &up, &up_primary);
  ASSERT_LT(1U, up.size());
  {
    // validate we won't have two OSDs from the same host
    int parent_0 = osdmap.crush->get_parent_of_type(up[0],
      osdmap.crush->get_type_id("host"));
    int parent_1 = osdmap.crush->get_parent_of_type(up[1],
      osdmap.crush->get_type_id("host"));
    ASSERT_TRUE(parent_0 != parent_1);
  }

  {
    // cancel stale upmaps
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    int from = -1;
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (std::find(up.begin(), up.end(), i) == up.end()) {
        from = i;
        break;
      }
    }
    ASSERT_TRUE(from >= 0);
    int to = -1;
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (std::find(up.begin(), up.end(), i) == up.end() && i != from) {
        to = i;
        break;
      }
    }
    ASSERT_TRUE(to >= 0);
    // a pg_upmap_items entry between two OSDs that are both outside the
    // up set is stale by construction and should be cleaned out
    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
    new_pg_upmap_items.push_back(make_pair(from, to));
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pg_upmap_items[pgid] =
      mempool::osdmap::vector<pair<int32_t,int32_t>>(
        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    OSDMap nextmap;
    nextmap.deepish_copy_from(osdmap);
    nextmap.apply_incremental(pending_inc);
    ASSERT_TRUE(nextmap.have_pg_upmaps(pgid));
    OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1);
    nextmap.clean_pg_upmaps(g_ceph_context, &new_pending_inc);
    nextmap.apply_incremental(new_pending_inc);
    ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));
  }

  {
    // https://tracker.ceph.com/issues/37493
    pg_t ec_pg(0, my_ec_pool);
    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
    OSDMap tmpmap; // use a tmpmap here, so we do not dirty the original map
    int from = -1;
    int to = -1;
    {
      // insert a valid pg_upmap_item
      vector<int> ec_up;
      int ec_up_primary;
      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
      ASSERT_TRUE(!ec_up.empty());
      from = *(ec_up.begin());
      ASSERT_TRUE(from >= 0);
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
          to = i;
          break;
        }
      }
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[ec_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmpmap.deepish_copy_from(osdmap);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
    }
    {
      // mark one of the target OSDs of the above pg_upmap_item as down
      // (new_state is an XOR mask, so setting CEPH_OSD_UP flips the up bit)
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      pending_inc.new_state[to] = CEPH_OSD_UP;
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(!tmpmap.is_up(to));
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
    }
    {
      // confirm *maybe_remove_pg_upmaps* won't do anything bad
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      tmpmap.maybe_remove_pg_upmaps(g_ceph_context, tmpmap, &pending_inc);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
    }
  }

  {
    // http://tracker.ceph.com/issues/37501
    pg_t ec_pg(0, my_ec_pool);
    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
    OSDMap tmpmap; // use a tmpmap here, so we do not dirty the original map
    int from = -1;
    int to = -1;
    {
      // insert a valid pg_upmap_item
      vector<int> ec_up;
      int ec_up_primary;
      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
      ASSERT_TRUE(!ec_up.empty());
      from = *(ec_up.begin());
      ASSERT_TRUE(from >= 0);
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
          to = i;
          break;
        }
      }
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[ec_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmpmap.deepish_copy_from(osdmap);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
    }
    {
      // mark one of the target OSDs of the above pg_upmap_item as out
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      pending_inc.new_weight[to] = CEPH_OSD_OUT;
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(tmpmap.is_out(to));
      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
    }
    {
      // *maybe_remove_pg_upmaps* should be able to remove the above *bad* mapping
      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
      OSDMap nextmap;
      nextmap.deepish_copy_from(tmpmap);
      nextmap.maybe_remove_pg_upmaps(g_ceph_context, nextmap, &pending_inc);
      tmpmap.apply_incremental(pending_inc);
      ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid));
    }
  }

  {
    // http://tracker.ceph.com/issues/37968

    // build a temporary crush topology of 2 hosts, 3 osds per host
    OSDMap tmp; // use a tmpmap here, so we do not dirty the original map
    tmp.deepish_copy_from(osdmap);
    const int expected_host_num = 2;
    int osd_per_host = get_num_osds() / expected_host_num;
    ASSERT_GE(osd_per_host, 3);
    int index = 0;
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (i && i % osd_per_host == 0) {
        ++index;
      }
      stringstream osd_name;
      stringstream host_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      host_name << "host-" << index;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name.str();
      move_to.push_back(host_loc);
      auto r = crush_move(tmp, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }

    // build a crush rule that picks 2 hosts, then 2 OSDs within each
    CrushWrapper crush;
    get_crush(tmp, crush);
    string rule_name = "rule_37968";
    int rule_type = pg_pool_t::TYPE_ERASURE;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    int rno;
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
        break;
    }
    string root_name = "default";
    int root = crush.get_item_id(root_name);
    int min_size = 3;
    int max_size = 4;
    int steps = 6;
    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      tmp.apply_incremental(pending_inc);
    }

    // create an erasure-coded pool referencing the above rule
    int64_t pool_37968;
    {
      OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
      new_pool_inc.new_pool_max = tmp.get_pool_max();
      new_pool_inc.fsid = tmp.get_fsid();
      pg_pool_t empty;
      pool_37968 = ++new_pool_inc.new_pool_max;
      pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty);
      p->size = 4;
      p->set_pg_num(8);
      p->set_pgp_num(8);
      p->type = pg_pool_t::TYPE_ERASURE;
      p->crush_rule = rno;
      p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
      new_pool_inc.new_pool_names[pool_37968] = "pool_37968";
      tmp.apply_incremental(new_pool_inc);
    }

    pg_t ec_pg(0, pool_37968);
    pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg);
    int from = -1;
    int to = -1;
    {
      // insert a valid pg_upmap_item
      vector<int> ec_up;
      int ec_up_primary;
      tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
      ASSERT_TRUE(ec_up.size() == 4);
      from = *(ec_up.begin());
      ASSERT_TRUE(from >= 0);
      auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno);
      ASSERT_TRUE(parent < 0);
      // pick an OSD under the same parent as *from*
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
          auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno);
          if (p == parent) {
            to = i;
            break;
          }
        }
      }
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[ec_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
    }
    {
      // *maybe_remove_pg_upmaps* should not remove the above upmap_item,
      // since remapping within the same host does not violate the rule
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      OSDMap nextmap;
      nextmap.deepish_copy_from(tmp);
      nextmap.maybe_remove_pg_upmaps(g_ceph_context, nextmap, &pending_inc);
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
    }
  }

  {
    // TEST pg_upmap
    {
      // STEP-1: enumerate all children of up[0]'s parent,
      // replace up[1] with one of them (other than up[0])
      int parent = osdmap.crush->get_parent_of_type(up[0],
        osdmap.crush->get_type_id("host"));
      set<int> candidates;
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates);
      ASSERT_LT(1U, candidates.size());
      int replaced_by = -1;
      for (auto c: candidates) {
        if (c != up[0]) {
          replaced_by = c;
          break;
        }
      }
      {
        // Check we can handle a negative pg_upmap value
        vector<int32_t> new_pg_upmap;
        new_pg_upmap.push_back(up[0]);
        new_pg_upmap.push_back(-823648512);
        OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
        pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
          new_pg_upmap.begin(), new_pg_upmap.end());
        osdmap.apply_incremental(pending_inc);
        vector<int> new_up;
        int new_up_primary;
        // crucial call - _apply_upmap should ignore the negative value
        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      }
      ASSERT_NE(-1, replaced_by);
      // generate a new pg_upmap item and apply
      vector<int32_t> new_pg_upmap;
      new_pg_upmap.push_back(up[0]);
      new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
        new_pg_upmap.begin(), new_pg_upmap.end());
      osdmap.apply_incremental(pending_inc);
      {
        // validate pg_upmap is there
        vector<int> new_up;
        int new_up_primary;
        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
        ASSERT_TRUE(up.size() == new_up.size());
        ASSERT_TRUE(new_up[0] == new_pg_upmap[0]);
        ASSERT_TRUE(new_up[1] == new_pg_upmap[1]);
        // and we shall have two OSDs from the same host now..
        int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
          osdmap.crush->get_type_id("host"));
        int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
          osdmap.crush->get_type_id("host"));
        ASSERT_TRUE(parent_0 == parent_1);
      }
    }
    {
      // STEP-2: apply cure
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, &pending_inc);
      osdmap.apply_incremental(pending_inc);
      {
        // validate pg_upmap is gone (reverted)
        vector<int> new_up;
        int new_up_primary;
        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
        ASSERT_TRUE(new_up == up);
        ASSERT_EQ(new_up_primary, up_primary);
      }
    }
  }

  {
    // TEST pg_upmap_items
    // enumerate all used hosts first
    set<int> parents;
    for (auto u: up) {
      int parent = osdmap.crush->get_parent_of_type(u,
        osdmap.crush->get_type_id("host"));
      ASSERT_GT(0, parent);
      parents.insert(parent);
    }
    int candidate_parent = 0;
    set<int> candidate_children;
    vector<int> up_after_out;
    {
      // STEP-1: try marking out up[1] and all other OSDs from the same host
      int parent = osdmap.crush->get_parent_of_type(up[1],
        osdmap.crush->get_type_id("host"));
      set<int> children;
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
                               &children);
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      for (auto c: children) {
        pending_inc.new_weight[c] = CEPH_OSD_OUT;
      }
      OSDMap tmpmap;
      tmpmap.deepish_copy_from(osdmap);
      tmpmap.apply_incremental(pending_inc);
      vector<int> new_up;
      int new_up_primary;
      tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
      // verify that we'll have OSDs from a different host..
      int will_choose = -1;
      for (auto o: new_up) {
        int parent = tmpmap.crush->get_parent_of_type(o,
          osdmap.crush->get_type_id("host"));
        if (!parents.count(parent)) {
          will_choose = o;
          candidate_parent = parent; // record
          break;
        }
      }
      ASSERT_LT(-1, will_choose); // it is an OSD!
      ASSERT_TRUE(candidate_parent != 0);
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent),
                               &candidate_children);
      ASSERT_TRUE(candidate_children.count(will_choose));
      candidate_children.erase(will_choose);
      ASSERT_TRUE(!candidate_children.empty());
      up_after_out = new_up; // needed for verification..
    }
    {
      // Make sure we can handle a negative pg_upmap_item
      int victim = up[0];
      int replaced_by = -823648512;
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
      // apply
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      vector<int> new_up;
      int new_up_primary;
      // crucial call - _apply_upmap should ignore the negative value
      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
    }
    {
      // STEP-2: generating a new pg_upmap_items entry by
      // replacing up[0] with one coming from candidate_children
      int victim = up[0];
      int replaced_by = *candidate_children.begin();
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
      // apply
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      {
        // validate pg_upmap_items is there
        vector<int> new_up;
        int new_up_primary;
        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
        ASSERT_TRUE(up.size() == new_up.size());
        ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) !=
                    new_up.end());
        // and up[1] too
        ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) !=
                    new_up.end());
      }
    }
    {
      // STEP-3: mark out up[1] and all other OSDs from the same host
      int parent = osdmap.crush->get_parent_of_type(up[1],
        osdmap.crush->get_type_id("host"));
      set<int> children;
      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
                               &children);
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      for (auto c: children) {
        pending_inc.new_weight[c] = CEPH_OSD_OUT;
      }
      osdmap.apply_incremental(pending_inc);
      {
        // validate we have two OSDs from the same host now..
        vector<int> new_up;
        int new_up_primary;
        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
        ASSERT_TRUE(up.size() == new_up.size());
        int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
          osdmap.crush->get_type_id("host"));
        int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
          osdmap.crush->get_type_id("host"));
        ASSERT_TRUE(parent_0 == parent_1);
      }
    }
    {
      // STEP-4: apply cure
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, &pending_inc);
      osdmap.apply_incremental(pending_inc);
      {
        // validate pg_upmap_items is gone (reverted)
        vector<int> new_up;
        int new_up_primary;
        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
        ASSERT_TRUE(new_up == up_after_out);
      }
    }
  }
}

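// Regression test for http://tracker.ceph.com/issues/38897: two pools whose
// rules both pin osd.0 as the first choice; pool1's PGs are then force-mapped
// onto osd.1 so that osd.0 looks *underfull* and osd.1 *overfull*, and
// calc_pg_upmaps() restricted to pool1 must still complete cleanly.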
TEST_F(OSDMapTest, BUG_38897) {
  // http://tracker.ceph.com/issues/38897
  // build a fresh map with 12 OSDs, without any default pools
  set_up_map(12, true);
  const string pool_1("pool1");
  const string pool_2("pool2");
  int64_t pool_1_id = -1;

  {
    // build customized crush rule for "pool1"
    string host_name = "host_for_pool_1";
    // build a customized host to capture osd.1~4
    for (int i = 1; i < 5; i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }
    CrushWrapper crush;
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0);
    string rule_name = "rule_for_pool1";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    int rno;
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
        break;
    }
    int min_size = 3;
    int max_size = 3;
    int steps = 7;
    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick the remaining replicas from the customized host
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }

    // create "pool1"
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pool_1_id = pool_id;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->min_size = 1;
    p->set_pg_num(3);
    p->set_pgp_num(3);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_1;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1);
    {
      for (unsigned i = 0; i < 3; i++) {
        // 1.x -> [1]
        pg_t rawpg(i, pool_id);
        pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
        vector<int> up;
        int up_primary;
        osdmap.pg_to_raw_up(pgid, &up, &up_primary);
        ASSERT_TRUE(up.size() == 3);
        ASSERT_TRUE(up[0] == 0);

        // insert a new pg_upmap
        vector<int32_t> new_up;
        // and remap 1.x to osd.1 only
        // this way osd.0 is deemed to be *underfull*
        // and osd.1 is deemed to be *overfull*
        new_up.push_back(1);
        {
          OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
          pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
            new_up.begin(), new_up.end());
          osdmap.apply_incremental(pending_inc);
        }
        osdmap.pg_to_raw_up(pgid, &up, &up_primary);
        ASSERT_TRUE(up.size() == 1);
        ASSERT_TRUE(up[0] == 1);
      }
    }
  }

  {
    // build customized crush rule for "pool2"
    string host_name = "host_for_pool_2";
    // build a customized host to capture osd.6~11
    for (int i = 6; i < (int)get_num_osds(); i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }
    CrushWrapper crush;
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0);
    string rule_name = "rule_for_pool2";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    int rno;
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
        break;
    }
    int min_size = 3;
    int max_size = 3;
    int steps = 7;
    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick the remaining replicas from the customized host
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }

    // create "pool2"
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    // include a single PG
    p->set_pg_num(1);
    p->set_pgp_num(1);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_2;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2);
    pg_t rawpg(0, pool_id);
    pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
    EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid));
    vector<int> up;
    int up_primary;
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    ASSERT_TRUE(up.size() == 3);
    ASSERT_TRUE(up[0] == 0);

    {
      // build a pg_upmap_item that will
      // remap pg out from *underfull* osd.0
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      ASSERT_TRUE(osdmap.have_pg_upmaps(pgid));
      vector<int> up;
      int up_primary;
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 3);
      ASSERT_TRUE(up[0] == 10);
    }
  }

  // ready to go
  {
    // require perfect distribution!
    auto ret = g_ceph_context->_conf->set_val(
      "osd_calc_pg_upmaps_max_stddev", "0");
    ASSERT_EQ(0, ret);
    g_ceph_context->_conf->apply_changes(nullptr);
    set<int64_t> only_pools;
    ASSERT_TRUE(pool_1_id >= 0);
    only_pools.insert(pool_1_id);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0, // so we can force optimizing
                          100,
                          only_pools,
                          &pending_inc);
    osdmap.apply_incremental(pending_inc);
  }
}

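// PGTempMap is the compact container OSDMap uses for pg_temp entries; check
// basic set/find/begin/end/size semantics.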
TEST(PGTempMap, basic)
{
  PGTempMap m;
  pg_t a(1,1);
  for (auto i=3; i<1000; ++i) {
    pg_t x(i, 1);
    m.set(x, {static_cast<int>(i)});
  }
  pg_t b(2,1);
  m.set(a, {1, 2});
  ASSERT_NE(m.find(a), m.end());
  ASSERT_EQ(m.find(a), m.begin());
  ASSERT_EQ(m.find(b), m.end());
  ASSERT_EQ(998u, m.size());
}