]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/osd/TestOSDMap.cc
import ceph 16.2.7
[ceph.git] / ceph / src / test / osd / TestOSDMap.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2#include "gtest/gtest.h"
3#include "osd/OSDMap.h"
4#include "osd/OSDMapMapping.h"
494da23a 5#include "mon/OSDMonitor.h"
f67539c2 6#include "mon/PGMap.h"
7c673cae
FG
7
8#include "global/global_context.h"
9#include "global/global_init.h"
10#include "common/common_init.h"
31f18b77 11#include "common/ceph_argparse.h"
f67539c2 12#include "common/ceph_json.h"
7c673cae
FG
13
14#include <iostream>
15
16using namespace std;
17
18int main(int argc, char **argv) {
11fdf7f2
TL
19 map<string,string> defaults = {
20 // make sure we have 3 copies, or some tests won't work
21 { "osd_pool_default_size", "3" },
22 // our map is flat, so just try and split across OSDs, not hosts or whatever
23 { "osd_crush_chooseleaf_type", "0" },
24 };
7c673cae 25 std::vector<const char*> args(argv, argv+argc);
11fdf7f2 26 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
7c673cae
FG
27 CODE_ENVIRONMENT_UTILITY,
28 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
29 common_init_finish(g_ceph_context);
7c673cae
FG
30 ::testing::InitGoogleTest(&argc, argv);
31 return RUN_ALL_TESTS();
32}
33
a4b75251
TL
// Fixture shared by all OSDMap tests: builds a small flat OSDMap
// (6 OSDs by default, all up and in) plus one erasure-coded and one
// replicated pool, and provides CRUSH-manipulation helpers used by
// the individual tests below.
class OSDMapTest : public testing::Test,
                   public ::testing::WithParamInterface<std::pair<int, int>> {
  int num_osds = 6;  // actual value is (re)set by set_up_map()
public:
  OSDMap osdmap;
  OSDMapMapping mapping;  // precalculated mapping, cross-checked in test_mappings()
  // ceph_assert()s in set_up_map() guarantee the default pools land on
  // exactly these ids, so tests can refer to them directly.
  const uint64_t my_ec_pool = 1;
  const uint64_t my_rep_pool = 2;

  OSDMapTest() {}

  // Build a fresh osdmap with new_num_osds OSDs marked EXISTS|NEW,
  // up (client/cluster/heartbeat addresses) and in (full weight).
  // Unless no_default_pools is set, also create an erasure rule plus
  // the "ec" (id my_ec_pool) and "reppool" (id my_rep_pool) pools,
  // each with 64 PGs and size 3.
  void set_up_map(int new_num_osds = 6, bool no_default_pools = false) {
    num_osds = new_num_osds;
    uuid_d fsid;
    osdmap.build_simple(g_ceph_context, 0, fsid, num_osds);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.fsid = osdmap.get_fsid();
    entity_addrvec_t sample_addrs;
    sample_addrs.v.push_back(entity_addr_t());
    uuid_d sample_uuid;
    for (int i = 0; i < num_osds; ++i) {
      sample_uuid.generate_random();
      sample_addrs.v[0].nonce = i;  // distinct nonce so each OSD's address differs
      pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
      pending_inc.new_up_client[i] = sample_addrs;
      pending_inc.new_up_cluster[i] = sample_addrs;
      pending_inc.new_hb_back_up[i] = sample_addrs;
      pending_inc.new_hb_front_up[i] = sample_addrs;
      pending_inc.new_weight[i] = CEPH_OSD_IN;
      pending_inc.new_uuid[i] = sample_uuid;
    }
    osdmap.apply_incremental(pending_inc);
    if (no_default_pools) // do not create any default pool(s)
      return;

    // Create an EC ruleset and a pool using it
    int r = osdmap.crush->add_simple_rule(
      "erasure", "default", "osd", "",
      "indep", pg_pool_t::TYPE_ERASURE,
      &cerr);

    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.new_pool_max = osdmap.get_pool_max();
    new_pool_inc.fsid = osdmap.get_fsid();
    pg_pool_t empty;
    // make an ec pool
    uint64_t pool_id = ++new_pool_inc.new_pool_max;
    ceph_assert(pool_id == my_ec_pool);
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->set_pg_num(64);
    p->set_pgp_num(64);
    p->type = pg_pool_t::TYPE_ERASURE;
    p->crush_rule = r;
    new_pool_inc.new_pool_names[pool_id] = "ec";
    // and a replicated pool
    pool_id = ++new_pool_inc.new_pool_max;
    ceph_assert(pool_id == my_rep_pool);
    p = new_pool_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->set_pg_num(64);
    p->set_pgp_num(64);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = 0;  // default replicated rule created by build_simple()
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_id] = "reppool";
    osdmap.apply_incremental(new_pool_inc);
  }
  // Number of OSDs the current map was built with.
  unsigned int get_num_osds() { return num_osds; }
  // Deep-copy tmap's CRUSH map into newcrush via an encode/decode
  // round-trip, so the copy can be edited without touching tmap.
  void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) {
    bufferlist bl;
    tmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
    auto p = bl.cbegin();
    newcrush.decode(p);
  }
  // Move the CRUSH item `name` to the location described by argvec
  // (e.g. {"root=default", "host=host-0"}) and apply the resulting
  // CRUSH map to tmap. Returns 0 on success (or if the item is
  // already at that location), -ENOENT if the item does not exist.
  int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) {
    map<string,string> loc;
    CrushWrapper::parse_loc_map(argvec, &loc);
    CrushWrapper newcrush;
    get_crush(tmap, newcrush);
    if (!newcrush.name_exists(name)) {
      return -ENOENT;
    }
    int id = newcrush.get_item_id(name);
    int err;
    if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
      if (id >= 0) {
        // non-negative ids are devices (OSDs)
        err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc);
      } else {
        // negative ids are buckets (hosts, racks, ...)
        err = newcrush.move_bucket(g_ceph_context, id, loc);
      }
      if (err >= 0) {
        OSDMap::Incremental pending_inc(tmap.get_epoch() + 1);
        pending_inc.crush.clear();
        newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
        tmap.apply_incremental(pending_inc);
        err = 0;
      }
    } else {
      // already there
      err = 0;
    }
    return err;
  }
  // Create (or look up, if it already exists) a replicated "firstn"
  // CRUSH rule named `name` under `root`, splitting across buckets of
  // `type`, and apply it to the fixture's osdmap. Returns the rule id
  // or a negative error.
  int crush_rule_create_replicated(const string &name,
				   const string &root,
				   const string &type) {
    if (osdmap.crush->rule_exists(name)) {
      return osdmap.crush->get_rule_id(name);
    }
    CrushWrapper newcrush;
    get_crush(osdmap, newcrush);
    string device_class;
    stringstream ss;
    int ruleno = newcrush.add_simple_rule(
      name, root, type, device_class,
      "firstn", pg_pool_t::TYPE_REPLICATED, &ss);
    if (ruleno >= 0) {
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }
    return ruleno;
  }
  // Map pgs 0..num-1 of `pool` and tally, per OSD: membership in the
  // acting set (*any), first position (*first) and acting primary
  // (*primary). Each vector must be pre-sized to the OSD count. Also
  // verifies that the precalculated OSDMapMapping agrees with the
  // direct pg_to_up_acting_osds() result for every pg.
  void test_mappings(int pool,
		     int num,
		     vector<int> *any,
		     vector<int> *first,
		     vector<int> *primary) {
    mapping.update(osdmap);
    for (int i=0; i<num; ++i) {
      vector<int> up, acting;
      int up_primary, acting_primary;
      pg_t pgid(i, pool);
      osdmap.pg_to_up_acting_osds(pgid,
				  &up, &up_primary, &acting, &acting_primary);
      for (unsigned j=0; j<acting.size(); ++j)
	(*any)[acting[j]]++;
      if (!acting.empty())
	(*first)[acting[0]]++;
      if (acting_primary >= 0)
	(*primary)[acting_primary]++;

      // compare to precalc mapping
      vector<int> up2, acting2;
      int up_primary2, acting_primary2;
      pgid = osdmap.raw_pg_to_pg(pgid);
      mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
      ASSERT_EQ(up, up2);
      ASSERT_EQ(up_primary, up_primary2);
      ASSERT_EQ(acting, acting2);
      ASSERT_EQ(acting_primary, acting_primary2);
    }
    cout << "any: " << *any << std::endl;;
    cout << "first: " << *first << std::endl;;
    cout << "primary: " << *primary << std::endl;;
  }
  // Run OSDMonitor's parallel upmap-cleaning job over all upmapped
  // pgs of `om`, collecting any removals of stale/invalid upmaps into
  // pending_inc. Spawns a short-lived thread pool for the mapper.
  void clean_pg_upmaps(CephContext *cct,
		       const OSDMap& om,
		       OSDMap::Incremental& pending_inc) {
    int cpu_num = 8;
    int pgs_per_chunk = 256;
    ThreadPool tp(cct, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num);
    tp.start();
    ParallelPGMapper mapper(cct, &tp);
    vector<pg_t> pgs_to_check;
    om.get_upmap_pgs(&pgs_to_check);
    OSDMonitor::CleanUpmapJob job(cct, om, pending_inc);
    mapper.queue(&job, pgs_per_chunk, pgs_to_check);
    job.wait();
    tp.stop();
  }
};
209
210TEST_F(OSDMapTest, Create) {
211 set_up_map();
212 ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd());
213 ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds());
214}
215
// Verify the feature bits OSDMap advertises for different entity
// types, before and after removing the EC pool.
TEST_F(OSDMapTest, Features) {
  // with EC pool
  set_up_map();
  uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // clients have a slightly different view
  features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // remove the EC pool, but leave the rule. add primary affinity.
  {
    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
    new_pool_inc.new_primary_affinity[0] = 0x8000;
    osdmap.apply_incremental(new_pool_inc);
  }

  // CRUSH_V2 should no longer be required once no pool references
  // the "indep" EC rule
  features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
  ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);

  // FIXME: test tiering feature bits
}
254
255TEST_F(OSDMapTest, MapPG) {
256 set_up_map();
257
224ce89b 258 std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl;
11fdf7f2 259 pg_t rawpg(0, my_rep_pool);
7c673cae
FG
260 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
261 vector<int> up_osds, acting_osds;
262 int up_primary, acting_primary;
263
264 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
265 &acting_osds, &acting_primary);
266
267 vector<int> old_up_osds, old_acting_osds;
268 osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
269 ASSERT_EQ(old_up_osds, up_osds);
270 ASSERT_EQ(old_acting_osds, acting_osds);
271
224ce89b 272 ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size());
7c673cae
FG
273}
274
275TEST_F(OSDMapTest, MapFunctionsMatch) {
276 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
277 set_up_map();
11fdf7f2 278 pg_t rawpg(0, my_rep_pool);
7c673cae
FG
279 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
280 vector<int> up_osds, acting_osds;
281 int up_primary, acting_primary;
282
283 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
284 &acting_osds, &acting_primary);
285
286 vector<int> up_osds_two, acting_osds_two;
287
288 osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);
289
290 ASSERT_EQ(up_osds, up_osds_two);
291 ASSERT_EQ(acting_osds, acting_osds_two);
292
293 int acting_primary_two;
294 osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
295 EXPECT_EQ(acting_osds, acting_osds_two);
296 EXPECT_EQ(acting_primary, acting_primary_two);
297 osdmap.pg_to_acting_osds(pgid, acting_osds_two);
298 EXPECT_EQ(acting_osds, acting_osds_two);
299}
300
301/** This test must be removed or modified appropriately when we allow
302 * other ways to specify a primary. */
303TEST_F(OSDMapTest, PrimaryIsFirst) {
304 set_up_map();
305
11fdf7f2 306 pg_t rawpg(0, my_rep_pool);
7c673cae
FG
307 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
308 vector<int> up_osds, acting_osds;
309 int up_primary, acting_primary;
310
311 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
312 &acting_osds, &acting_primary);
313 EXPECT_EQ(up_osds[0], up_primary);
314 EXPECT_EQ(acting_osds[0], acting_primary);
315}
316
317TEST_F(OSDMapTest, PGTempRespected) {
318 set_up_map();
319
11fdf7f2 320 pg_t rawpg(0, my_rep_pool);
7c673cae
FG
321 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
322 vector<int> up_osds, acting_osds;
323 int up_primary, acting_primary;
324
325 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
326 &acting_osds, &acting_primary);
327
328 // copy and swap first and last element in acting_osds
329 vector<int> new_acting_osds(acting_osds);
330 int first = new_acting_osds[0];
331 new_acting_osds[0] = *new_acting_osds.rbegin();
332 *new_acting_osds.rbegin() = first;
333
334 // apply pg_temp to osdmap
335 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
336 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
337 new_acting_osds.begin(), new_acting_osds.end());
338 osdmap.apply_incremental(pgtemp_map);
339
340 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
341 &acting_osds, &acting_primary);
342 EXPECT_EQ(new_acting_osds, acting_osds);
343}
344
345TEST_F(OSDMapTest, PrimaryTempRespected) {
346 set_up_map();
347
11fdf7f2 348 pg_t rawpg(0, my_rep_pool);
7c673cae
FG
349 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
350 vector<int> up_osds;
351 vector<int> acting_osds;
352 int up_primary, acting_primary;
353
354 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
355 &acting_osds, &acting_primary);
356
357 // make second OSD primary via incremental
358 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
359 pgtemp_map.new_primary_temp[pgid] = acting_osds[1];
360 osdmap.apply_incremental(pgtemp_map);
361
362 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
363 &acting_osds, &acting_primary);
364 EXPECT_EQ(acting_primary, acting_osds[1]);
365}
366
367TEST_F(OSDMapTest, CleanTemps) {
368 set_up_map();
369
370 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
371 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
224ce89b 372 pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
7c673cae
FG
373 {
374 vector<int> up_osds, acting_osds;
375 int up_primary, acting_primary;
376 osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary,
377 &acting_osds, &acting_primary);
378 pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>(
379 up_osds.begin(), up_osds.end());
380 pgtemp_map.new_primary_temp[pga] = up_primary;
381 }
224ce89b 382 pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool));
7c673cae
FG
383 {
384 vector<int> up_osds, acting_osds;
385 int up_primary, acting_primary;
386 osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary,
387 &acting_osds, &acting_primary);
388 pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
389 up_osds.begin(), up_osds.end());
390 pending_inc.new_primary_temp[pgb] = up_primary;
391 }
392
393 osdmap.apply_incremental(pgtemp_map);
394
11fdf7f2
TL
395 OSDMap tmpmap;
396 tmpmap.deepish_copy_from(osdmap);
397 tmpmap.apply_incremental(pending_inc);
398 OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc);
7c673cae
FG
399
400 EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
401 pending_inc.new_pg_temp[pga].size() == 0);
402 EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);
403
404 EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) &&
405 !pending_inc.new_primary_temp.count(pgb));
406}
407
408TEST_F(OSDMapTest, KeepsNecessaryTemps) {
409 set_up_map();
410
11fdf7f2 411 pg_t rawpg(0, my_rep_pool);
7c673cae
FG
412 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
413 vector<int> up_osds, acting_osds;
414 int up_primary, acting_primary;
415
416 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
417 &acting_osds, &acting_primary);
418
419 // find unused OSD and stick it in there
420 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
421 // find an unused osd and put it in place of the first one
422 int i = 0;
423 for(; i != (int)get_num_osds(); ++i) {
424 bool in_use = false;
425 for (vector<int>::iterator osd_it = up_osds.begin();
426 osd_it != up_osds.end();
427 ++osd_it) {
428 if (i == *osd_it) {
429 in_use = true;
430 break;
431 }
432 }
433 if (!in_use) {
434 up_osds[1] = i;
435 break;
436 }
437 }
438 if (i == (int)get_num_osds())
439 FAIL() << "did not find unused OSD for temp mapping";
440
441 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
442 up_osds.begin(), up_osds.end());
443 pgtemp_map.new_primary_temp[pgid] = up_osds[1];
444 osdmap.apply_incremental(pgtemp_map);
445
446 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
447
11fdf7f2
TL
448 OSDMap tmpmap;
449 tmpmap.deepish_copy_from(osdmap);
450 tmpmap.apply_incremental(pending_inc);
451 OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc);
7c673cae
FG
452 EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
453 EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
454}
455
// Exercise per-OSD primary affinity: affinity 0 removes an OSD from
// primary (and, for replicated pools, first) duty; 0x8000 halves its
// share; 0x10000 restores full weight. Runs against every pool the
// fixture created (both the EC and the replicated one).
TEST_F(OSDMapTest, PrimaryAffinity) {
  set_up_map();

  int n = get_num_osds();
  for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
       p != osdmap.get_pools().end();
       ++p) {
    int pool = p->first;
    // with even affinity, each OSD expects ~1/n of 10000 primary slots
    int expect_primary = 10000 / n;
    cout << "pool " << pool << " size " << (int)p->second.size
	 << " expect_primary " << expect_primary << std::endl;
    {
      // baseline: default affinity, every OSD serves in every role
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      for (int i=0; i<n; ++i) {
	ASSERT_LT(0, any[i]);
	ASSERT_LT(0, first[i]);
	ASSERT_LT(0, primary[i]);
      }
    }

    // zero affinity: osd.0 and osd.1 must never be primary (and for
    // replicated pools never first, since primary == first there)
    osdmap.set_primary_affinity(0, 0);
    osdmap.set_primary_affinity(1, 0);
    {
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      for (int i=0; i<n; ++i) {
	ASSERT_LT(0, any[i]);  // still serve as replicas/shards
	if (i >= 2) {
	  ASSERT_LT(0, first[i]);
	  ASSERT_LT(0, primary[i]);
	} else {
	  if (p->second.is_replicated()) {
	    ASSERT_EQ(0, first[i]);
	  }
	  ASSERT_EQ(0, primary[i]);
	}
      }
    }

    // half affinity on osd.0, zero on osd.1
    osdmap.set_primary_affinity(0, 0x8000);
    osdmap.set_primary_affinity(1, 0);
    {
      vector<int> any(n, 0);
      vector<int> first(n, 0);
      vector<int> primary(n, 0);
      test_mappings(pool, 10000, &any, &first, &primary);
      int expect = (10000 / (n-2)) / 2; // half weight
      cout << "expect " << expect << std::endl;
      for (int i=0; i<n; ++i) {
	ASSERT_LT(0, any[i]);
	if (i >= 2) {
	  ASSERT_LT(0, first[i]);
	  ASSERT_LT(0, primary[i]);
	} else if (i == 1) {
	  if (p->second.is_replicated()) {
	    ASSERT_EQ(0, first[i]);
	  }
	  ASSERT_EQ(0, primary[i]);
	} else {
	  // osd.0: within +/-33% of the halved expectation
	  ASSERT_LT(expect *2/3, primary[0]);
	  ASSERT_GT(expect *4/3, primary[0]);
	}
      }
    }

    // restore full affinity for the next pool's iteration
    osdmap.set_primary_affinity(0, 0x10000);
    osdmap.set_primary_affinity(1, 0x10000);
  }
}
31f18b77 530
81eedcae
TL
531TEST_F(OSDMapTest, get_osd_crush_node_flags) {
532 set_up_map();
533
534 for (unsigned i=0; i<get_num_osds(); ++i) {
535 ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i));
536 }
537
538 OSDMap::Incremental inc(osdmap.get_epoch() + 1);
539 inc.new_crush_node_flags[-1] = 123u;
540 osdmap.apply_incremental(inc);
541 for (unsigned i=0; i<get_num_osds(); ++i) {
542 ASSERT_EQ(123u, osdmap.get_osd_crush_node_flags(i));
543 }
544 ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000));
545
546 OSDMap::Incremental inc3(osdmap.get_epoch() + 1);
547 inc3.new_crush_node_flags[-1] = 456u;
548 osdmap.apply_incremental(inc3);
549 for (unsigned i=0; i<get_num_osds(); ++i) {
550 ASSERT_EQ(456u, osdmap.get_osd_crush_node_flags(i));
551 }
552 ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000));
553
554 OSDMap::Incremental inc2(osdmap.get_epoch() + 1);
555 inc2.new_crush_node_flags[-1] = 0;
556 osdmap.apply_incremental(inc2);
557 for (unsigned i=0; i<get_num_osds(); ++i) {
558 ASSERT_EQ(0u, osdmap.get_crush_node_flags(i));
559 }
560}
561
35e4c445
FG
562TEST_F(OSDMapTest, parse_osd_id_list) {
563 set_up_map();
564 set<int> out;
565 set<int> all;
566 osdmap.get_all_osds(all);
567
568 ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout));
11fdf7f2 569 ASSERT_EQ(1u, out.size());
35e4c445
FG
570 ASSERT_EQ(0, *out.begin());
571
572 ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout));
11fdf7f2 573 ASSERT_EQ(1u, out.size());
35e4c445
FG
574 ASSERT_EQ(1, *out.begin());
575
576 ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout));
11fdf7f2 577 ASSERT_EQ(2u, out.size());
35e4c445
FG
578 ASSERT_EQ(0, *out.begin());
579 ASSERT_EQ(1, *out.rbegin());
580
581 ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout));
11fdf7f2 582 ASSERT_EQ(2u, out.size());
35e4c445
FG
583 ASSERT_EQ(0, *out.begin());
584 ASSERT_EQ(1, *out.rbegin());
585
586 ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout));
587 ASSERT_EQ(all.size(), out.size());
588 ASSERT_EQ(all, out);
589
590 ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout));
591 ASSERT_EQ(all, out);
592
593 ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout));
594 ASSERT_EQ(all, out);
595
596 ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout));
597 ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout));
598}
599
94b18763
FG
600TEST_F(OSDMapTest, CleanPGUpmaps) {
601 set_up_map();
602
603 // build a crush rule of type host
604 const int expected_host_num = 3;
605 int osd_per_host = get_num_osds() / expected_host_num;
606 ASSERT_GE(2, osd_per_host);
607 int index = 0;
608 for (int i = 0; i < (int)get_num_osds(); i++) {
609 if (i && i % osd_per_host == 0) {
610 ++index;
611 }
612 stringstream osd_name;
613 stringstream host_name;
614 vector<string> move_to;
615 osd_name << "osd." << i;
616 host_name << "host-" << index;
617 move_to.push_back("root=default");
618 string host_loc = "host=" + host_name.str();
619 move_to.push_back(host_loc);
a8e16298 620 int r = crush_move(osdmap, osd_name.str(), move_to);
94b18763
FG
621 ASSERT_EQ(0, r);
622 }
623 const string upmap_rule = "upmap";
624 int upmap_rule_no = crush_rule_create_replicated(
625 upmap_rule, "default", "host");
626 ASSERT_LT(0, upmap_rule_no);
627
628 // create a replicated pool which references the above rule
629 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
630 new_pool_inc.new_pool_max = osdmap.get_pool_max();
631 new_pool_inc.fsid = osdmap.get_fsid();
632 pg_pool_t empty;
633 uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max;
634 pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty);
635 p->size = 2;
636 p->set_pg_num(64);
637 p->set_pgp_num(64);
638 p->type = pg_pool_t::TYPE_REPLICATED;
639 p->crush_rule = upmap_rule_no;
640 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
641 new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool";
642 osdmap.apply_incremental(new_pool_inc);
643
644 pg_t rawpg(0, upmap_pool_id);
645 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
646 vector<int> up;
647 int up_primary;
648 osdmap.pg_to_raw_up(pgid, &up, &up_primary);
649 ASSERT_LT(1U, up.size());
650 {
651 // validate we won't have two OSDs from a same host
652 int parent_0 = osdmap.crush->get_parent_of_type(up[0],
653 osdmap.crush->get_type_id("host"));
654 int parent_1 = osdmap.crush->get_parent_of_type(up[1],
655 osdmap.crush->get_type_id("host"));
656 ASSERT_TRUE(parent_0 != parent_1);
657 }
658
f64942e4
AA
659 {
660 // cancel stale upmaps
661 osdmap.pg_to_raw_up(pgid, &up, &up_primary);
662 int from = -1;
663 for (int i = 0; i < (int)get_num_osds(); i++) {
664 if (std::find(up.begin(), up.end(), i) == up.end()) {
665 from = i;
666 break;
667 }
668 }
669 ASSERT_TRUE(from >= 0);
670 int to = -1;
671 for (int i = 0; i < (int)get_num_osds(); i++) {
672 if (std::find(up.begin(), up.end(), i) == up.end() && i != from) {
673 to = i;
674 break;
675 }
676 }
677 ASSERT_TRUE(to >= 0);
678 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
679 new_pg_upmap_items.push_back(make_pair(from, to));
680 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
681 pending_inc.new_pg_upmap_items[pgid] =
682 mempool::osdmap::vector<pair<int32_t,int32_t>>(
683 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
684 OSDMap nextmap;
685 nextmap.deepish_copy_from(osdmap);
686 nextmap.apply_incremental(pending_inc);
687 ASSERT_TRUE(nextmap.have_pg_upmaps(pgid));
688 OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1);
494da23a 689 clean_pg_upmaps(g_ceph_context, nextmap, new_pending_inc);
f64942e4
AA
690 nextmap.apply_incremental(new_pending_inc);
691 ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));
692 }
693
694 {
695 // https://tracker.ceph.com/issues/37493
696 pg_t ec_pg(0, my_ec_pool);
697 pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
698 OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
699 int from = -1;
700 int to = -1;
701 {
702 // insert a valid pg_upmap_item
703 vector<int> ec_up;
704 int ec_up_primary;
705 osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
706 ASSERT_TRUE(!ec_up.empty());
707 from = *(ec_up.begin());
708 ASSERT_TRUE(from >= 0);
709 for (int i = 0; i < (int)get_num_osds(); i++) {
710 if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
711 to = i;
712 break;
713 }
714 }
715 ASSERT_TRUE(to >= 0);
716 ASSERT_TRUE(from != to);
717 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
718 new_pg_upmap_items.push_back(make_pair(from, to));
719 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
720 pending_inc.new_pg_upmap_items[ec_pgid] =
721 mempool::osdmap::vector<pair<int32_t,int32_t>>(
722 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
723 tmpmap.deepish_copy_from(osdmap);
724 tmpmap.apply_incremental(pending_inc);
725 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
726 }
727 {
728 // mark one of the target OSDs of the above pg_upmap_item as down
729 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
730 pending_inc.new_state[to] = CEPH_OSD_UP;
731 tmpmap.apply_incremental(pending_inc);
732 ASSERT_TRUE(!tmpmap.is_up(to));
733 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
734 }
735 {
494da23a 736 // confirm *clean_pg_upmaps* won't do anything bad
f64942e4 737 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
494da23a 738 clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc);
f64942e4
AA
739 tmpmap.apply_incremental(pending_inc);
740 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
741 }
742 }
743
744 {
745 // http://tracker.ceph.com/issues/37501
746 pg_t ec_pg(0, my_ec_pool);
747 pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
748 OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
749 int from = -1;
750 int to = -1;
751 {
752 // insert a valid pg_upmap_item
753 vector<int> ec_up;
754 int ec_up_primary;
755 osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
756 ASSERT_TRUE(!ec_up.empty());
757 from = *(ec_up.begin());
758 ASSERT_TRUE(from >= 0);
759 for (int i = 0; i < (int)get_num_osds(); i++) {
760 if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
761 to = i;
762 break;
763 }
764 }
765 ASSERT_TRUE(to >= 0);
766 ASSERT_TRUE(from != to);
767 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
768 new_pg_upmap_items.push_back(make_pair(from, to));
769 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
770 pending_inc.new_pg_upmap_items[ec_pgid] =
771 mempool::osdmap::vector<pair<int32_t,int32_t>>(
772 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
773 tmpmap.deepish_copy_from(osdmap);
774 tmpmap.apply_incremental(pending_inc);
775 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
776 }
777 {
778 // mark one of the target OSDs of the above pg_upmap_item as out
779 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
780 pending_inc.new_weight[to] = CEPH_OSD_OUT;
781 tmpmap.apply_incremental(pending_inc);
782 ASSERT_TRUE(tmpmap.is_out(to));
783 ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
784 }
785 {
494da23a 786 // *clean_pg_upmaps* should be able to remove the above *bad* mapping
f64942e4 787 OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
494da23a 788 clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc);
f64942e4
AA
789 tmpmap.apply_incremental(pending_inc);
790 ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid));
791 }
792 }
793
a8e16298
TL
794 {
795 // http://tracker.ceph.com/issues/37968
796
797 // build a temporary crush topology of 2 hosts, 3 osds per host
798 OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
799 tmp.deepish_copy_from(osdmap);
800 const int expected_host_num = 2;
801 int osd_per_host = get_num_osds() / expected_host_num;
802 ASSERT_GE(osd_per_host, 3);
803 int index = 0;
804 for (int i = 0; i < (int)get_num_osds(); i++) {
805 if (i && i % osd_per_host == 0) {
806 ++index;
807 }
808 stringstream osd_name;
809 stringstream host_name;
810 vector<string> move_to;
811 osd_name << "osd." << i;
812 host_name << "host-" << index;
813 move_to.push_back("root=default");
814 string host_loc = "host=" + host_name.str();
815 move_to.push_back(host_loc);
816 auto r = crush_move(tmp, osd_name.str(), move_to);
817 ASSERT_EQ(0, r);
818 }
819
820 // build crush rule
821 CrushWrapper crush;
822 get_crush(tmp, crush);
823 string rule_name = "rule_37968";
824 int rule_type = pg_pool_t::TYPE_ERASURE;
825 ASSERT_TRUE(!crush.rule_exists(rule_name));
826 int rno;
827 for (rno = 0; rno < crush.get_max_rules(); rno++) {
828 if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
829 break;
830 }
831 string root_name = "default";
832 int root = crush.get_item_id(root_name);
833 int min_size = 3;
834 int max_size = 4;
835 int steps = 6;
836 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
837 int step = 0;
838 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
839 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
840 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
841 crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host*/);
842 crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */);
843 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
844 ASSERT_TRUE(step == steps);
845 auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
846 ASSERT_TRUE(r >= 0);
847 crush.set_rule_name(rno, rule_name);
848 {
849 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
850 pending_inc.crush.clear();
851 crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
852 tmp.apply_incremental(pending_inc);
853 }
854
855 // create a erasuce-coded pool referencing the above rule
856 int64_t pool_37968;
857 {
858 OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
859 new_pool_inc.new_pool_max = tmp.get_pool_max();
860 new_pool_inc.fsid = tmp.get_fsid();
861 pg_pool_t empty;
862 pool_37968 = ++new_pool_inc.new_pool_max;
863 pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty);
864 p->size = 4;
865 p->set_pg_num(8);
866 p->set_pgp_num(8);
867 p->type = pg_pool_t::TYPE_ERASURE;
868 p->crush_rule = rno;
869 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
870 new_pool_inc.new_pool_names[pool_37968] = "pool_37968";
871 tmp.apply_incremental(new_pool_inc);
872 }
873
874 pg_t ec_pg(0, pool_37968);
875 pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg);
876 int from = -1;
877 int to = -1;
878 {
879 // insert a valid pg_upmap_item
880 vector<int> ec_up;
881 int ec_up_primary;
882 tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
883 ASSERT_TRUE(ec_up.size() == 4);
884 from = *(ec_up.begin());
885 ASSERT_TRUE(from >= 0);
886 auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno);
887 ASSERT_TRUE(parent < 0);
888 // pick an osd of the same parent with *from*
889 for (int i = 0; i < (int)get_num_osds(); i++) {
890 if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
891 auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno);
892 if (p == parent) {
893 to = i;
894 break;
895 }
896 }
897 }
898 ASSERT_TRUE(to >= 0);
899 ASSERT_TRUE(from != to);
900 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
901 new_pg_upmap_items.push_back(make_pair(from, to));
902 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
903 pending_inc.new_pg_upmap_items[ec_pgid] =
904 mempool::osdmap::vector<pair<int32_t,int32_t>>(
905 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
906 tmp.apply_incremental(pending_inc);
907 ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
908 }
909 {
494da23a 910 // *clean_pg_upmaps* should not remove the above upmap_item
a8e16298 911 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
494da23a 912 clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
a8e16298
TL
913 tmp.apply_incremental(pending_inc);
914 ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
915 }
916 }
917
94b18763
FG
918 {
919 // TEST pg_upmap
920 {
921 // STEP-1: enumerate all children of up[0]'s parent,
922 // replace up[1] with one of them (other than up[0])
923 int parent = osdmap.crush->get_parent_of_type(up[0],
924 osdmap.crush->get_type_id("host"));
925 set<int> candidates;
926 osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates);
927 ASSERT_LT(1U, candidates.size());
928 int replaced_by = -1;
929 for (auto c: candidates) {
930 if (c != up[0]) {
931 replaced_by = c;
932 break;
933 }
934 }
91327a77
AA
935 {
936 // Check we can handle a negative pg_upmap value
937 vector<int32_t> new_pg_upmap;
938 new_pg_upmap.push_back(up[0]);
939 new_pg_upmap.push_back(-823648512);
940 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
941 pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
942 new_pg_upmap.begin(), new_pg_upmap.end());
943 osdmap.apply_incremental(pending_inc);
944 vector<int> new_up;
945 int new_up_primary;
946 // crucial call - _apply_upmap should ignore the negative value
947 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
948 }
94b18763
FG
949 ASSERT_NE(-1, replaced_by);
950 // generate a new pg_upmap item and apply
951 vector<int32_t> new_pg_upmap;
952 new_pg_upmap.push_back(up[0]);
953 new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by
954 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
955 pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
956 new_pg_upmap.begin(), new_pg_upmap.end());
957 osdmap.apply_incremental(pending_inc);
958 {
959 // validate pg_upmap is there
960 vector<int> new_up;
961 int new_up_primary;
962 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
9f95a23c
TL
963 ASSERT_EQ(new_up.size(), up.size());
964 ASSERT_EQ(new_up[0], new_pg_upmap[0]);
965 ASSERT_EQ(new_up[1], new_pg_upmap[1]);
94b18763
FG
966 // and we shall have two OSDs from a same host now..
967 int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
968 osdmap.crush->get_type_id("host"));
969 int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
970 osdmap.crush->get_type_id("host"));
9f95a23c 971 ASSERT_EQ(parent_0, parent_1);
94b18763
FG
972 }
973 }
974 {
975 // STEP-2: apply cure
976 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
494da23a 977 clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
94b18763
FG
978 osdmap.apply_incremental(pending_inc);
979 {
980 // validate pg_upmap is gone (reverted)
981 vector<int> new_up;
982 int new_up_primary;
983 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
9f95a23c
TL
984 ASSERT_EQ(new_up, up);
985 ASSERT_EQ(new_up_primary, up_primary);
94b18763
FG
986 }
987 }
988 }
989
990 {
991 // TEST pg_upmap_items
992 // enumerate all used hosts first
993 set<int> parents;
994 for (auto u: up) {
995 int parent = osdmap.crush->get_parent_of_type(u,
996 osdmap.crush->get_type_id("host"));
997 ASSERT_GT(0, parent);
998 parents.insert(parent);
999 }
1000 int candidate_parent = 0;
1001 set<int> candidate_children;
1002 vector<int> up_after_out;
1003 {
1004 // STEP-1: try mark out up[1] and all other OSDs from the same host
1005 int parent = osdmap.crush->get_parent_of_type(up[1],
1006 osdmap.crush->get_type_id("host"));
1007 set<int> children;
1008 osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
1009 &children);
1010 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
1011 for (auto c: children) {
1012 pending_inc.new_weight[c] = CEPH_OSD_OUT;
1013 }
1014 OSDMap tmpmap;
1015 tmpmap.deepish_copy_from(osdmap);
1016 tmpmap.apply_incremental(pending_inc);
1017 vector<int> new_up;
1018 int new_up_primary;
1019 tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
1020 // verify that we'll have OSDs from a different host..
1021 int will_choose = -1;
1022 for (auto o: new_up) {
1023 int parent = tmpmap.crush->get_parent_of_type(o,
1024 osdmap.crush->get_type_id("host"));
1025 if (!parents.count(parent)) {
1026 will_choose = o;
1027 candidate_parent = parent; // record
1028 break;
1029 }
1030 }
1031 ASSERT_LT(-1, will_choose); // it is an OSD!
9f95a23c 1032 ASSERT_NE(candidate_parent, 0);
94b18763
FG
1033 osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent),
1034 &candidate_children);
1035 ASSERT_TRUE(candidate_children.count(will_choose));
1036 candidate_children.erase(will_choose);
9f95a23c 1037 ASSERT_FALSE(candidate_children.empty());
94b18763
FG
1038 up_after_out = new_up; // needed for verification..
1039 }
91327a77
AA
1040 {
1041 // Make sure we can handle a negative pg_upmap_item
1042 int victim = up[0];
1043 int replaced_by = -823648512;
1044 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
1045 new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
1046 // apply
1047 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
1048 pending_inc.new_pg_upmap_items[pgid] =
1049 mempool::osdmap::vector<pair<int32_t,int32_t>>(
1050 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
1051 osdmap.apply_incremental(pending_inc);
1052 vector<int> new_up;
1053 int new_up_primary;
1054 // crucial call - _apply_upmap should ignore the negative value
1055 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
1056 }
94b18763
FG
1057 {
1058 // STEP-2: generating a new pg_upmap_items entry by
1059 // replacing up[0] with one coming from candidate_children
1060 int victim = up[0];
1061 int replaced_by = *candidate_children.begin();
1062 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
1063 new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
1064 // apply
1065 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
1066 pending_inc.new_pg_upmap_items[pgid] =
1067 mempool::osdmap::vector<pair<int32_t,int32_t>>(
1068 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
1069 osdmap.apply_incremental(pending_inc);
1070 {
1071 // validate pg_upmap_items is there
1072 vector<int> new_up;
1073 int new_up_primary;
1074 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
9f95a23c 1075 ASSERT_EQ(new_up.size(), up.size());
94b18763
FG
1076 ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) !=
1077 new_up.end());
1078 // and up[1] too
1079 ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) !=
1080 new_up.end());
1081 }
1082 }
1083 {
1084 // STEP-3: mark out up[1] and all other OSDs from the same host
1085 int parent = osdmap.crush->get_parent_of_type(up[1],
1086 osdmap.crush->get_type_id("host"));
1087 set<int> children;
1088 osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
1089 &children);
1090 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
1091 for (auto c: children) {
1092 pending_inc.new_weight[c] = CEPH_OSD_OUT;
1093 }
1094 osdmap.apply_incremental(pending_inc);
1095 {
1096 // validate we have two OSDs from the same host now..
1097 vector<int> new_up;
1098 int new_up_primary;
1099 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
9f95a23c 1100 ASSERT_EQ(up.size(), new_up.size());
94b18763
FG
1101 int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
1102 osdmap.crush->get_type_id("host"));
1103 int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
1104 osdmap.crush->get_type_id("host"));
9f95a23c 1105 ASSERT_EQ(parent_0, parent_1);
94b18763
FG
1106 }
1107 }
1108 {
1109 // STEP-4: apply cure
1110 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
494da23a 1111 clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
94b18763
FG
1112 osdmap.apply_incremental(pending_inc);
1113 {
1114 // validate pg_upmap_items is gone (reverted)
1115 vector<int> new_up;
1116 int new_up_primary;
1117 osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
9f95a23c 1118 ASSERT_EQ(new_up, up_after_out);
94b18763
FG
1119 }
1120 }
1121 }
1122}
1123
a8e16298
TL
TEST_F(OSDMapTest, BUG_38897) {
  // http://tracker.ceph.com/issues/38897
  // Scenario: two pools on disjoint custom hosts; pool1's PGs are force-mapped
  // via pg_upmap so osd.0 looks *underfull*, pool2 gets a pg_upmap_item that
  // moves load off osd.0. calc_pg_upmaps() restricted to pool1 must not be
  // confused by pool2's upmaps.
  // build a fresh map with 12 OSDs, without any default pools
  set_up_map(12, true);
  const string pool_1("pool1");
  const string pool_2("pool2");
  int64_t pool_1_id = -1;

  {
    // build customized crush rule for "pool1"
    string host_name = "host_for_pool_1";
    // build a customized host capturing osd.1~4
    // (loop range is [1, 5); osd.5 is intentionally left under its default host)
    for (int i = 1; i < 5; i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }
    CrushWrapper crush;
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0); // bucket ids are negative
    string rule_name = "rule_for_pool1";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    int rno;
    // find the first free rule id
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
        break;
    }
    int min_size = 3;
    int max_size = 3;
    int steps = 7;
    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick any other random osds (from the custom host built above)
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }

    // create "pool1"
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pool_1_id = pool_id;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->min_size = 1;
    p->set_pg_num(3);
    p->set_pgp_num(3);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_1;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1);
    {
      for (unsigned i = 0; i < 3; i++) {
        // 1.x -> [1]
        pg_t rawpg(i, pool_id);
        pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
        vector<int> up;
        int up_primary;
        osdmap.pg_to_raw_up(pgid, &up, &up_primary);
        ASSERT_TRUE(up.size() == 3);
        ASSERT_TRUE(up[0] == 0); // rule always picks osd.0 first
        
        // insert a new pg_upmap
        vector<int32_t> new_up;
        // and remap 1.x to osd.1 only
        // this way osd.0 is deemed to be *underfull*
        // and osd.1 is deemed to be *overfull*
        new_up.push_back(1);
        {
          OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
          pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
            new_up.begin(), new_up.end());
          osdmap.apply_incremental(pending_inc);
        }
        osdmap.pg_to_raw_up(pgid, &up, &up_primary);
        ASSERT_TRUE(up.size() == 1);
        ASSERT_TRUE(up[0] == 1);
      }
    }
  }

  {
    // build customized crush rule for "pool2"
    string host_name = "host_for_pool_2";
    // build a customized host to capture osd.6~11
    for (int i = 6; i < (int)get_num_osds(); i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }
    CrushWrapper crush;
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0); // bucket ids are negative
    string rule_name = "rule_for_pool2";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    int rno;
    // find the first free rule id
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
        break;
    }
    int min_size = 3;
    int max_size = 3;
    int steps = 7;
    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick any other random osds (from the custom host built above)
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }

    // create "pool2"
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    // include a single PG
    p->set_pg_num(1);
    p->set_pgp_num(1);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_2;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2);
    pg_t rawpg(0, pool_id);
    pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
    EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid));
    vector<int> up;
    int up_primary;
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    ASSERT_TRUE(up.size() == 3);
    ASSERT_TRUE(up[0] == 0); // rule always picks osd.0 first

    {
      // build a pg_upmap_item that will
      // remap pg out from *underfull* osd.0
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      ASSERT_TRUE(osdmap.have_pg_upmaps(pgid));
      vector<int> up;
      int up_primary;
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 3);
      ASSERT_TRUE(up[0] == 10);
    }
  }

  // ready to go: optimizing pool1 only must not crash or misbehave
  // in the presence of pool2's upmap (the original bug)
  {
    set<int64_t> only_pools;
    ASSERT_TRUE(pool_1_id >= 0);
    only_pools.insert(pool_1_id);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    // require perfect distribution! (max deviation 0)
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0, // so we can force optimizing
                          100,
                          only_pools,
                          &pending_inc);
    osdmap.apply_incremental(pending_inc);
  }
}
1342
494da23a
TL
TEST_F(OSDMapTest, BUG_40104) {
  // http://tracker.ceph.com/issues/40104
  // Performance regression check: clean_pg_upmaps() used to be quadratic-ish
  // on large maps. Build a big map, attach one (possibly invalid) upmap item
  // to every PG, then time a single cleanup pass and print the latency.
  int big_osd_num = 5000;
  int big_pg_num = 10000;
  set_up_map(big_osd_num, true);
  int pool_id;
  {
    // create a large replicated pool ("big_pool") on the default rule
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    pool_id = ++pending_inc.new_pool_max;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->min_size = 1;
    p->set_pg_num(big_pg_num);
    p->set_pgp_num(big_pg_num);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = 0;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = "big_pool";
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == "big_pool");
  }
  {
    // generate pg_upmap_items for each pg
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    for (int i = 0; i < big_pg_num; i++) {
      pg_t rawpg(i, pool_id);
      pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
      vector<int> up;
      int up_primary;
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 3);
      int victim = up[0];
      int replaced_by = random() % big_osd_num;
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      // note that it might or might not be valid, we don't care
      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    }
    osdmap.apply_incremental(pending_inc);
  }
  {
    // time one full cleanup pass over ~big_pg_num upmap items
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    auto start = mono_clock::now();
    clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
    auto latency = mono_clock::now() - start;
    std::cout << "clean_pg_upmaps (~" << big_pg_num
              << " pg_upmap_items) latency:" << timespan_str(latency)
              << std::endl;
  }
}
1398
eafe8130
TL
TEST_F(OSDMapTest, BUG_42052) {
  // https://tracker.ceph.com/issues/42052
  // A PG carrying both a pg_upmap and pg_upmap_items must have both
  // removed by clean_pg_upmaps() when they are no longer sensible.
  set_up_map(6, true);
  const string pool_name("pool");
  // build customized crush rule for "pool"
  CrushWrapper crush;
  get_crush(osdmap, crush);
  string rule_name = "rule";
  int rule_type = pg_pool_t::TYPE_REPLICATED;
  ASSERT_TRUE(!crush.rule_exists(rule_name));
  int rno;
  // find the first free rule id
  for (rno = 0; rno < crush.get_max_rules(); rno++) {
    if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
      break;
  }
  int min_size = 3;
  int max_size = 3;
  int steps = 8;
  crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
  int step = 0;
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
  // always choose osd.0, osd.1, osd.2 (three take/emit pairs)
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 1);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 2);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  ASSERT_TRUE(step == steps);
  auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
  ASSERT_TRUE(r >= 0);
  crush.set_rule_name(rno, rule_name);
  {
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.crush.clear();
    crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    osdmap.apply_incremental(pending_inc);
  }

  // create "pool" (single PG) referencing the above rule
  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
  pending_inc.new_pool_max = osdmap.get_pool_max();
  auto pool_id = ++pending_inc.new_pool_max;
  pg_pool_t empty;
  auto p = pending_inc.get_new_pool(pool_id, &empty);
  p->size = 3;
  p->min_size = 1;
  p->set_pg_num(1);
  p->set_pgp_num(1);
  p->type = pg_pool_t::TYPE_REPLICATED;
  p->crush_rule = rno;
  p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
  pending_inc.new_pool_names[pool_id] = pool_name;
  osdmap.apply_incremental(pending_inc);
  ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
  ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_name);
  pg_t rawpg(0, pool_id);
  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
  {
    // pg_upmap 1.0 [2,3,5]
    vector<int32_t> new_up{2,3,5};
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
      new_up.begin(), new_up.end());
    osdmap.apply_incremental(pending_inc);
  }
  {
    // pg_upmap_items 1.0 [0,3,4,5]
    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
    new_pg_upmap_items.push_back(make_pair(0, 3));
    new_pg_upmap_items.push_back(make_pair(4, 5));
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pg_upmap_items[pgid] =
      mempool::osdmap::vector<pair<int32_t,int32_t>>(
        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
    osdmap.apply_incremental(pending_inc);
  }
  {
    // both kinds of upmaps above should be cleaned away
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
    osdmap.apply_incremental(pending_inc);
    ASSERT_FALSE(osdmap.have_pg_upmaps(pgid));
  }
}
1484
9f95a23c
TL
1485TEST_F(OSDMapTest, BUG_42485) {
1486 set_up_map(60);
1487 {
1488 // build a temporary crush topology of 2datacenters, 3racks per dc,
1489 // 1host per rack, 10osds per host
1490 OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
1491 tmp.deepish_copy_from(osdmap);
1492 const int expected_host_num = 6;
1493 int osd_per_host = (int)get_num_osds() / expected_host_num;
1494 ASSERT_GE(osd_per_host, 10);
1495 int host_per_dc = 3;
1496 int index = 0;
1497 int dc_index = 0;
1498 for (int i = 0; i < (int)get_num_osds(); i++) {
1499 if (i && i % osd_per_host == 0) {
1500 ++index;
1501 }
1502 if (i && i % (host_per_dc * osd_per_host) == 0) {
1503 ++dc_index;
1504 }
1505 stringstream osd_name;
1506 stringstream host_name;
1507 stringstream rack_name;
1508 stringstream dc_name;
1509 vector<string> move_to;
1510 osd_name << "osd." << i;
1511 host_name << "host-" << index;
1512 rack_name << "rack-" << index;
1513 dc_name << "dc-" << dc_index;
1514 move_to.push_back("root=default");
1515 string dc_loc = "datacenter=" + dc_name.str();
1516 move_to.push_back(dc_loc);
1517 string rack_loc = "rack=" + rack_name.str();
1518 move_to.push_back(rack_loc);
1519 string host_loc = "host=" + host_name.str();
1520 move_to.push_back(host_loc);
1521 auto r = crush_move(tmp, osd_name.str(), move_to);
1522 ASSERT_EQ(0, r);
1523 }
1524
1525 // build crush rule
1526 CrushWrapper crush;
1527 get_crush(tmp, crush);
1528 string rule_name = "rule_xeus_993_1";
1529 int rule_type = pg_pool_t::TYPE_REPLICATED;
1530 ASSERT_TRUE(!crush.rule_exists(rule_name));
1531 int rno;
1532 for (rno = 0; rno < crush.get_max_rules(); rno++) {
1533 if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
1534 break;
1535 }
1536 string root_name = "default";
1537 string dc_1 = "dc-0";
1538 int dc1 = crush.get_item_id(dc_1);
1539 string dc_2 = "dc-1";
1540 int dc2 = crush.get_item_id(dc_2);
1541 int min_size = 1;
1542 int max_size = 20;
1543 int steps = 8;
1544 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
1545 int step = 0;
1546 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
1547 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
1548 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc1, 0);
1549 crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */);
1550 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
1551 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc2, 0);
1552 crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */);
1553 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
1554 ASSERT_TRUE(step == steps);
1555 auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
1556 ASSERT_TRUE(r >= 0);
1557 crush.set_rule_name(rno, rule_name);
1558 {
1559 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
1560 pending_inc.crush.clear();
1561 crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
1562 tmp.apply_incremental(pending_inc);
1563 }
1564 // create a repliacted pool referencing the above rule
1565 int64_t pool_xeus_993;
1566 {
1567 OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
1568 new_pool_inc.new_pool_max = tmp.get_pool_max();
1569 new_pool_inc.fsid = tmp.get_fsid();
1570 pg_pool_t empty;
1571 pool_xeus_993 = ++new_pool_inc.new_pool_max;
1572 pg_pool_t *p = new_pool_inc.get_new_pool(pool_xeus_993, &empty);
1573 p->size = 4;
1574 p->set_pg_num(4096);
1575 p->set_pgp_num(4096);
1576 p->type = pg_pool_t::TYPE_REPLICATED;
1577 p->crush_rule = rno;
1578 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
1579 new_pool_inc.new_pool_names[pool_xeus_993] = "pool_xeus_993";
1580 tmp.apply_incremental(new_pool_inc);
1581 }
1582
1583 pg_t rep_pg(0, pool_xeus_993);
1584 pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
1585 {
1586 int from = -1;
1587 int to = -1;
1588 vector<int> rep_up;
1589 int rep_up_primary;
1590 tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary);
1591 std::cout << "pgid " << rep_up << " up " << rep_up << std::endl;
1592 ASSERT_TRUE(rep_up.size() == 4);
1593 from = *(rep_up.begin());
1594 ASSERT_TRUE(from >= 0);
1595 auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno);
1596 if (dc_parent == dc1)
1597 dc_parent = dc2;
1598 else
1599 dc_parent = dc1;
1600 auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno);
1601 ASSERT_TRUE(dc_parent < 0);
1602 ASSERT_TRUE(rack_parent < 0);
1603 set<int> rack_parents;
1604 for (auto &i: rep_up) {
1605 if (i == from) continue;
1606 auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
1607 rack_parents.insert(rack_parent);
1608 }
1609 for (int i = 0; i < (int)get_num_osds(); i++) {
1610 if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) {
1611 auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno);
1612 auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
1613 if (dc_p == dc_parent &&
1614 rack_parents.find(rack_p) == rack_parents.end()) {
1615 to = i;
1616 break;
1617 }
1618 }
1619 }
1620 ASSERT_TRUE(to >= 0);
1621 ASSERT_TRUE(from != to);
1622 std::cout << "from " << from << " to " << to << std::endl;
1623 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
1624 new_pg_upmap_items.push_back(make_pair(from, to));
1625 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
1626 pending_inc.new_pg_upmap_items[rep_pgid] =
1627 mempool::osdmap::vector<pair<int32_t,int32_t>>(
1628 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
1629 tmp.apply_incremental(pending_inc);
1630 ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
1631 }
1632 pg_t rep_pg2(2, pool_xeus_993);
1633 pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2);
1634 {
1635 pg_t rep_pgid = rep_pgid2;
1636 vector<int> from_osds{-1, -1};
1637 vector<int> rep_up;
1638 int rep_up_primary;
1639 tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary);
1640 ASSERT_TRUE(rep_up.size() == 4);
1641 from_osds[0] = *(rep_up.begin());
1642 from_osds[1] = *(rep_up.rbegin());
1643 std::cout << "pgid " << rep_pgid2 << " up " << rep_up << std::endl;
1644 ASSERT_TRUE(*(from_osds.begin()) >= 0);
1645 ASSERT_TRUE(*(from_osds.rbegin()) >= 0);
1646 vector<pair<int32_t,int32_t>> new_pg_upmap_items;
1647 for (auto &from: from_osds) {
1648 int to = -1;
1649 auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno);
1650 if (dc_parent == dc1)
1651 dc_parent = dc2;
1652 else
1653 dc_parent = dc1;
1654 auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno);
1655 ASSERT_TRUE(dc_parent < 0);
1656 ASSERT_TRUE(rack_parent < 0);
1657 set<int> rack_parents;
1658 for (auto &i: rep_up) {
1659 if (i == from) continue;
1660 auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
1661 rack_parents.insert(rack_parent);
1662 }
1663 for (auto &i: new_pg_upmap_items) {
1664 auto rack_from = tmp.crush->get_parent_of_type(i.first, 3, rno);
1665 auto rack_to = tmp.crush->get_parent_of_type(i.second, 3, rno);
1666 rack_parents.insert(rack_from);
1667 rack_parents.insert(rack_to);
1668 }
1669 for (int i = 0; i < (int)get_num_osds(); i++) {
1670 if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) {
1671 auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno);
1672 auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
1673 if (dc_p == dc_parent &&
1674 rack_parents.find(rack_p) == rack_parents.end()) {
1675 to = i;
1676 break;
1677 }
1678 }
1679 }
1680 ASSERT_TRUE(to >= 0);
1681 ASSERT_TRUE(from != to);
1682 std::cout << "from " << from << " to " << to << std::endl;
1683 new_pg_upmap_items.push_back(make_pair(from, to));
1684 }
1685 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
1686 pending_inc.new_pg_upmap_items[rep_pgid] =
1687 mempool::osdmap::vector<pair<int32_t,int32_t>>(
1688 new_pg_upmap_items.begin(), new_pg_upmap_items.end());
1689 tmp.apply_incremental(pending_inc);
1690 ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
1691 }
1692 {
1693 // *maybe_remove_pg_upmaps* should remove the above upmap_item
1694 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
1695 clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
1696 tmp.apply_incremental(pending_inc);
1697 ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid));
1698 ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid2));
1699 }
1700 }
1701}
1702
31f18b77
FG
1703TEST(PGTempMap, basic)
1704{
1705 PGTempMap m;
1706 pg_t a(1,1);
1707 for (auto i=3; i<1000; ++i) {
1708 pg_t x(i, 1);
1709 m.set(x, {static_cast<int>(i)});
1710 }
1711 pg_t b(2,1);
1712 m.set(a, {1, 2});
1713 ASSERT_NE(m.find(a), m.end());
1714 ASSERT_EQ(m.find(a), m.begin());
1715 ASSERT_EQ(m.find(b), m.end());
1716 ASSERT_EQ(998u, m.size());
1717}
35e4c445 1718
9f95a23c
TL
// Regression test for https://tracker.ceph.com/issues/43124:
// a pg_upmap_item that is still valid under a multi-step EC rule
// (choose 4 racks, then chooseleaf 3 hosts per rack) must NOT be
// cancelled by clean_pg_upmaps().
TEST_F(OSDMapTest, BUG_43124) {
  set_up_map(200);
  {
    // https://tracker.ceph.com/issues/43124

    // build a temporary crush topology of 5racks,
    // 4 hosts per rack, 10osds per host
    OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
    tmp.deepish_copy_from(osdmap);
    const int expected_host_num = 20;
    int osd_per_host = (int)get_num_osds() / expected_host_num;
    ASSERT_GE(osd_per_host, 10);
    int host_per_rack = 4;
    int index = 0;       // current host index
    int rack_index = 0;  // current rack index
    for (int i = 0; i < (int)get_num_osds(); i++) {
      if (i && i % osd_per_host == 0) {
        ++index;  // open a new host every osd_per_host osds
      }
      if (i && i % (host_per_rack * osd_per_host) == 0) {
        ++rack_index;  // open a new rack every host_per_rack hosts
      }
      stringstream osd_name;
      stringstream host_name;
      stringstream rack_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      host_name << "host-" << index;
      rack_name << "rack-" << rack_index;
      move_to.push_back("root=default");
      string rack_loc = "rack=" + rack_name.str();
      move_to.push_back(rack_loc);
      string host_loc = "host=" + host_name.str();
      move_to.push_back(host_loc);
      auto r = crush_move(tmp, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }

    // build crush rule
    CrushWrapper crush;
    get_crush(tmp, crush);
    string rule_name = "rule_angel_1944";
    int rule_type = pg_pool_t::TYPE_ERASURE;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    // pick the first rule id that is free both as a rule and as a ruleset
    int rno;
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
        break;
    }
    int min_size = 1;
    int max_size = 20;
    int steps = 6;
    string root_name = "default";
    int root = crush.get_item_id(root_name);
    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_FIRSTN, 4, 3 /* rack */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_INDEP, 3, 1 /* host */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      // commit the new crush map into the temporary osdmap
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      tmp.apply_incremental(pending_inc);
    }
    {
      // dump the tree and the rules to stdout for easier debugging
      stringstream oss;
      crush.dump_tree(&oss, NULL);
      std::cout << oss.str() << std::endl;
      Formatter *f = Formatter::create("json-pretty");
      f->open_object_section("crush_rules");
      crush.dump_rules(f);
      f->close_section();
      f->flush(cout);
      delete f;
    }
    // create an erasure-coded pool referencing the above rule
    int64_t pool_angel_1944;
    {
      OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
      new_pool_inc.new_pool_max = tmp.get_pool_max();
      new_pool_inc.fsid = tmp.get_fsid();
      pg_pool_t empty;
      pool_angel_1944 = ++new_pool_inc.new_pool_max;
      pg_pool_t *p = new_pool_inc.get_new_pool(pool_angel_1944, &empty);
      p->size = 12;  // matches the rule: 4 racks x 3 chooseleaf picks
      p->set_pg_num(4096);
      p->set_pgp_num(4096);
      p->type = pg_pool_t::TYPE_ERASURE;
      p->crush_rule = rno;
      p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
      new_pool_inc.new_pool_names[pool_angel_1944] = "pool_angel_1944";
      tmp.apply_incremental(new_pool_inc);
    }

    pg_t rep_pg(0, pool_angel_1944);
    pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
    {
      // insert a pg_upmap_item
      int from = -1;
      int to = -1;
      vector<int> rep_up;
      int rep_up_primary;
      tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary);
      std::cout << "pgid " << rep_pgid << " up " << rep_up << std::endl;
      ASSERT_TRUE(rep_up.size() == 12);
      from = *(rep_up.begin());
      ASSERT_TRUE(from >= 0);
      auto from_rack = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno);
      // collect the hosts already used by the up set
      set<int> failure_domains;
      for (auto &osd : rep_up) {
        failure_domains.insert(tmp.crush->get_parent_of_type(osd, 1 /* host */, rno));
      }
      // pick a target osd in a different rack and an unused host, so the
      // resulting upmap item keeps the mapping valid under the rule above
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) {
          auto to_rack = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
          auto to_host = tmp.crush->get_parent_of_type(i, 1 /* host */, rno);
          if (to_rack != from_rack && failure_domains.count(to_host) == 0) {
            to = i;
            break;
          }
        }
      }
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      std::cout << "from " << from << " to " << to << std::endl;
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[rep_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
    }
    {
      // *maybe_remove_pg_upmaps* should not remove the above upmap_item
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
    }
  }
}
f67539c2
TL
1870
1871TEST_F(OSDMapTest, BUG_48884)
1872{
1873
1874 set_up_map(12);
1875
1876 unsigned int host_index = 1;
1877 for (unsigned int x=0; x < get_num_osds();) {
1878 // Create three hosts with four osds each
1879 for (unsigned int y=0; y < 4; y++) {
1880 stringstream osd_name;
1881 stringstream host_name;
1882 vector<string> move_to;
1883 osd_name << "osd." << x;
1884 host_name << "host-" << host_index;
1885 move_to.push_back("root=default");
1886 move_to.push_back("rack=localrack");
1887 string host_loc = "host=" + host_name.str();
1888 move_to.push_back(host_loc);
1889 int r = crush_move(osdmap, osd_name.str(), move_to);
1890 ASSERT_EQ(0, r);
1891 x++;
1892 }
1893 host_index++;
1894 }
1895
1896 CrushWrapper crush;
1897 get_crush(osdmap, crush);
1898 auto host_id = crush.get_item_id("localhost");
1899 crush.remove_item(g_ceph_context, host_id, false);
1900 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
1901 pending_inc.crush.clear();
1902 crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
1903 osdmap.apply_incremental(pending_inc);
1904
1905 PGMap pgmap;
1906 osd_stat_t stats, stats_null;
1907 stats.statfs.total = 500000;
1908 stats.statfs.available = 50000;
1909 stats.statfs.omap_allocated = 50000;
1910 stats.statfs.internal_metadata = 50000;
1911 stats_null.statfs.total = 0;
1912 stats_null.statfs.available = 0;
1913 stats_null.statfs.omap_allocated = 0;
1914 stats_null.statfs.internal_metadata = 0;
1915 for (unsigned int x=0; x < get_num_osds(); x++) {
1916 if (x > 3 && x < 8) {
1917 pgmap.osd_stat.insert({x,stats_null});
1918 } else {
1919 pgmap.osd_stat.insert({x,stats});
1920 }
1921 }
1922
1923 stringstream ss;
1924 boost::scoped_ptr<Formatter> f(Formatter::create("json-pretty"));
1925 print_osd_utilization(osdmap, pgmap, ss, f.get(), true, "root");
1926 JSONParser parser;
1927 parser.parse(ss.str().c_str(), static_cast<int>(ss.str().size()));
1928 auto iter = parser.find_first();
522d829b 1929 for (const auto& bucket : (*iter)->get_array_elements()) {
f67539c2
TL
1930 JSONParser parser2;
1931 parser2.parse(bucket.c_str(), static_cast<int>(bucket.size()));
1932 auto* obj = parser2.find_obj("name");
1933 if (obj->get_data_val().str.compare("localrack") == 0) {
1934 obj = parser2.find_obj("kb");
1935 ASSERT_EQ(obj->get_data_val().str, "3904");
1936 obj = parser2.find_obj("kb_used");
1937 ASSERT_EQ(obj->get_data_val().str, "3512");
1938 obj = parser2.find_obj("kb_used_omap");
1939 ASSERT_EQ(obj->get_data_val().str, "384");
1940 obj = parser2.find_obj("kb_used_meta");
1941 ASSERT_EQ(obj->get_data_val().str, "384");
1942 obj = parser2.find_obj("kb_avail");
1943 ASSERT_EQ(obj->get_data_val().str, "384");
1944 }
1945 }
1946}
a4b75251
TL
1947
1948TEST_P(OSDMapTest, BUG_51842) {
1949 set_up_map(3, true);
1950 OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
1951 tmp.deepish_copy_from(osdmap);
1952 for (int i = 0; i < (int)get_num_osds(); i++) {
1953 stringstream osd_name;
1954 stringstream host_name;
1955 vector<string> move_to;
1956 osd_name << "osd." << i;
1957 host_name << "host=host-" << i;
1958 move_to.push_back("root=infra-1706");
1959 move_to.push_back(host_name.str());
1960 auto r = crush_move(tmp, osd_name.str(), move_to);
1961 ASSERT_EQ(0, r);
1962 }
1963
1964 // build crush rule
1965 CrushWrapper crush;
1966 get_crush(tmp, crush);
1967 string rule_name = "infra-1706";
1968 int rule_type = pg_pool_t::TYPE_REPLICATED;
1969 ASSERT_TRUE(!crush.rule_exists(rule_name));
1970 int rno;
1971 for (rno = 0; rno < crush.get_max_rules(); rno++) {
1972 if (!crush.rule_exists(rno))
1973 break;
1974 }
1975 string root_bucket = "infra-1706";
1976 int root = crush.get_item_id(root_bucket);
1977 int min_size = 1;
1978 int max_size = 20;
1979 int steps = 5;
1980 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
1981 int step = 0;
1982 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
1983 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
1984 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
1985 // note: it's ok to set like 'step chooseleaf_firstn 0 host'
1986 std::pair<int, int> param = GetParam();
1987 int rep_num = std::get<0>(param);
1988 int domain = std::get<1>(param);
1989 crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, rep_num, domain);
1990 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
1991 ASSERT_TRUE(step == steps);
1992 auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
1993 ASSERT_TRUE(r >= 0);
1994 crush.set_rule_name(rno, rule_name);
1995 {
1996 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
1997 pending_inc.crush.clear();
1998 crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
1999 tmp.apply_incremental(pending_inc);
2000 }
2001 {
2002 stringstream oss;
2003 crush.dump_tree(&oss, NULL);
2004 std::cout << oss.str() << std::endl;
2005 Formatter *f = Formatter::create("json-pretty");
2006 f->open_object_section("crush_rules");
2007 crush.dump_rules(f);
2008 f->close_section();
2009 f->flush(cout);
2010 delete f;
2011 }
2012 // create a replicated pool referencing the above rule
2013 int64_t pool_infra_1706;
2014 {
2015 OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
2016 new_pool_inc.new_pool_max = tmp.get_pool_max();
2017 new_pool_inc.fsid = tmp.get_fsid();
2018 pg_pool_t empty;
2019 pool_infra_1706 = ++new_pool_inc.new_pool_max;
2020 pg_pool_t *p = new_pool_inc.get_new_pool(pool_infra_1706, &empty);
2021 p->size = 3;
2022 p->min_size = 1;
2023 p->set_pg_num(256);
2024 p->set_pgp_num(256);
2025 p->type = pg_pool_t::TYPE_REPLICATED;
2026 p->crush_rule = rno;
2027 p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
2028 new_pool_inc.new_pool_names[pool_infra_1706] = "pool_infra_1706";
2029 tmp.apply_incremental(new_pool_inc);
2030 }
2031
2032 // add upmaps
2033 pg_t rep_pg(3, pool_infra_1706);
2034 pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
2035 pg_t rep_pg2(4, pool_infra_1706);
2036 pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2);
2037 pg_t rep_pg3(6, pool_infra_1706);
2038 pg_t rep_pgid3 = tmp.raw_pg_to_pg(rep_pg3);
2039 {
2040 OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
2041 pending_inc.new_pg_upmap[rep_pgid] = mempool::osdmap::vector<int32_t>({1,0,2});
2042 pending_inc.new_pg_upmap[rep_pgid2] = mempool::osdmap::vector<int32_t>({1,2,0});
2043 pending_inc.new_pg_upmap[rep_pgid3] = mempool::osdmap::vector<int32_t>({1,2,0});
2044 tmp.apply_incremental(pending_inc);
2045 ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
2046 ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid2));
2047 ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid3));
2048 }
2049
2050 {
2051 // now, set pool size to 1
2052 OSDMap tmpmap;
2053 tmpmap.deepish_copy_from(tmp);
2054 OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1);
2055 pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706);
2056 p.size = 1;
2057 p.last_change = new_pool_inc.epoch;
2058 new_pool_inc.new_pools[pool_infra_1706] = p;
2059 tmpmap.apply_incremental(new_pool_inc);
2060
2061 OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1);
2062 clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc);
2063 tmpmap.apply_incremental(new_pending_inc);
2064 // check pg upmaps
2065 ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid));
2066 ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2));
2067 ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3));
2068 }
2069 {
2070 // now, set pool size to 4
2071 OSDMap tmpmap;
2072 tmpmap.deepish_copy_from(tmp);
2073 OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1);
2074 pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706);
2075 p.size = 4;
2076 p.last_change = new_pool_inc.epoch;
2077 new_pool_inc.new_pools[pool_infra_1706] = p;
2078 tmpmap.apply_incremental(new_pool_inc);
2079
2080 OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1);
2081 clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc);
2082 tmpmap.apply_incremental(new_pending_inc);
2083 // check pg upmaps
2084 ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid));
2085 ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2));
2086 ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3));
2087 }
2088}
2089
// Run BUG_51842 under four chooseleaf_firstn configurations. Each pair is
// (rep_num, domain): the replica count passed to the step (0 conventionally
// defers to the pool size) and the crush bucket type (1 = host, 0 = osd).
// NOTE(review): INSTANTIATE_TEST_CASE_P is deprecated in googletest >= 1.10
// in favor of INSTANTIATE_TEST_SUITE_P; keep as-is while the bundled gtest
// still accepts the old macro.
INSTANTIATE_TEST_CASE_P(
  OSDMap,
  OSDMapTest,
  ::testing::Values(
    std::make_pair<int, int>(0, 1), // chooseleaf firstn 0 host
    std::make_pair<int, int>(3, 1), // chooseleaf firstn 3 host
    std::make_pair<int, int>(0, 0), // chooseleaf firstn 0 osd
    std::make_pair<int, int>(3, 0)  // chooseleaf firstn 3 osd
  )
);