]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/osd/TestOSDMap.cc
0d8eec7b561d854c0fa7eddd5176437075ee8793
[ceph.git] / ceph / src / test / osd / TestOSDMap.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #include "gtest/gtest.h"
3 #include "osd/OSDMap.h"
4 #include "osd/OSDMapMapping.h"
5
6 #include "global/global_context.h"
7 #include "global/global_init.h"
8 #include "common/common_init.h"
9 #include "common/ceph_argparse.h"
10
11 #include <iostream>
12
13 using namespace std;
14
15 int main(int argc, char **argv) {
16 std::vector<const char*> args(argv, argv+argc);
17 env_to_vec(args);
18 auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
19 CODE_ENVIRONMENT_UTILITY,
20 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
21 common_init_finish(g_ceph_context);
22 // make sure we have 3 copies, or some tests won't work
23 g_ceph_context->_conf->set_val("osd_pool_default_size", "3", false);
24 // our map is flat, so just try and split across OSDs, not hosts or whatever
25 g_ceph_context->_conf->set_val("osd_crush_chooseleaf_type", "0", false);
26 ::testing::InitGoogleTest(&argc, argv);
27 return RUN_ALL_TESTS();
28 }
29
30 class OSDMapTest : public testing::Test {
31 const static int num_osds = 6;
32 public:
33 OSDMap osdmap;
34 OSDMapMapping mapping;
35
36 OSDMapTest() {}
37
38 void set_up_map() {
39 uuid_d fsid;
40 osdmap.build_simple(g_ceph_context, 0, fsid, num_osds, 6, 6);
41 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
42 pending_inc.fsid = osdmap.get_fsid();
43 entity_addr_t sample_addr;
44 uuid_d sample_uuid;
45 for (int i = 0; i < num_osds; ++i) {
46 sample_uuid.generate_random();
47 sample_addr.nonce = i;
48 pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
49 pending_inc.new_up_client[i] = sample_addr;
50 pending_inc.new_up_cluster[i] = sample_addr;
51 pending_inc.new_hb_back_up[i] = sample_addr;
52 pending_inc.new_hb_front_up[i] = sample_addr;
53 pending_inc.new_weight[i] = CEPH_OSD_IN;
54 pending_inc.new_uuid[i] = sample_uuid;
55 }
56 osdmap.apply_incremental(pending_inc);
57
58 // Create an EC ruleset and a pool using it
59 int r = osdmap.crush->add_simple_rule(
60 "erasure", "default", "osd",
61 "indep", pg_pool_t::TYPE_ERASURE,
62 &cerr);
63
64 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
65 new_pool_inc.new_pool_max = osdmap.get_pool_max();
66 new_pool_inc.fsid = osdmap.get_fsid();
67 pg_pool_t empty;
68 uint64_t pool_id = ++new_pool_inc.new_pool_max;
69 pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty);
70 p->size = 3;
71 p->set_pg_num(64);
72 p->set_pgp_num(64);
73 p->type = pg_pool_t::TYPE_ERASURE;
74 p->crush_rule = r;
75 new_pool_inc.new_pool_names[pool_id] = "ec";
76 osdmap.apply_incremental(new_pool_inc);
77 }
78 unsigned int get_num_osds() { return num_osds; }
79
80 void test_mappings(int pool,
81 int num,
82 vector<int> *any,
83 vector<int> *first,
84 vector<int> *primary) {
85 mapping.update(osdmap);
86 for (int i=0; i<num; ++i) {
87 vector<int> up, acting;
88 int up_primary, acting_primary;
89 pg_t pgid(i, pool);
90 osdmap.pg_to_up_acting_osds(pgid,
91 &up, &up_primary, &acting, &acting_primary);
92 for (unsigned j=0; j<acting.size(); ++j)
93 (*any)[acting[j]]++;
94 if (!acting.empty())
95 (*first)[acting[0]]++;
96 if (acting_primary >= 0)
97 (*primary)[acting_primary]++;
98
99 // compare to precalc mapping
100 vector<int> up2, acting2;
101 int up_primary2, acting_primary2;
102 pgid = osdmap.raw_pg_to_pg(pgid);
103 mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
104 ASSERT_EQ(up, up2);
105 ASSERT_EQ(up_primary, up_primary2);
106 ASSERT_EQ(acting, acting2);
107 ASSERT_EQ(acting_primary, acting_primary2);
108 }
109 }
110 };
111
112 TEST_F(OSDMapTest, Create) {
113 set_up_map();
114 ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd());
115 ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds());
116 }
117
118 TEST_F(OSDMapTest, Features) {
119 // with EC pool
120 set_up_map();
121 uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
122 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
123 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
124 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
125 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
126 ASSERT_TRUE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
127 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
128 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
129
130 // clients have a slightly different view
131 features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
132 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
133 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
134 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
135 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
136 ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES); // dont' need this
137 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
138 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
139
140 // remove teh EC pool, but leave the rule. add primary affinity.
141 {
142 OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
143 new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
144 new_pool_inc.new_primary_affinity[0] = 0x8000;
145 osdmap.apply_incremental(new_pool_inc);
146 }
147
148 features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
149 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
150 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
151 ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
152 ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
153 ASSERT_FALSE(features & CEPH_FEATURE_OSD_ERASURE_CODES);
154 ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
155 ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
156
157 // FIXME: test tiering feature bits
158 }
159
160 TEST_F(OSDMapTest, MapPG) {
161 set_up_map();
162
163 pg_t rawpg(0, 0, -1);
164 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
165 vector<int> up_osds, acting_osds;
166 int up_primary, acting_primary;
167
168 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
169 &acting_osds, &acting_primary);
170
171 vector<int> old_up_osds, old_acting_osds;
172 osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
173 ASSERT_EQ(old_up_osds, up_osds);
174 ASSERT_EQ(old_acting_osds, acting_osds);
175
176 ASSERT_EQ(osdmap.get_pg_pool(0)->get_size(), up_osds.size());
177 }
178
179 TEST_F(OSDMapTest, MapFunctionsMatch) {
180 // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
181 set_up_map();
182
183 pg_t rawpg(0, 0, -1);
184 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
185 vector<int> up_osds, acting_osds;
186 int up_primary, acting_primary;
187
188 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
189 &acting_osds, &acting_primary);
190
191 vector<int> up_osds_two, acting_osds_two;
192
193 osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);
194
195 ASSERT_EQ(up_osds, up_osds_two);
196 ASSERT_EQ(acting_osds, acting_osds_two);
197
198 int acting_primary_two;
199 osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
200 EXPECT_EQ(acting_osds, acting_osds_two);
201 EXPECT_EQ(acting_primary, acting_primary_two);
202 osdmap.pg_to_acting_osds(pgid, acting_osds_two);
203 EXPECT_EQ(acting_osds, acting_osds_two);
204 }
205
206 /** This test must be removed or modified appropriately when we allow
207 * other ways to specify a primary. */
208 TEST_F(OSDMapTest, PrimaryIsFirst) {
209 set_up_map();
210
211 pg_t rawpg(0, 0, -1);
212 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
213 vector<int> up_osds, acting_osds;
214 int up_primary, acting_primary;
215
216 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
217 &acting_osds, &acting_primary);
218 EXPECT_EQ(up_osds[0], up_primary);
219 EXPECT_EQ(acting_osds[0], acting_primary);
220 }
221
222 TEST_F(OSDMapTest, PGTempRespected) {
223 set_up_map();
224
225 pg_t rawpg(0, 0, -1);
226 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
227 vector<int> up_osds, acting_osds;
228 int up_primary, acting_primary;
229
230 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
231 &acting_osds, &acting_primary);
232
233 // copy and swap first and last element in acting_osds
234 vector<int> new_acting_osds(acting_osds);
235 int first = new_acting_osds[0];
236 new_acting_osds[0] = *new_acting_osds.rbegin();
237 *new_acting_osds.rbegin() = first;
238
239 // apply pg_temp to osdmap
240 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
241 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
242 new_acting_osds.begin(), new_acting_osds.end());
243 osdmap.apply_incremental(pgtemp_map);
244
245 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
246 &acting_osds, &acting_primary);
247 EXPECT_EQ(new_acting_osds, acting_osds);
248 }
249
250 TEST_F(OSDMapTest, PrimaryTempRespected) {
251 set_up_map();
252
253 pg_t rawpg(0, 0, -1);
254 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
255 vector<int> up_osds;
256 vector<int> acting_osds;
257 int up_primary, acting_primary;
258
259 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
260 &acting_osds, &acting_primary);
261
262 // make second OSD primary via incremental
263 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
264 pgtemp_map.new_primary_temp[pgid] = acting_osds[1];
265 osdmap.apply_incremental(pgtemp_map);
266
267 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
268 &acting_osds, &acting_primary);
269 EXPECT_EQ(acting_primary, acting_osds[1]);
270 }
271
272 TEST_F(OSDMapTest, CleanTemps) {
273 set_up_map();
274
275 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
276 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
277 pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, 0));
278 {
279 vector<int> up_osds, acting_osds;
280 int up_primary, acting_primary;
281 osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary,
282 &acting_osds, &acting_primary);
283 pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>(
284 up_osds.begin(), up_osds.end());
285 pgtemp_map.new_primary_temp[pga] = up_primary;
286 }
287 pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, 0));
288 {
289 vector<int> up_osds, acting_osds;
290 int up_primary, acting_primary;
291 osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary,
292 &acting_osds, &acting_primary);
293 pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
294 up_osds.begin(), up_osds.end());
295 pending_inc.new_primary_temp[pgb] = up_primary;
296 }
297
298 osdmap.apply_incremental(pgtemp_map);
299
300 OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
301
302 EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
303 pending_inc.new_pg_temp[pga].size() == 0);
304 EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);
305
306 EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) &&
307 !pending_inc.new_primary_temp.count(pgb));
308 }
309
310 TEST_F(OSDMapTest, KeepsNecessaryTemps) {
311 set_up_map();
312
313 pg_t rawpg(0, 0, -1);
314 pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
315 vector<int> up_osds, acting_osds;
316 int up_primary, acting_primary;
317
318 osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
319 &acting_osds, &acting_primary);
320
321 // find unused OSD and stick it in there
322 OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
323 // find an unused osd and put it in place of the first one
324 int i = 0;
325 for(; i != (int)get_num_osds(); ++i) {
326 bool in_use = false;
327 for (vector<int>::iterator osd_it = up_osds.begin();
328 osd_it != up_osds.end();
329 ++osd_it) {
330 if (i == *osd_it) {
331 in_use = true;
332 break;
333 }
334 }
335 if (!in_use) {
336 up_osds[1] = i;
337 break;
338 }
339 }
340 if (i == (int)get_num_osds())
341 FAIL() << "did not find unused OSD for temp mapping";
342
343 pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
344 up_osds.begin(), up_osds.end());
345 pgtemp_map.new_primary_temp[pgid] = up_osds[1];
346 osdmap.apply_incremental(pgtemp_map);
347
348 OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
349
350 OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc);
351 EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
352 EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
353 }
354
355 TEST_F(OSDMapTest, PrimaryAffinity) {
356 set_up_map();
357
358 int n = get_num_osds();
359 for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
360 p != osdmap.get_pools().end();
361 ++p) {
362 int pool = p->first;
363 cout << "pool " << pool << std::endl;
364 {
365 vector<int> any(n, 0);
366 vector<int> first(n, 0);
367 vector<int> primary(n, 0);
368 test_mappings(0, 10000, &any, &first, &primary);
369 for (int i=0; i<n; ++i) {
370 ASSERT_LT(0, any[i]);
371 ASSERT_LT(0, first[i]);
372 ASSERT_LT(0, primary[i]);
373 }
374 }
375
376 osdmap.set_primary_affinity(0, 0);
377 osdmap.set_primary_affinity(1, 0);
378 {
379 vector<int> any(n, 0);
380 vector<int> first(n, 0);
381 vector<int> primary(n, 0);
382 test_mappings(pool, 10000, &any, &first, &primary);
383 for (int i=0; i<n; ++i) {
384 ASSERT_LT(0, any[i]);
385 if (i >= 2) {
386 ASSERT_LT(0, first[i]);
387 ASSERT_LT(0, primary[i]);
388 } else {
389 if (p->second.is_replicated()) {
390 ASSERT_EQ(0, first[i]);
391 }
392 ASSERT_EQ(0, primary[i]);
393 }
394 }
395 }
396
397 osdmap.set_primary_affinity(0, 0x8000);
398 osdmap.set_primary_affinity(1, 0);
399 {
400 vector<int> any(n, 0);
401 vector<int> first(n, 0);
402 vector<int> primary(n, 0);
403 test_mappings(pool, 10000, &any, &first, &primary);
404 for (int i=0; i<n; ++i) {
405 ASSERT_LT(0, any[i]);
406 if (i >= 2) {
407 ASSERT_LT(0, first[i]);
408 ASSERT_LT(0, primary[i]);
409 } else if (i == 1) {
410 if (p->second.is_replicated()) {
411 ASSERT_EQ(0, first[i]);
412 }
413 ASSERT_EQ(0, primary[i]);
414 } else {
415 ASSERT_LT(10000/6/4, primary[0]);
416 ASSERT_GT(10000/6/4*3, primary[0]);
417 }
418 }
419 }
420
421 osdmap.set_primary_affinity(0, 0x10000);
422 osdmap.set_primary_affinity(1, 0x10000);
423 }
424 }
425
426 TEST(PGTempMap, basic)
427 {
428 PGTempMap m;
429 pg_t a(1,1);
430 for (auto i=3; i<1000; ++i) {
431 pg_t x(i, 1);
432 m.set(x, {static_cast<int>(i)});
433 }
434 pg_t b(2,1);
435 m.set(a, {1, 2});
436 ASSERT_NE(m.find(a), m.end());
437 ASSERT_EQ(m.find(a), m.begin());
438 ASSERT_EQ(m.find(b), m.end());
439 ASSERT_EQ(998u, m.size());
440 }