]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | #include "gtest/gtest.h" | |
3 | #include "osd/OSDMap.h" | |
4 | #include "osd/OSDMapMapping.h" | |
494da23a | 5 | #include "mon/OSDMonitor.h" |
f67539c2 | 6 | #include "mon/PGMap.h" |
7c673cae FG |
7 | |
8 | #include "global/global_context.h" | |
9 | #include "global/global_init.h" | |
10 | #include "common/common_init.h" | |
31f18b77 | 11 | #include "common/ceph_argparse.h" |
f67539c2 | 12 | #include "common/ceph_json.h" |
7c673cae FG |
13 | |
14 | #include <iostream> | |
1e59de90 | 15 | #include <cmath> |
7c673cae FG |
16 | |
17 | using namespace std; | |
18 | ||
19 | int main(int argc, char **argv) { | |
11fdf7f2 TL |
20 | map<string,string> defaults = { |
21 | // make sure we have 3 copies, or some tests won't work | |
22 | { "osd_pool_default_size", "3" }, | |
23 | // our map is flat, so just try and split across OSDs, not hosts or whatever | |
24 | { "osd_crush_chooseleaf_type", "0" }, | |
25 | }; | |
7c673cae | 26 | std::vector<const char*> args(argv, argv+argc); |
11fdf7f2 | 27 | auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, |
7c673cae FG |
28 | CODE_ENVIRONMENT_UTILITY, |
29 | CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); | |
30 | common_init_finish(g_ceph_context); | |
7c673cae FG |
31 | ::testing::InitGoogleTest(&argc, argv); |
32 | return RUN_ALL_TESTS(); | |
33 | } | |
34 | ||
a4b75251 TL |
35 | class OSDMapTest : public testing::Test, |
36 | public ::testing::WithParamInterface<std::pair<int, int>> { | |
a8e16298 | 37 | int num_osds = 6; |
7c673cae FG |
38 | public: |
39 | OSDMap osdmap; | |
40 | OSDMapMapping mapping; | |
224ce89b WB |
41 | const uint64_t my_ec_pool = 1; |
42 | const uint64_t my_rep_pool = 2; | |
43 | ||
33c7a0ef TL |
44 | // Blacklist testing lists |
45 | // I pulled the first two ranges and their start/end points from | |
46 | // https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation | |
47 | static const string range_addrs[]; | |
48 | static const string ip_addrs[]; | |
49 | static const string unblocked_ip_addrs[]; | |
1e59de90 | 50 | const string EC_RULE_NAME = "erasure"; |
7c673cae FG |
51 | |
52 | OSDMapTest() {} | |
53 | ||
a8e16298 TL |
54 | void set_up_map(int new_num_osds = 6, bool no_default_pools = false) { |
55 | num_osds = new_num_osds; | |
7c673cae | 56 | uuid_d fsid; |
224ce89b | 57 | osdmap.build_simple(g_ceph_context, 0, fsid, num_osds); |
7c673cae FG |
58 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); |
59 | pending_inc.fsid = osdmap.get_fsid(); | |
11fdf7f2 TL |
60 | entity_addrvec_t sample_addrs; |
61 | sample_addrs.v.push_back(entity_addr_t()); | |
7c673cae FG |
62 | uuid_d sample_uuid; |
63 | for (int i = 0; i < num_osds; ++i) { | |
64 | sample_uuid.generate_random(); | |
11fdf7f2 | 65 | sample_addrs.v[0].nonce = i; |
7c673cae | 66 | pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW; |
11fdf7f2 TL |
67 | pending_inc.new_up_client[i] = sample_addrs; |
68 | pending_inc.new_up_cluster[i] = sample_addrs; | |
69 | pending_inc.new_hb_back_up[i] = sample_addrs; | |
70 | pending_inc.new_hb_front_up[i] = sample_addrs; | |
7c673cae FG |
71 | pending_inc.new_weight[i] = CEPH_OSD_IN; |
72 | pending_inc.new_uuid[i] = sample_uuid; | |
73 | } | |
74 | osdmap.apply_incremental(pending_inc); | |
a8e16298 TL |
75 | if (no_default_pools) // do not create any default pool(s) |
76 | return; | |
7c673cae | 77 | |
7c673cae FG |
78 | OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); |
79 | new_pool_inc.new_pool_max = osdmap.get_pool_max(); | |
80 | new_pool_inc.fsid = osdmap.get_fsid(); | |
224ce89b | 81 | // make an ec pool |
1e59de90 TL |
82 | set_ec_pool("ec", new_pool_inc); |
83 | // and a replicated pool | |
84 | set_rep_pool("reppool",new_pool_inc); | |
85 | osdmap.apply_incremental(new_pool_inc); | |
86 | } | |
87 | int get_ec_crush_rule() { | |
88 | int r = osdmap.crush->get_rule_id(EC_RULE_NAME); | |
89 | if (r < 0) { | |
90 | r = osdmap.crush->add_simple_rule( | |
91 | EC_RULE_NAME, "default", "osd", "", | |
92 | "indep", pg_pool_t::TYPE_ERASURE, | |
93 | &cerr); | |
94 | } | |
95 | return r; | |
96 | } | |
97 | uint64_t set_ec_pool(const string &name, OSDMap::Incremental &new_pool_inc, | |
98 | bool assert_pool_id = true) { | |
99 | pg_pool_t empty; | |
7c673cae | 100 | uint64_t pool_id = ++new_pool_inc.new_pool_max; |
1e59de90 TL |
101 | if (assert_pool_id) |
102 | ceph_assert(pool_id == my_ec_pool); | |
7c673cae FG |
103 | pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty); |
104 | p->size = 3; | |
105 | p->set_pg_num(64); | |
106 | p->set_pgp_num(64); | |
107 | p->type = pg_pool_t::TYPE_ERASURE; | |
1e59de90 TL |
108 | p->crush_rule = get_ec_crush_rule(); |
109 | new_pool_inc.new_pool_names[pool_id] = name;//"ec"; | |
110 | return pool_id; | |
111 | } | |
112 | uint64_t set_rep_pool(const string name, OSDMap::Incremental &new_pool_inc, | |
113 | bool assert_pool_id = true) { | |
114 | pg_pool_t empty; | |
115 | uint64_t pool_id = ++new_pool_inc.new_pool_max; | |
116 | if (assert_pool_id) | |
117 | ceph_assert(pool_id == my_rep_pool); | |
118 | pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty); | |
224ce89b WB |
119 | p->size = 3; |
120 | p->set_pg_num(64); | |
121 | p->set_pgp_num(64); | |
122 | p->type = pg_pool_t::TYPE_REPLICATED; | |
123 | p->crush_rule = 0; | |
124 | p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); | |
1e59de90 TL |
125 | new_pool_inc.new_pool_names[pool_id] = name;//"reppool"; |
126 | return pool_id; | |
7c673cae | 127 | } |
1e59de90 | 128 | |
7c673cae | 129 | unsigned int get_num_osds() { return num_osds; } |
a8e16298 | 130 | void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) { |
94b18763 | 131 | bufferlist bl; |
a8e16298 | 132 | tmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT); |
11fdf7f2 | 133 | auto p = bl.cbegin(); |
94b18763 FG |
134 | newcrush.decode(p); |
135 | } | |
a8e16298 | 136 | int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) { |
94b18763 FG |
137 | map<string,string> loc; |
138 | CrushWrapper::parse_loc_map(argvec, &loc); | |
139 | CrushWrapper newcrush; | |
a8e16298 | 140 | get_crush(tmap, newcrush); |
94b18763 FG |
141 | if (!newcrush.name_exists(name)) { |
142 | return -ENOENT; | |
143 | } | |
144 | int id = newcrush.get_item_id(name); | |
145 | int err; | |
146 | if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { | |
147 | if (id >= 0) { | |
148 | err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc); | |
149 | } else { | |
150 | err = newcrush.move_bucket(g_ceph_context, id, loc); | |
151 | } | |
152 | if (err >= 0) { | |
a8e16298 | 153 | OSDMap::Incremental pending_inc(tmap.get_epoch() + 1); |
94b18763 FG |
154 | pending_inc.crush.clear(); |
155 | newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
a8e16298 | 156 | tmap.apply_incremental(pending_inc); |
94b18763 FG |
157 | err = 0; |
158 | } | |
159 | } else { | |
160 | // already there | |
161 | err = 0; | |
162 | } | |
163 | return err; | |
164 | } | |
165 | int crush_rule_create_replicated(const string &name, | |
166 | const string &root, | |
167 | const string &type) { | |
168 | if (osdmap.crush->rule_exists(name)) { | |
169 | return osdmap.crush->get_rule_id(name); | |
170 | } | |
171 | CrushWrapper newcrush; | |
a8e16298 | 172 | get_crush(osdmap, newcrush); |
94b18763 FG |
173 | string device_class; |
174 | stringstream ss; | |
175 | int ruleno = newcrush.add_simple_rule( | |
176 | name, root, type, device_class, | |
177 | "firstn", pg_pool_t::TYPE_REPLICATED, &ss); | |
178 | if (ruleno >= 0) { | |
179 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
180 | pending_inc.crush.clear(); | |
181 | newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
182 | osdmap.apply_incremental(pending_inc); | |
183 | } | |
184 | return ruleno; | |
185 | } | |
7c673cae FG |
186 | void test_mappings(int pool, |
187 | int num, | |
188 | vector<int> *any, | |
189 | vector<int> *first, | |
190 | vector<int> *primary) { | |
191 | mapping.update(osdmap); | |
192 | for (int i=0; i<num; ++i) { | |
193 | vector<int> up, acting; | |
194 | int up_primary, acting_primary; | |
195 | pg_t pgid(i, pool); | |
196 | osdmap.pg_to_up_acting_osds(pgid, | |
197 | &up, &up_primary, &acting, &acting_primary); | |
198 | for (unsigned j=0; j<acting.size(); ++j) | |
199 | (*any)[acting[j]]++; | |
200 | if (!acting.empty()) | |
201 | (*first)[acting[0]]++; | |
202 | if (acting_primary >= 0) | |
203 | (*primary)[acting_primary]++; | |
204 | ||
205 | // compare to precalc mapping | |
206 | vector<int> up2, acting2; | |
207 | int up_primary2, acting_primary2; | |
208 | pgid = osdmap.raw_pg_to_pg(pgid); | |
209 | mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2); | |
210 | ASSERT_EQ(up, up2); | |
211 | ASSERT_EQ(up_primary, up_primary2); | |
212 | ASSERT_EQ(acting, acting2); | |
213 | ASSERT_EQ(acting_primary, acting_primary2); | |
214 | } | |
224ce89b WB |
215 | cout << "any: " << *any << std::endl;; |
216 | cout << "first: " << *first << std::endl;; | |
217 | cout << "primary: " << *primary << std::endl;; | |
7c673cae | 218 | } |
494da23a TL |
219 | void clean_pg_upmaps(CephContext *cct, |
220 | const OSDMap& om, | |
221 | OSDMap::Incremental& pending_inc) { | |
222 | int cpu_num = 8; | |
223 | int pgs_per_chunk = 256; | |
224 | ThreadPool tp(cct, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num); | |
225 | tp.start(); | |
226 | ParallelPGMapper mapper(cct, &tp); | |
227 | vector<pg_t> pgs_to_check; | |
228 | om.get_upmap_pgs(&pgs_to_check); | |
229 | OSDMonitor::CleanUpmapJob job(cct, om, pending_inc); | |
230 | mapper.queue(&job, pgs_per_chunk, pgs_to_check); | |
231 | job.wait(); | |
232 | tp.stop(); | |
233 | } | |
1e59de90 TL |
234 | void set_primary_affinity_all(float pa) { |
235 | for (uint i = 0 ; i < get_num_osds() ; i++) { | |
236 | osdmap.set_primary_affinity(i, int(pa * CEPH_OSD_MAX_PRIMARY_AFFINITY)); | |
237 | } | |
238 | } | |
239 | bool score_in_range(float score, uint nosds = 0) { | |
240 | if (nosds == 0) { | |
241 | nosds = get_num_osds(); | |
242 | } | |
243 | return score >= 1.0 && score <= float(nosds); | |
244 | } | |
7c673cae FG |
245 | }; |
246 | ||
247 | TEST_F(OSDMapTest, Create) { | |
248 | set_up_map(); | |
249 | ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd()); | |
250 | ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds()); | |
251 | } | |
252 | ||
253 | TEST_F(OSDMapTest, Features) { | |
254 | // with EC pool | |
255 | set_up_map(); | |
256 | uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL); | |
257 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); | |
258 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); | |
259 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); | |
260 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2); | |
7c673cae FG |
261 | ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); |
262 | ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); | |
263 | ||
264 | // clients have a slightly different view | |
265 | features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL); | |
266 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); | |
267 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); | |
268 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); | |
269 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2); | |
7c673cae FG |
270 | ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); |
271 | ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); | |
272 | ||
273 | // remove teh EC pool, but leave the rule. add primary affinity. | |
274 | { | |
275 | OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); | |
276 | new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec")); | |
277 | new_pool_inc.new_primary_affinity[0] = 0x8000; | |
278 | osdmap.apply_incremental(new_pool_inc); | |
279 | } | |
280 | ||
281 | features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL); | |
282 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); | |
283 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); | |
284 | ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity | |
285 | ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2); | |
7c673cae FG |
286 | ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); |
287 | ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); | |
288 | ||
289 | // FIXME: test tiering feature bits | |
290 | } | |
291 | ||
292 | TEST_F(OSDMapTest, MapPG) { | |
293 | set_up_map(); | |
294 | ||
224ce89b | 295 | std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl; |
11fdf7f2 | 296 | pg_t rawpg(0, my_rep_pool); |
7c673cae FG |
297 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); |
298 | vector<int> up_osds, acting_osds; | |
299 | int up_primary, acting_primary; | |
300 | ||
301 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
302 | &acting_osds, &acting_primary); | |
303 | ||
304 | vector<int> old_up_osds, old_acting_osds; | |
305 | osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds); | |
306 | ASSERT_EQ(old_up_osds, up_osds); | |
307 | ASSERT_EQ(old_acting_osds, acting_osds); | |
308 | ||
224ce89b | 309 | ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size()); |
7c673cae FG |
310 | } |
311 | ||
312 | TEST_F(OSDMapTest, MapFunctionsMatch) { | |
313 | // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match | |
314 | set_up_map(); | |
11fdf7f2 | 315 | pg_t rawpg(0, my_rep_pool); |
7c673cae FG |
316 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); |
317 | vector<int> up_osds, acting_osds; | |
318 | int up_primary, acting_primary; | |
319 | ||
320 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
321 | &acting_osds, &acting_primary); | |
322 | ||
323 | vector<int> up_osds_two, acting_osds_two; | |
324 | ||
325 | osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two); | |
326 | ||
327 | ASSERT_EQ(up_osds, up_osds_two); | |
328 | ASSERT_EQ(acting_osds, acting_osds_two); | |
329 | ||
330 | int acting_primary_two; | |
331 | osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two); | |
332 | EXPECT_EQ(acting_osds, acting_osds_two); | |
333 | EXPECT_EQ(acting_primary, acting_primary_two); | |
334 | osdmap.pg_to_acting_osds(pgid, acting_osds_two); | |
335 | EXPECT_EQ(acting_osds, acting_osds_two); | |
336 | } | |
337 | ||
338 | /** This test must be removed or modified appropriately when we allow | |
339 | * other ways to specify a primary. */ | |
340 | TEST_F(OSDMapTest, PrimaryIsFirst) { | |
341 | set_up_map(); | |
342 | ||
11fdf7f2 | 343 | pg_t rawpg(0, my_rep_pool); |
7c673cae FG |
344 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); |
345 | vector<int> up_osds, acting_osds; | |
346 | int up_primary, acting_primary; | |
347 | ||
348 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
349 | &acting_osds, &acting_primary); | |
350 | EXPECT_EQ(up_osds[0], up_primary); | |
351 | EXPECT_EQ(acting_osds[0], acting_primary); | |
352 | } | |
353 | ||
354 | TEST_F(OSDMapTest, PGTempRespected) { | |
355 | set_up_map(); | |
356 | ||
11fdf7f2 | 357 | pg_t rawpg(0, my_rep_pool); |
7c673cae FG |
358 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); |
359 | vector<int> up_osds, acting_osds; | |
360 | int up_primary, acting_primary; | |
361 | ||
362 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
363 | &acting_osds, &acting_primary); | |
364 | ||
365 | // copy and swap first and last element in acting_osds | |
366 | vector<int> new_acting_osds(acting_osds); | |
367 | int first = new_acting_osds[0]; | |
368 | new_acting_osds[0] = *new_acting_osds.rbegin(); | |
369 | *new_acting_osds.rbegin() = first; | |
370 | ||
371 | // apply pg_temp to osdmap | |
372 | OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); | |
373 | pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>( | |
374 | new_acting_osds.begin(), new_acting_osds.end()); | |
375 | osdmap.apply_incremental(pgtemp_map); | |
376 | ||
377 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
378 | &acting_osds, &acting_primary); | |
379 | EXPECT_EQ(new_acting_osds, acting_osds); | |
380 | } | |
381 | ||
382 | TEST_F(OSDMapTest, PrimaryTempRespected) { | |
383 | set_up_map(); | |
384 | ||
11fdf7f2 | 385 | pg_t rawpg(0, my_rep_pool); |
7c673cae FG |
386 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); |
387 | vector<int> up_osds; | |
388 | vector<int> acting_osds; | |
389 | int up_primary, acting_primary; | |
390 | ||
391 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
392 | &acting_osds, &acting_primary); | |
393 | ||
394 | // make second OSD primary via incremental | |
395 | OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); | |
396 | pgtemp_map.new_primary_temp[pgid] = acting_osds[1]; | |
397 | osdmap.apply_incremental(pgtemp_map); | |
398 | ||
399 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
400 | &acting_osds, &acting_primary); | |
401 | EXPECT_EQ(acting_primary, acting_osds[1]); | |
402 | } | |
403 | ||
404 | TEST_F(OSDMapTest, CleanTemps) { | |
405 | set_up_map(); | |
406 | ||
407 | OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); | |
408 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2); | |
224ce89b | 409 | pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool)); |
7c673cae FG |
410 | { |
411 | vector<int> up_osds, acting_osds; | |
412 | int up_primary, acting_primary; | |
413 | osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary, | |
414 | &acting_osds, &acting_primary); | |
415 | pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>( | |
416 | up_osds.begin(), up_osds.end()); | |
417 | pgtemp_map.new_primary_temp[pga] = up_primary; | |
418 | } | |
224ce89b | 419 | pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool)); |
7c673cae FG |
420 | { |
421 | vector<int> up_osds, acting_osds; | |
422 | int up_primary, acting_primary; | |
423 | osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary, | |
424 | &acting_osds, &acting_primary); | |
425 | pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>( | |
426 | up_osds.begin(), up_osds.end()); | |
427 | pending_inc.new_primary_temp[pgb] = up_primary; | |
428 | } | |
429 | ||
430 | osdmap.apply_incremental(pgtemp_map); | |
431 | ||
11fdf7f2 TL |
432 | OSDMap tmpmap; |
433 | tmpmap.deepish_copy_from(osdmap); | |
434 | tmpmap.apply_incremental(pending_inc); | |
435 | OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); | |
7c673cae FG |
436 | |
437 | EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) && | |
438 | pending_inc.new_pg_temp[pga].size() == 0); | |
439 | EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]); | |
440 | ||
441 | EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) && | |
442 | !pending_inc.new_primary_temp.count(pgb)); | |
443 | } | |
444 | ||
445 | TEST_F(OSDMapTest, KeepsNecessaryTemps) { | |
446 | set_up_map(); | |
447 | ||
11fdf7f2 | 448 | pg_t rawpg(0, my_rep_pool); |
7c673cae FG |
449 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); |
450 | vector<int> up_osds, acting_osds; | |
451 | int up_primary, acting_primary; | |
452 | ||
453 | osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, | |
454 | &acting_osds, &acting_primary); | |
455 | ||
456 | // find unused OSD and stick it in there | |
457 | OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); | |
458 | // find an unused osd and put it in place of the first one | |
459 | int i = 0; | |
460 | for(; i != (int)get_num_osds(); ++i) { | |
461 | bool in_use = false; | |
462 | for (vector<int>::iterator osd_it = up_osds.begin(); | |
463 | osd_it != up_osds.end(); | |
464 | ++osd_it) { | |
465 | if (i == *osd_it) { | |
466 | in_use = true; | |
467 | break; | |
468 | } | |
469 | } | |
470 | if (!in_use) { | |
471 | up_osds[1] = i; | |
472 | break; | |
473 | } | |
474 | } | |
475 | if (i == (int)get_num_osds()) | |
476 | FAIL() << "did not find unused OSD for temp mapping"; | |
477 | ||
478 | pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>( | |
479 | up_osds.begin(), up_osds.end()); | |
480 | pgtemp_map.new_primary_temp[pgid] = up_osds[1]; | |
481 | osdmap.apply_incremental(pgtemp_map); | |
482 | ||
483 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
484 | ||
11fdf7f2 TL |
485 | OSDMap tmpmap; |
486 | tmpmap.deepish_copy_from(osdmap); | |
487 | tmpmap.apply_incremental(pending_inc); | |
488 | OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); | |
7c673cae FG |
489 | EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid)); |
490 | EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid)); | |
491 | } | |
492 | ||
493 | TEST_F(OSDMapTest, PrimaryAffinity) { | |
494 | set_up_map(); | |
495 | ||
7c673cae FG |
496 | int n = get_num_osds(); |
497 | for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin(); | |
498 | p != osdmap.get_pools().end(); | |
499 | ++p) { | |
500 | int pool = p->first; | |
224ce89b WB |
501 | int expect_primary = 10000 / n; |
502 | cout << "pool " << pool << " size " << (int)p->second.size | |
503 | << " expect_primary " << expect_primary << std::endl; | |
7c673cae FG |
504 | { |
505 | vector<int> any(n, 0); | |
506 | vector<int> first(n, 0); | |
507 | vector<int> primary(n, 0); | |
224ce89b | 508 | test_mappings(pool, 10000, &any, &first, &primary); |
7c673cae | 509 | for (int i=0; i<n; ++i) { |
7c673cae FG |
510 | ASSERT_LT(0, any[i]); |
511 | ASSERT_LT(0, first[i]); | |
512 | ASSERT_LT(0, primary[i]); | |
513 | } | |
514 | } | |
515 | ||
516 | osdmap.set_primary_affinity(0, 0); | |
517 | osdmap.set_primary_affinity(1, 0); | |
518 | { | |
519 | vector<int> any(n, 0); | |
520 | vector<int> first(n, 0); | |
521 | vector<int> primary(n, 0); | |
522 | test_mappings(pool, 10000, &any, &first, &primary); | |
523 | for (int i=0; i<n; ++i) { | |
7c673cae FG |
524 | ASSERT_LT(0, any[i]); |
525 | if (i >= 2) { | |
526 | ASSERT_LT(0, first[i]); | |
527 | ASSERT_LT(0, primary[i]); | |
528 | } else { | |
529 | if (p->second.is_replicated()) { | |
530 | ASSERT_EQ(0, first[i]); | |
531 | } | |
532 | ASSERT_EQ(0, primary[i]); | |
533 | } | |
534 | } | |
535 | } | |
536 | ||
537 | osdmap.set_primary_affinity(0, 0x8000); | |
538 | osdmap.set_primary_affinity(1, 0); | |
539 | { | |
540 | vector<int> any(n, 0); | |
541 | vector<int> first(n, 0); | |
542 | vector<int> primary(n, 0); | |
543 | test_mappings(pool, 10000, &any, &first, &primary); | |
224ce89b WB |
544 | int expect = (10000 / (n-2)) / 2; // half weight |
545 | cout << "expect " << expect << std::endl; | |
7c673cae | 546 | for (int i=0; i<n; ++i) { |
7c673cae FG |
547 | ASSERT_LT(0, any[i]); |
548 | if (i >= 2) { | |
549 | ASSERT_LT(0, first[i]); | |
550 | ASSERT_LT(0, primary[i]); | |
551 | } else if (i == 1) { | |
552 | if (p->second.is_replicated()) { | |
553 | ASSERT_EQ(0, first[i]); | |
554 | } | |
555 | ASSERT_EQ(0, primary[i]); | |
556 | } else { | |
224ce89b WB |
557 | ASSERT_LT(expect *2/3, primary[0]); |
558 | ASSERT_GT(expect *4/3, primary[0]); | |
7c673cae FG |
559 | } |
560 | } | |
561 | } | |
562 | ||
563 | osdmap.set_primary_affinity(0, 0x10000); | |
564 | osdmap.set_primary_affinity(1, 0x10000); | |
565 | } | |
566 | } | |
31f18b77 | 567 | |
81eedcae TL |
568 | TEST_F(OSDMapTest, get_osd_crush_node_flags) { |
569 | set_up_map(); | |
570 | ||
571 | for (unsigned i=0; i<get_num_osds(); ++i) { | |
572 | ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i)); | |
573 | } | |
574 | ||
575 | OSDMap::Incremental inc(osdmap.get_epoch() + 1); | |
576 | inc.new_crush_node_flags[-1] = 123u; | |
577 | osdmap.apply_incremental(inc); | |
578 | for (unsigned i=0; i<get_num_osds(); ++i) { | |
579 | ASSERT_EQ(123u, osdmap.get_osd_crush_node_flags(i)); | |
580 | } | |
581 | ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000)); | |
582 | ||
583 | OSDMap::Incremental inc3(osdmap.get_epoch() + 1); | |
584 | inc3.new_crush_node_flags[-1] = 456u; | |
585 | osdmap.apply_incremental(inc3); | |
586 | for (unsigned i=0; i<get_num_osds(); ++i) { | |
587 | ASSERT_EQ(456u, osdmap.get_osd_crush_node_flags(i)); | |
588 | } | |
589 | ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000)); | |
590 | ||
591 | OSDMap::Incremental inc2(osdmap.get_epoch() + 1); | |
592 | inc2.new_crush_node_flags[-1] = 0; | |
593 | osdmap.apply_incremental(inc2); | |
594 | for (unsigned i=0; i<get_num_osds(); ++i) { | |
595 | ASSERT_EQ(0u, osdmap.get_crush_node_flags(i)); | |
596 | } | |
597 | } | |
598 | ||
35e4c445 FG |
599 | TEST_F(OSDMapTest, parse_osd_id_list) { |
600 | set_up_map(); | |
601 | set<int> out; | |
602 | set<int> all; | |
603 | osdmap.get_all_osds(all); | |
604 | ||
605 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout)); | |
11fdf7f2 | 606 | ASSERT_EQ(1u, out.size()); |
35e4c445 FG |
607 | ASSERT_EQ(0, *out.begin()); |
608 | ||
609 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout)); | |
11fdf7f2 | 610 | ASSERT_EQ(1u, out.size()); |
35e4c445 FG |
611 | ASSERT_EQ(1, *out.begin()); |
612 | ||
613 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout)); | |
11fdf7f2 | 614 | ASSERT_EQ(2u, out.size()); |
35e4c445 FG |
615 | ASSERT_EQ(0, *out.begin()); |
616 | ASSERT_EQ(1, *out.rbegin()); | |
617 | ||
618 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout)); | |
11fdf7f2 | 619 | ASSERT_EQ(2u, out.size()); |
35e4c445 FG |
620 | ASSERT_EQ(0, *out.begin()); |
621 | ASSERT_EQ(1, *out.rbegin()); | |
622 | ||
623 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout)); | |
624 | ASSERT_EQ(all.size(), out.size()); | |
625 | ASSERT_EQ(all, out); | |
626 | ||
627 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout)); | |
628 | ASSERT_EQ(all, out); | |
629 | ||
630 | ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout)); | |
631 | ASSERT_EQ(all, out); | |
632 | ||
633 | ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout)); | |
634 | ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout)); | |
635 | } | |
636 | ||
94b18763 FG |
637 | TEST_F(OSDMapTest, CleanPGUpmaps) { |
638 | set_up_map(); | |
639 | ||
640 | // build a crush rule of type host | |
641 | const int expected_host_num = 3; | |
642 | int osd_per_host = get_num_osds() / expected_host_num; | |
643 | ASSERT_GE(2, osd_per_host); | |
644 | int index = 0; | |
645 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
646 | if (i && i % osd_per_host == 0) { | |
647 | ++index; | |
648 | } | |
649 | stringstream osd_name; | |
650 | stringstream host_name; | |
651 | vector<string> move_to; | |
652 | osd_name << "osd." << i; | |
653 | host_name << "host-" << index; | |
654 | move_to.push_back("root=default"); | |
655 | string host_loc = "host=" + host_name.str(); | |
656 | move_to.push_back(host_loc); | |
a8e16298 | 657 | int r = crush_move(osdmap, osd_name.str(), move_to); |
94b18763 FG |
658 | ASSERT_EQ(0, r); |
659 | } | |
660 | const string upmap_rule = "upmap"; | |
661 | int upmap_rule_no = crush_rule_create_replicated( | |
662 | upmap_rule, "default", "host"); | |
663 | ASSERT_LT(0, upmap_rule_no); | |
664 | ||
665 | // create a replicated pool which references the above rule | |
666 | OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); | |
667 | new_pool_inc.new_pool_max = osdmap.get_pool_max(); | |
668 | new_pool_inc.fsid = osdmap.get_fsid(); | |
669 | pg_pool_t empty; | |
670 | uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max; | |
671 | pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty); | |
672 | p->size = 2; | |
673 | p->set_pg_num(64); | |
674 | p->set_pgp_num(64); | |
675 | p->type = pg_pool_t::TYPE_REPLICATED; | |
676 | p->crush_rule = upmap_rule_no; | |
677 | p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); | |
678 | new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool"; | |
679 | osdmap.apply_incremental(new_pool_inc); | |
680 | ||
681 | pg_t rawpg(0, upmap_pool_id); | |
682 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); | |
683 | vector<int> up; | |
684 | int up_primary; | |
685 | osdmap.pg_to_raw_up(pgid, &up, &up_primary); | |
686 | ASSERT_LT(1U, up.size()); | |
687 | { | |
688 | // validate we won't have two OSDs from a same host | |
689 | int parent_0 = osdmap.crush->get_parent_of_type(up[0], | |
690 | osdmap.crush->get_type_id("host")); | |
691 | int parent_1 = osdmap.crush->get_parent_of_type(up[1], | |
692 | osdmap.crush->get_type_id("host")); | |
693 | ASSERT_TRUE(parent_0 != parent_1); | |
694 | } | |
695 | ||
f64942e4 AA |
696 | { |
697 | // cancel stale upmaps | |
698 | osdmap.pg_to_raw_up(pgid, &up, &up_primary); | |
699 | int from = -1; | |
700 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
701 | if (std::find(up.begin(), up.end(), i) == up.end()) { | |
702 | from = i; | |
703 | break; | |
704 | } | |
705 | } | |
706 | ASSERT_TRUE(from >= 0); | |
707 | int to = -1; | |
708 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
709 | if (std::find(up.begin(), up.end(), i) == up.end() && i != from) { | |
710 | to = i; | |
711 | break; | |
712 | } | |
713 | } | |
714 | ASSERT_TRUE(to >= 0); | |
715 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
716 | new_pg_upmap_items.push_back(make_pair(from, to)); | |
717 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
718 | pending_inc.new_pg_upmap_items[pgid] = | |
719 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
720 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
721 | OSDMap nextmap; | |
722 | nextmap.deepish_copy_from(osdmap); | |
723 | nextmap.apply_incremental(pending_inc); | |
724 | ASSERT_TRUE(nextmap.have_pg_upmaps(pgid)); | |
725 | OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1); | |
494da23a | 726 | clean_pg_upmaps(g_ceph_context, nextmap, new_pending_inc); |
f64942e4 AA |
727 | nextmap.apply_incremental(new_pending_inc); |
728 | ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid)); | |
729 | } | |
730 | ||
731 | { | |
732 | // https://tracker.ceph.com/issues/37493 | |
733 | pg_t ec_pg(0, my_ec_pool); | |
734 | pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg); | |
735 | OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map.. | |
736 | int from = -1; | |
737 | int to = -1; | |
738 | { | |
739 | // insert a valid pg_upmap_item | |
740 | vector<int> ec_up; | |
741 | int ec_up_primary; | |
742 | osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); | |
743 | ASSERT_TRUE(!ec_up.empty()); | |
744 | from = *(ec_up.begin()); | |
745 | ASSERT_TRUE(from >= 0); | |
746 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
747 | if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { | |
748 | to = i; | |
749 | break; | |
750 | } | |
751 | } | |
752 | ASSERT_TRUE(to >= 0); | |
753 | ASSERT_TRUE(from != to); | |
754 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
755 | new_pg_upmap_items.push_back(make_pair(from, to)); | |
756 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
757 | pending_inc.new_pg_upmap_items[ec_pgid] = | |
758 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
759 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
760 | tmpmap.deepish_copy_from(osdmap); | |
761 | tmpmap.apply_incremental(pending_inc); | |
762 | ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); | |
763 | } | |
764 | { | |
765 | // mark one of the target OSDs of the above pg_upmap_item as down | |
766 | OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); | |
767 | pending_inc.new_state[to] = CEPH_OSD_UP; | |
768 | tmpmap.apply_incremental(pending_inc); | |
769 | ASSERT_TRUE(!tmpmap.is_up(to)); | |
770 | ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); | |
771 | } | |
772 | { | |
494da23a | 773 | // confirm *clean_pg_upmaps* won't do anything bad |
f64942e4 | 774 | OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); |
494da23a | 775 | clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc); |
f64942e4 AA |
776 | tmpmap.apply_incremental(pending_inc); |
777 | ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); | |
778 | } | |
779 | } | |
780 | ||
781 | { | |
782 | // http://tracker.ceph.com/issues/37501 | |
783 | pg_t ec_pg(0, my_ec_pool); | |
784 | pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg); | |
785 | OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map.. | |
786 | int from = -1; | |
787 | int to = -1; | |
788 | { | |
789 | // insert a valid pg_upmap_item | |
790 | vector<int> ec_up; | |
791 | int ec_up_primary; | |
792 | osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); | |
793 | ASSERT_TRUE(!ec_up.empty()); | |
794 | from = *(ec_up.begin()); | |
795 | ASSERT_TRUE(from >= 0); | |
796 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
797 | if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { | |
798 | to = i; | |
799 | break; | |
800 | } | |
801 | } | |
802 | ASSERT_TRUE(to >= 0); | |
803 | ASSERT_TRUE(from != to); | |
804 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
805 | new_pg_upmap_items.push_back(make_pair(from, to)); | |
806 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
807 | pending_inc.new_pg_upmap_items[ec_pgid] = | |
808 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
809 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
810 | tmpmap.deepish_copy_from(osdmap); | |
811 | tmpmap.apply_incremental(pending_inc); | |
812 | ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); | |
813 | } | |
814 | { | |
815 | // mark one of the target OSDs of the above pg_upmap_item as out | |
816 | OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); | |
817 | pending_inc.new_weight[to] = CEPH_OSD_OUT; | |
818 | tmpmap.apply_incremental(pending_inc); | |
819 | ASSERT_TRUE(tmpmap.is_out(to)); | |
820 | ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); | |
821 | } | |
822 | { | |
494da23a | 823 | // *clean_pg_upmaps* should be able to remove the above *bad* mapping |
f64942e4 | 824 | OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); |
494da23a | 825 | clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc); |
f64942e4 AA |
826 | tmpmap.apply_incremental(pending_inc); |
827 | ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid)); | |
828 | } | |
829 | } | |
830 | ||
a8e16298 TL |
831 | { |
832 | // http://tracker.ceph.com/issues/37968 | |
833 | ||
834 | // build a temporary crush topology of 2 hosts, 3 osds per host | |
835 | OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. | |
836 | tmp.deepish_copy_from(osdmap); | |
837 | const int expected_host_num = 2; | |
838 | int osd_per_host = get_num_osds() / expected_host_num; | |
839 | ASSERT_GE(osd_per_host, 3); | |
840 | int index = 0; | |
841 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
842 | if (i && i % osd_per_host == 0) { | |
843 | ++index; | |
844 | } | |
845 | stringstream osd_name; | |
846 | stringstream host_name; | |
847 | vector<string> move_to; | |
848 | osd_name << "osd." << i; | |
849 | host_name << "host-" << index; | |
850 | move_to.push_back("root=default"); | |
851 | string host_loc = "host=" + host_name.str(); | |
852 | move_to.push_back(host_loc); | |
853 | auto r = crush_move(tmp, osd_name.str(), move_to); | |
854 | ASSERT_EQ(0, r); | |
855 | } | |
856 | ||
857 | // build crush rule | |
858 | CrushWrapper crush; | |
859 | get_crush(tmp, crush); | |
860 | string rule_name = "rule_37968"; | |
861 | int rule_type = pg_pool_t::TYPE_ERASURE; | |
862 | ASSERT_TRUE(!crush.rule_exists(rule_name)); | |
863 | int rno; | |
864 | for (rno = 0; rno < crush.get_max_rules(); rno++) { | |
20effc67 | 865 | if (!crush.rule_exists(rno)) |
a8e16298 TL |
866 | break; |
867 | } | |
868 | string root_name = "default"; | |
869 | int root = crush.get_item_id(root_name); | |
a8e16298 | 870 | int steps = 6; |
20effc67 | 871 | crush_rule *rule = crush_make_rule(steps, rule_type); |
a8e16298 TL |
872 | int step = 0; |
873 | crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); | |
874 | crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); | |
875 | crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); | |
876 | crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host*/); | |
877 | crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */); | |
878 | crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); | |
879 | ASSERT_TRUE(step == steps); | |
880 | auto r = crush_add_rule(crush.get_crush_map(), rule, rno); | |
881 | ASSERT_TRUE(r >= 0); | |
882 | crush.set_rule_name(rno, rule_name); | |
883 | { | |
884 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); | |
885 | pending_inc.crush.clear(); | |
886 | crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
887 | tmp.apply_incremental(pending_inc); | |
888 | } | |
889 | ||
890 | // create a erasuce-coded pool referencing the above rule | |
891 | int64_t pool_37968; | |
892 | { | |
893 | OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); | |
894 | new_pool_inc.new_pool_max = tmp.get_pool_max(); | |
895 | new_pool_inc.fsid = tmp.get_fsid(); | |
896 | pg_pool_t empty; | |
897 | pool_37968 = ++new_pool_inc.new_pool_max; | |
898 | pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty); | |
899 | p->size = 4; | |
900 | p->set_pg_num(8); | |
901 | p->set_pgp_num(8); | |
902 | p->type = pg_pool_t::TYPE_ERASURE; | |
903 | p->crush_rule = rno; | |
904 | p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); | |
905 | new_pool_inc.new_pool_names[pool_37968] = "pool_37968"; | |
906 | tmp.apply_incremental(new_pool_inc); | |
907 | } | |
908 | ||
909 | pg_t ec_pg(0, pool_37968); | |
910 | pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg); | |
911 | int from = -1; | |
912 | int to = -1; | |
913 | { | |
914 | // insert a valid pg_upmap_item | |
915 | vector<int> ec_up; | |
916 | int ec_up_primary; | |
917 | tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); | |
918 | ASSERT_TRUE(ec_up.size() == 4); | |
919 | from = *(ec_up.begin()); | |
920 | ASSERT_TRUE(from >= 0); | |
921 | auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno); | |
922 | ASSERT_TRUE(parent < 0); | |
923 | // pick an osd of the same parent with *from* | |
924 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
925 | if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { | |
926 | auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno); | |
927 | if (p == parent) { | |
928 | to = i; | |
929 | break; | |
930 | } | |
931 | } | |
932 | } | |
933 | ASSERT_TRUE(to >= 0); | |
934 | ASSERT_TRUE(from != to); | |
935 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
936 | new_pg_upmap_items.push_back(make_pair(from, to)); | |
937 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); | |
938 | pending_inc.new_pg_upmap_items[ec_pgid] = | |
939 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
940 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
941 | tmp.apply_incremental(pending_inc); | |
942 | ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid)); | |
943 | } | |
944 | { | |
494da23a | 945 | // *clean_pg_upmaps* should not remove the above upmap_item |
a8e16298 | 946 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); |
494da23a | 947 | clean_pg_upmaps(g_ceph_context, tmp, pending_inc); |
a8e16298 TL |
948 | tmp.apply_incremental(pending_inc); |
949 | ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid)); | |
950 | } | |
951 | } | |
952 | ||
94b18763 FG |
953 | { |
954 | // TEST pg_upmap | |
955 | { | |
956 | // STEP-1: enumerate all children of up[0]'s parent, | |
957 | // replace up[1] with one of them (other than up[0]) | |
958 | int parent = osdmap.crush->get_parent_of_type(up[0], | |
959 | osdmap.crush->get_type_id("host")); | |
960 | set<int> candidates; | |
961 | osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates); | |
962 | ASSERT_LT(1U, candidates.size()); | |
963 | int replaced_by = -1; | |
964 | for (auto c: candidates) { | |
965 | if (c != up[0]) { | |
966 | replaced_by = c; | |
967 | break; | |
968 | } | |
969 | } | |
91327a77 AA |
970 | { |
971 | // Check we can handle a negative pg_upmap value | |
972 | vector<int32_t> new_pg_upmap; | |
973 | new_pg_upmap.push_back(up[0]); | |
974 | new_pg_upmap.push_back(-823648512); | |
975 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
976 | pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( | |
977 | new_pg_upmap.begin(), new_pg_upmap.end()); | |
978 | osdmap.apply_incremental(pending_inc); | |
979 | vector<int> new_up; | |
980 | int new_up_primary; | |
981 | // crucial call - _apply_upmap should ignore the negative value | |
982 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
983 | } | |
94b18763 FG |
984 | ASSERT_NE(-1, replaced_by); |
985 | // generate a new pg_upmap item and apply | |
986 | vector<int32_t> new_pg_upmap; | |
987 | new_pg_upmap.push_back(up[0]); | |
988 | new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by | |
989 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
990 | pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( | |
991 | new_pg_upmap.begin(), new_pg_upmap.end()); | |
992 | osdmap.apply_incremental(pending_inc); | |
993 | { | |
994 | // validate pg_upmap is there | |
995 | vector<int> new_up; | |
996 | int new_up_primary; | |
997 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
9f95a23c TL |
998 | ASSERT_EQ(new_up.size(), up.size()); |
999 | ASSERT_EQ(new_up[0], new_pg_upmap[0]); | |
1000 | ASSERT_EQ(new_up[1], new_pg_upmap[1]); | |
94b18763 FG |
1001 | // and we shall have two OSDs from a same host now.. |
1002 | int parent_0 = osdmap.crush->get_parent_of_type(new_up[0], | |
1003 | osdmap.crush->get_type_id("host")); | |
1004 | int parent_1 = osdmap.crush->get_parent_of_type(new_up[1], | |
1005 | osdmap.crush->get_type_id("host")); | |
9f95a23c | 1006 | ASSERT_EQ(parent_0, parent_1); |
94b18763 FG |
1007 | } |
1008 | } | |
1009 | { | |
1010 | // STEP-2: apply cure | |
1011 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
494da23a | 1012 | clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); |
94b18763 FG |
1013 | osdmap.apply_incremental(pending_inc); |
1014 | { | |
1015 | // validate pg_upmap is gone (reverted) | |
1016 | vector<int> new_up; | |
1017 | int new_up_primary; | |
1018 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
9f95a23c TL |
1019 | ASSERT_EQ(new_up, up); |
1020 | ASSERT_EQ(new_up_primary, up_primary); | |
94b18763 FG |
1021 | } |
1022 | } | |
1023 | } | |
1024 | ||
1025 | { | |
1026 | // TEST pg_upmap_items | |
1027 | // enumerate all used hosts first | |
1028 | set<int> parents; | |
1029 | for (auto u: up) { | |
1030 | int parent = osdmap.crush->get_parent_of_type(u, | |
1031 | osdmap.crush->get_type_id("host")); | |
1032 | ASSERT_GT(0, parent); | |
1033 | parents.insert(parent); | |
1034 | } | |
1035 | int candidate_parent = 0; | |
1036 | set<int> candidate_children; | |
1037 | vector<int> up_after_out; | |
1038 | { | |
1039 | // STEP-1: try mark out up[1] and all other OSDs from the same host | |
1040 | int parent = osdmap.crush->get_parent_of_type(up[1], | |
1041 | osdmap.crush->get_type_id("host")); | |
1042 | set<int> children; | |
1043 | osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), | |
1044 | &children); | |
1045 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1046 | for (auto c: children) { | |
1047 | pending_inc.new_weight[c] = CEPH_OSD_OUT; | |
1048 | } | |
1049 | OSDMap tmpmap; | |
1050 | tmpmap.deepish_copy_from(osdmap); | |
1051 | tmpmap.apply_incremental(pending_inc); | |
1052 | vector<int> new_up; | |
1053 | int new_up_primary; | |
1054 | tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
1055 | // verify that we'll have OSDs from a different host.. | |
1056 | int will_choose = -1; | |
1057 | for (auto o: new_up) { | |
1058 | int parent = tmpmap.crush->get_parent_of_type(o, | |
1059 | osdmap.crush->get_type_id("host")); | |
1060 | if (!parents.count(parent)) { | |
1061 | will_choose = o; | |
1062 | candidate_parent = parent; // record | |
1063 | break; | |
1064 | } | |
1065 | } | |
1066 | ASSERT_LT(-1, will_choose); // it is an OSD! | |
9f95a23c | 1067 | ASSERT_NE(candidate_parent, 0); |
94b18763 FG |
1068 | osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent), |
1069 | &candidate_children); | |
1070 | ASSERT_TRUE(candidate_children.count(will_choose)); | |
1071 | candidate_children.erase(will_choose); | |
9f95a23c | 1072 | ASSERT_FALSE(candidate_children.empty()); |
94b18763 FG |
1073 | up_after_out = new_up; // needed for verification.. |
1074 | } | |
91327a77 AA |
1075 | { |
1076 | // Make sure we can handle a negative pg_upmap_item | |
1077 | int victim = up[0]; | |
1078 | int replaced_by = -823648512; | |
1079 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
1080 | new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); | |
1081 | // apply | |
1082 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1083 | pending_inc.new_pg_upmap_items[pgid] = | |
1084 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
1085 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
1086 | osdmap.apply_incremental(pending_inc); | |
1087 | vector<int> new_up; | |
1088 | int new_up_primary; | |
1089 | // crucial call - _apply_upmap should ignore the negative value | |
1090 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
1091 | } | |
94b18763 FG |
1092 | { |
1093 | // STEP-2: generating a new pg_upmap_items entry by | |
1094 | // replacing up[0] with one coming from candidate_children | |
1095 | int victim = up[0]; | |
1096 | int replaced_by = *candidate_children.begin(); | |
1097 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
1098 | new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); | |
1099 | // apply | |
1100 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1101 | pending_inc.new_pg_upmap_items[pgid] = | |
1102 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
1103 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
1104 | osdmap.apply_incremental(pending_inc); | |
1105 | { | |
1106 | // validate pg_upmap_items is there | |
1107 | vector<int> new_up; | |
1108 | int new_up_primary; | |
1109 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
9f95a23c | 1110 | ASSERT_EQ(new_up.size(), up.size()); |
94b18763 FG |
1111 | ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) != |
1112 | new_up.end()); | |
1113 | // and up[1] too | |
1114 | ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) != | |
1115 | new_up.end()); | |
1116 | } | |
1117 | } | |
1118 | { | |
1119 | // STEP-3: mark out up[1] and all other OSDs from the same host | |
1120 | int parent = osdmap.crush->get_parent_of_type(up[1], | |
1121 | osdmap.crush->get_type_id("host")); | |
1122 | set<int> children; | |
1123 | osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), | |
1124 | &children); | |
1125 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1126 | for (auto c: children) { | |
1127 | pending_inc.new_weight[c] = CEPH_OSD_OUT; | |
1128 | } | |
1129 | osdmap.apply_incremental(pending_inc); | |
1130 | { | |
1131 | // validate we have two OSDs from the same host now.. | |
1132 | vector<int> new_up; | |
1133 | int new_up_primary; | |
1134 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
9f95a23c | 1135 | ASSERT_EQ(up.size(), new_up.size()); |
94b18763 FG |
1136 | int parent_0 = osdmap.crush->get_parent_of_type(new_up[0], |
1137 | osdmap.crush->get_type_id("host")); | |
1138 | int parent_1 = osdmap.crush->get_parent_of_type(new_up[1], | |
1139 | osdmap.crush->get_type_id("host")); | |
9f95a23c | 1140 | ASSERT_EQ(parent_0, parent_1); |
94b18763 FG |
1141 | } |
1142 | } | |
1143 | { | |
1144 | // STEP-4: apply cure | |
1145 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
494da23a | 1146 | clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); |
94b18763 FG |
1147 | osdmap.apply_incremental(pending_inc); |
1148 | { | |
1149 | // validate pg_upmap_items is gone (reverted) | |
1150 | vector<int> new_up; | |
1151 | int new_up_primary; | |
1152 | osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); | |
9f95a23c | 1153 | ASSERT_EQ(new_up, up_after_out); |
94b18763 FG |
1154 | } |
1155 | } | |
1156 | } | |
1157 | } | |
1158 | ||
a8e16298 TL |
TEST_F(OSDMapTest, BUG_38897) {
  // http://tracker.ceph.com/issues/38897
  //
  // Scenario: two pools pinned to disjoint OSD sets via custom CRUSH rules.
  // pool1's PGs are then force-remapped so osd.0 looks *underfull* and
  // osd.1 *overfull*; pool2 gets a pg_upmap_item moving a PG onto osd.10.
  // Finally calc_pg_upmaps() is run restricted to pool1 only — the test
  // exercises that the optimizer does not touch (or crash on) upmap state
  // belonging to pools outside the requested set.
  //
  // build a fresh map with 12 OSDs, without any default pools
  set_up_map(12, true);
  const string pool_1("pool1");
  const string pool_2("pool2");
  int64_t pool_1_id = -1;

  {
    // build customized crush rule for "pool1"
    string host_name = "host_for_pool_1";
    // build a customized host capturing osd.1~4
    // NOTE(review): the loop runs i = 1..4 (upper bound excludes 5); the
    // original comment claimed "osd.1~5" — confirm intended range upstream.
    for (int i = 1; i < 5; i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }
    CrushWrapper crush;
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0);  // bucket ids are negative
    string rule_name = "rule_for_pool1";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    // find the first free rule id
    int rno;
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno))
        break;
    }
    int steps = 7;
    crush_rule *rule = crush_make_rule(steps, rule_type);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0 as the first replica
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick any other random osds from the custom host
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      // push the modified crush map into the osdmap
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }

    // create "pool1" (3 PGs, size 3) using the rule built above
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pool_1_id = pool_id;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    p->min_size = 1;
    p->set_pg_num(3);
    p->set_pgp_num(3);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_1;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1);
    {
      for (unsigned i = 0; i < 3; i++) {
        // each PG <pool>.x should map with osd.0 first (rule forces it)
        pg_t rawpg(i, pool_id);
        pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
        vector<int> up;
        int up_primary;
        osdmap.pg_to_raw_up(pgid, &up, &up_primary);
        ASSERT_TRUE(up.size() == 3);
        ASSERT_TRUE(up[0] == 0);

        // insert a new pg_upmap
        vector<int32_t> new_up;
        // and remap <pool>.x to osd.1 only;
        // this way osd.0 is deemed to be *underfull*
        // and osd.1 is deemed to be *overfull*
        new_up.push_back(1);
        {
          OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
          pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
            new_up.begin(), new_up.end());
          osdmap.apply_incremental(pending_inc);
        }
        // verify the upmap took effect: the PG now maps to osd.1 alone
        osdmap.pg_to_raw_up(pgid, &up, &up_primary);
        ASSERT_TRUE(up.size() == 1);
        ASSERT_TRUE(up[0] == 1);
      }
    }
  }

  {
    // build customized crush rule for "pool2"
    string host_name = "host_for_pool_2";
    // build a customized host to capture osd.6~11
    for (int i = 6; i < (int)get_num_osds(); i++) {
      stringstream osd_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      move_to.push_back("root=default");
      string host_loc = "host=" + host_name;
      move_to.push_back(host_loc);
      auto r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }
    CrushWrapper crush;
    get_crush(osdmap, crush);
    auto host_id = crush.get_item_id(host_name);
    ASSERT_TRUE(host_id < 0);  // bucket ids are negative
    string rule_name = "rule_for_pool2";
    int rule_type = pg_pool_t::TYPE_REPLICATED;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    // find the first free rule id
    int rno;
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno))
        break;
    }
    int steps = 7;
    crush_rule *rule = crush_make_rule(steps, rule_type);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    // always choose osd.0 as the first replica
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    // then pick any other random osds from the custom host
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      // push the modified crush map into the osdmap
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      osdmap.apply_incremental(pending_inc);
    }

    // create "pool2" with a single PG
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    pending_inc.new_pool_max = osdmap.get_pool_max();
    auto pool_id = ++pending_inc.new_pool_max;
    pg_pool_t empty;
    auto p = pending_inc.get_new_pool(pool_id, &empty);
    p->size = 3;
    // include a single PG
    p->set_pg_num(1);
    p->set_pgp_num(1);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    pending_inc.new_pool_names[pool_id] = pool_2;
    osdmap.apply_incremental(pending_inc);
    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2);
    pg_t rawpg(0, pool_id);
    pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
    EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid));
    vector<int> up;
    int up_primary;
    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
    ASSERT_TRUE(up.size() == 3);
    ASSERT_TRUE(up[0] == 0);

    {
      // build a pg_upmap_item that will
      // remap the pg out from *underfull* osd.0
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10
      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      osdmap.apply_incremental(pending_inc);
      ASSERT_TRUE(osdmap.have_pg_upmaps(pgid));
      vector<int> up;
      int up_primary;
      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
      ASSERT_TRUE(up.size() == 3);
      ASSERT_TRUE(up[0] == 10);
    }
  }

  // ready to go: run the balancer against pool1 only; pool2's upmap
  // state must not confuse it (this is the crash reported in #38897)
  {
    set<int64_t> only_pools;
    ASSERT_TRUE(pool_1_id >= 0);
    only_pools.insert(pool_1_id);
    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
    // require perfect distribution! (max deviation 0)
    osdmap.calc_pg_upmaps(g_ceph_context,
                          0, // so we can force optimizing
                          100,
                          only_pools,
                          &pending_inc);
    osdmap.apply_incremental(pending_inc);
  }
}
1373 | ||
494da23a TL |
1374 | TEST_F(OSDMapTest, BUG_40104) { |
1375 | // http://tracker.ceph.com/issues/40104 | |
1376 | int big_osd_num = 5000; | |
1377 | int big_pg_num = 10000; | |
1378 | set_up_map(big_osd_num, true); | |
1379 | int pool_id; | |
1380 | { | |
1381 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1382 | pending_inc.new_pool_max = osdmap.get_pool_max(); | |
1383 | pool_id = ++pending_inc.new_pool_max; | |
1384 | pg_pool_t empty; | |
1385 | auto p = pending_inc.get_new_pool(pool_id, &empty); | |
1386 | p->size = 3; | |
1387 | p->min_size = 1; | |
1388 | p->set_pg_num(big_pg_num); | |
1389 | p->set_pgp_num(big_pg_num); | |
1390 | p->type = pg_pool_t::TYPE_REPLICATED; | |
1391 | p->crush_rule = 0; | |
1392 | p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); | |
1393 | pending_inc.new_pool_names[pool_id] = "big_pool"; | |
1394 | osdmap.apply_incremental(pending_inc); | |
1395 | ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); | |
1396 | ASSERT_TRUE(osdmap.get_pool_name(pool_id) == "big_pool"); | |
1397 | } | |
1398 | { | |
1399 | // generate pg_upmap_items for each pg | |
1400 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1401 | for (int i = 0; i < big_pg_num; i++) { | |
1402 | pg_t rawpg(i, pool_id); | |
1403 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); | |
1404 | vector<int> up; | |
1405 | int up_primary; | |
1406 | osdmap.pg_to_raw_up(pgid, &up, &up_primary); | |
1407 | ASSERT_TRUE(up.size() == 3); | |
1408 | int victim = up[0]; | |
1409 | int replaced_by = random() % big_osd_num; | |
1410 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
1411 | // note that it might or might not be valid, we don't care | |
1412 | new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); | |
1413 | pending_inc.new_pg_upmap_items[pgid] = | |
1414 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
1415 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
1416 | } | |
1417 | osdmap.apply_incremental(pending_inc); | |
1418 | } | |
1419 | { | |
1420 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1421 | auto start = mono_clock::now(); | |
1422 | clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); | |
1423 | auto latency = mono_clock::now() - start; | |
1424 | std::cout << "clean_pg_upmaps (~" << big_pg_num | |
1425 | << " pg_upmap_items) latency:" << timespan_str(latency) | |
1426 | << std::endl; | |
1427 | } | |
1428 | } | |
1429 | ||
eafe8130 TL |
1430 | TEST_F(OSDMapTest, BUG_42052) { |
1431 | // https://tracker.ceph.com/issues/42052 | |
1432 | set_up_map(6, true); | |
1433 | const string pool_name("pool"); | |
1434 | // build customized crush rule for "pool" | |
1435 | CrushWrapper crush; | |
1436 | get_crush(osdmap, crush); | |
1437 | string rule_name = "rule"; | |
1438 | int rule_type = pg_pool_t::TYPE_REPLICATED; | |
1439 | ASSERT_TRUE(!crush.rule_exists(rule_name)); | |
1440 | int rno; | |
1441 | for (rno = 0; rno < crush.get_max_rules(); rno++) { | |
20effc67 | 1442 | if (!crush.rule_exists(rno)) |
eafe8130 TL |
1443 | break; |
1444 | } | |
eafe8130 | 1445 | int steps = 8; |
20effc67 | 1446 | crush_rule *rule = crush_make_rule(steps, rule_type); |
eafe8130 TL |
1447 | int step = 0; |
1448 | crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); | |
1449 | crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); | |
1450 | // always choose osd.0, osd.1, osd.2 | |
1451 | crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); | |
1452 | crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); | |
1453 | crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 1); | |
1454 | crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); | |
1455 | crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 2); | |
1456 | crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); | |
1457 | ASSERT_TRUE(step == steps); | |
1458 | auto r = crush_add_rule(crush.get_crush_map(), rule, rno); | |
1459 | ASSERT_TRUE(r >= 0); | |
1460 | crush.set_rule_name(rno, rule_name); | |
1461 | { | |
1462 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1463 | pending_inc.crush.clear(); | |
1464 | crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
1465 | osdmap.apply_incremental(pending_inc); | |
1466 | } | |
1467 | ||
1468 | // create "pool" | |
1469 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1470 | pending_inc.new_pool_max = osdmap.get_pool_max(); | |
1471 | auto pool_id = ++pending_inc.new_pool_max; | |
1472 | pg_pool_t empty; | |
1473 | auto p = pending_inc.get_new_pool(pool_id, &empty); | |
1474 | p->size = 3; | |
1475 | p->min_size = 1; | |
1476 | p->set_pg_num(1); | |
1477 | p->set_pgp_num(1); | |
1478 | p->type = pg_pool_t::TYPE_REPLICATED; | |
1479 | p->crush_rule = rno; | |
1480 | p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); | |
1481 | pending_inc.new_pool_names[pool_id] = pool_name; | |
1482 | osdmap.apply_incremental(pending_inc); | |
1483 | ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); | |
1484 | ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_name); | |
1485 | pg_t rawpg(0, pool_id); | |
1486 | pg_t pgid = osdmap.raw_pg_to_pg(rawpg); | |
1487 | { | |
1488 | // pg_upmap 1.0 [2,3,5] | |
1489 | vector<int32_t> new_up{2,3,5}; | |
1490 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1491 | pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( | |
1492 | new_up.begin(), new_up.end()); | |
1493 | osdmap.apply_incremental(pending_inc); | |
1494 | } | |
1495 | { | |
1496 | // pg_upmap_items 1.0 [0,3,4,5] | |
1497 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
1498 | new_pg_upmap_items.push_back(make_pair(0, 3)); | |
1499 | new_pg_upmap_items.push_back(make_pair(4, 5)); | |
1500 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1501 | pending_inc.new_pg_upmap_items[pgid] = | |
1502 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
1503 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
1504 | osdmap.apply_incremental(pending_inc); | |
1505 | } | |
1506 | { | |
1507 | OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); | |
1508 | clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); | |
1509 | osdmap.apply_incremental(pending_inc); | |
1510 | ASSERT_FALSE(osdmap.have_pg_upmaps(pgid)); | |
1511 | } | |
1512 | } | |
1513 | ||
9f95a23c TL |
1514 | TEST_F(OSDMapTest, BUG_42485) { |
1515 | set_up_map(60); | |
1516 | { | |
1517 | // build a temporary crush topology of 2datacenters, 3racks per dc, | |
1518 | // 1host per rack, 10osds per host | |
1519 | OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. | |
1520 | tmp.deepish_copy_from(osdmap); | |
1521 | const int expected_host_num = 6; | |
1522 | int osd_per_host = (int)get_num_osds() / expected_host_num; | |
1523 | ASSERT_GE(osd_per_host, 10); | |
1524 | int host_per_dc = 3; | |
1525 | int index = 0; | |
1526 | int dc_index = 0; | |
1527 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
1528 | if (i && i % osd_per_host == 0) { | |
1529 | ++index; | |
1530 | } | |
1531 | if (i && i % (host_per_dc * osd_per_host) == 0) { | |
1532 | ++dc_index; | |
1533 | } | |
1534 | stringstream osd_name; | |
1535 | stringstream host_name; | |
1536 | stringstream rack_name; | |
1537 | stringstream dc_name; | |
1538 | vector<string> move_to; | |
1539 | osd_name << "osd." << i; | |
1540 | host_name << "host-" << index; | |
1541 | rack_name << "rack-" << index; | |
1542 | dc_name << "dc-" << dc_index; | |
1543 | move_to.push_back("root=default"); | |
1544 | string dc_loc = "datacenter=" + dc_name.str(); | |
1545 | move_to.push_back(dc_loc); | |
1546 | string rack_loc = "rack=" + rack_name.str(); | |
1547 | move_to.push_back(rack_loc); | |
1548 | string host_loc = "host=" + host_name.str(); | |
1549 | move_to.push_back(host_loc); | |
1550 | auto r = crush_move(tmp, osd_name.str(), move_to); | |
1551 | ASSERT_EQ(0, r); | |
1552 | } | |
1553 | ||
1554 | // build crush rule | |
1555 | CrushWrapper crush; | |
1556 | get_crush(tmp, crush); | |
1557 | string rule_name = "rule_xeus_993_1"; | |
1558 | int rule_type = pg_pool_t::TYPE_REPLICATED; | |
1559 | ASSERT_TRUE(!crush.rule_exists(rule_name)); | |
1560 | int rno; | |
1561 | for (rno = 0; rno < crush.get_max_rules(); rno++) { | |
20effc67 | 1562 | if (!crush.rule_exists(rno)) |
9f95a23c TL |
1563 | break; |
1564 | } | |
1565 | string root_name = "default"; | |
1566 | string dc_1 = "dc-0"; | |
1567 | int dc1 = crush.get_item_id(dc_1); | |
1568 | string dc_2 = "dc-1"; | |
1569 | int dc2 = crush.get_item_id(dc_2); | |
9f95a23c | 1570 | int steps = 8; |
20effc67 | 1571 | crush_rule *rule = crush_make_rule(steps, rule_type); |
9f95a23c TL |
1572 | int step = 0; |
1573 | crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); | |
1574 | crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); | |
1575 | crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc1, 0); | |
1576 | crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); | |
1577 | crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); | |
1578 | crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc2, 0); | |
1579 | crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); | |
1580 | crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); | |
1581 | ASSERT_TRUE(step == steps); | |
1582 | auto r = crush_add_rule(crush.get_crush_map(), rule, rno); | |
1583 | ASSERT_TRUE(r >= 0); | |
1584 | crush.set_rule_name(rno, rule_name); | |
1585 | { | |
1586 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); | |
1587 | pending_inc.crush.clear(); | |
1588 | crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); | |
1589 | tmp.apply_incremental(pending_inc); | |
1590 | } | |
1591 | // create a repliacted pool referencing the above rule | |
1592 | int64_t pool_xeus_993; | |
1593 | { | |
1594 | OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); | |
1595 | new_pool_inc.new_pool_max = tmp.get_pool_max(); | |
1596 | new_pool_inc.fsid = tmp.get_fsid(); | |
1597 | pg_pool_t empty; | |
1598 | pool_xeus_993 = ++new_pool_inc.new_pool_max; | |
1599 | pg_pool_t *p = new_pool_inc.get_new_pool(pool_xeus_993, &empty); | |
1600 | p->size = 4; | |
1601 | p->set_pg_num(4096); | |
1602 | p->set_pgp_num(4096); | |
1603 | p->type = pg_pool_t::TYPE_REPLICATED; | |
1604 | p->crush_rule = rno; | |
1605 | p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); | |
1606 | new_pool_inc.new_pool_names[pool_xeus_993] = "pool_xeus_993"; | |
1607 | tmp.apply_incremental(new_pool_inc); | |
1608 | } | |
1609 | ||
1610 | pg_t rep_pg(0, pool_xeus_993); | |
1611 | pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); | |
1612 | { | |
1613 | int from = -1; | |
1614 | int to = -1; | |
1615 | vector<int> rep_up; | |
1616 | int rep_up_primary; | |
1617 | tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); | |
1618 | std::cout << "pgid " << rep_up << " up " << rep_up << std::endl; | |
1619 | ASSERT_TRUE(rep_up.size() == 4); | |
1620 | from = *(rep_up.begin()); | |
1621 | ASSERT_TRUE(from >= 0); | |
1622 | auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); | |
1623 | if (dc_parent == dc1) | |
1624 | dc_parent = dc2; | |
1625 | else | |
1626 | dc_parent = dc1; | |
1627 | auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); | |
1628 | ASSERT_TRUE(dc_parent < 0); | |
1629 | ASSERT_TRUE(rack_parent < 0); | |
1630 | set<int> rack_parents; | |
1631 | for (auto &i: rep_up) { | |
1632 | if (i == from) continue; | |
1633 | auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); | |
1634 | rack_parents.insert(rack_parent); | |
1635 | } | |
1636 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
1637 | if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { | |
1638 | auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); | |
1639 | auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); | |
1640 | if (dc_p == dc_parent && | |
1641 | rack_parents.find(rack_p) == rack_parents.end()) { | |
1642 | to = i; | |
1643 | break; | |
1644 | } | |
1645 | } | |
1646 | } | |
1647 | ASSERT_TRUE(to >= 0); | |
1648 | ASSERT_TRUE(from != to); | |
1649 | std::cout << "from " << from << " to " << to << std::endl; | |
1650 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
1651 | new_pg_upmap_items.push_back(make_pair(from, to)); | |
1652 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); | |
1653 | pending_inc.new_pg_upmap_items[rep_pgid] = | |
1654 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
1655 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
1656 | tmp.apply_incremental(pending_inc); | |
1657 | ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); | |
1658 | } | |
1659 | pg_t rep_pg2(2, pool_xeus_993); | |
1660 | pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2); | |
1661 | { | |
1662 | pg_t rep_pgid = rep_pgid2; | |
1663 | vector<int> from_osds{-1, -1}; | |
1664 | vector<int> rep_up; | |
1665 | int rep_up_primary; | |
1666 | tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); | |
1667 | ASSERT_TRUE(rep_up.size() == 4); | |
1668 | from_osds[0] = *(rep_up.begin()); | |
1669 | from_osds[1] = *(rep_up.rbegin()); | |
1670 | std::cout << "pgid " << rep_pgid2 << " up " << rep_up << std::endl; | |
1671 | ASSERT_TRUE(*(from_osds.begin()) >= 0); | |
1672 | ASSERT_TRUE(*(from_osds.rbegin()) >= 0); | |
1673 | vector<pair<int32_t,int32_t>> new_pg_upmap_items; | |
1674 | for (auto &from: from_osds) { | |
1675 | int to = -1; | |
1676 | auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); | |
1677 | if (dc_parent == dc1) | |
1678 | dc_parent = dc2; | |
1679 | else | |
1680 | dc_parent = dc1; | |
1681 | auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); | |
1682 | ASSERT_TRUE(dc_parent < 0); | |
1683 | ASSERT_TRUE(rack_parent < 0); | |
1684 | set<int> rack_parents; | |
1685 | for (auto &i: rep_up) { | |
1686 | if (i == from) continue; | |
1687 | auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); | |
1688 | rack_parents.insert(rack_parent); | |
1689 | } | |
1690 | for (auto &i: new_pg_upmap_items) { | |
1691 | auto rack_from = tmp.crush->get_parent_of_type(i.first, 3, rno); | |
1692 | auto rack_to = tmp.crush->get_parent_of_type(i.second, 3, rno); | |
1693 | rack_parents.insert(rack_from); | |
1694 | rack_parents.insert(rack_to); | |
1695 | } | |
1696 | for (int i = 0; i < (int)get_num_osds(); i++) { | |
1697 | if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { | |
1698 | auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); | |
1699 | auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); | |
1700 | if (dc_p == dc_parent && | |
1701 | rack_parents.find(rack_p) == rack_parents.end()) { | |
1702 | to = i; | |
1703 | break; | |
1704 | } | |
1705 | } | |
1706 | } | |
1707 | ASSERT_TRUE(to >= 0); | |
1708 | ASSERT_TRUE(from != to); | |
1709 | std::cout << "from " << from << " to " << to << std::endl; | |
1710 | new_pg_upmap_items.push_back(make_pair(from, to)); | |
1711 | } | |
1712 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); | |
1713 | pending_inc.new_pg_upmap_items[rep_pgid] = | |
1714 | mempool::osdmap::vector<pair<int32_t,int32_t>>( | |
1715 | new_pg_upmap_items.begin(), new_pg_upmap_items.end()); | |
1716 | tmp.apply_incremental(pending_inc); | |
1717 | ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); | |
1718 | } | |
1719 | { | |
1720 | // *maybe_remove_pg_upmaps* should remove the above upmap_item | |
1721 | OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); | |
1722 | clean_pg_upmaps(g_ceph_context, tmp, pending_inc); | |
1723 | tmp.apply_incremental(pending_inc); | |
1724 | ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid)); | |
1725 | ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid2)); | |
1726 | } | |
1727 | } | |
1728 | } | |
1729 | ||
31f18b77 FG |
1730 | TEST(PGTempMap, basic) |
1731 | { | |
1732 | PGTempMap m; | |
1733 | pg_t a(1,1); | |
1734 | for (auto i=3; i<1000; ++i) { | |
1735 | pg_t x(i, 1); | |
1736 | m.set(x, {static_cast<int>(i)}); | |
1737 | } | |
1738 | pg_t b(2,1); | |
1739 | m.set(a, {1, 2}); | |
1740 | ASSERT_NE(m.find(a), m.end()); | |
1741 | ASSERT_EQ(m.find(a), m.begin()); | |
1742 | ASSERT_EQ(m.find(b), m.end()); | |
1743 | ASSERT_EQ(998u, m.size()); | |
1744 | } | |
35e4c445 | 1745 | |
9f95a23c TL |
// Reproducer for https://tracker.ceph.com/issues/43124: an EC pool whose
// rule chooses 4 racks then 3 hosts per rack.  A pg_upmap_item that still
// respects both the rack and host constraints is *valid* and must survive
// clean_pg_upmaps().
TEST_F(OSDMapTest, BUG_43124) {
  set_up_map(200);
  {
    // https://tracker.ceph.com/issues/43124

    // build a temporary crush topology of 5racks,
    // 4 hosts per rack, 10osds per host
    OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
    tmp.deepish_copy_from(osdmap);
    const int expected_host_num = 20;
    int osd_per_host = (int)get_num_osds() / expected_host_num;
    ASSERT_GE(osd_per_host, 10);
    int host_per_rack = 4;
    int index = 0;
    int rack_index = 0;
    for (int i = 0; i < (int)get_num_osds(); i++) {
      // advance to the next host/rack bucket at the appropriate boundaries
      if (i && i % osd_per_host == 0) {
        ++index;
      }
      if (i && i % (host_per_rack * osd_per_host) == 0) {
        ++rack_index;
      }
      stringstream osd_name;
      stringstream host_name;
      stringstream rack_name;
      vector<string> move_to;
      osd_name << "osd." << i;
      host_name << "host-" << index;
      rack_name << "rack-" << rack_index;
      move_to.push_back("root=default");
      string rack_loc = "rack=" + rack_name.str();
      move_to.push_back(rack_loc);
      string host_loc = "host=" + host_name.str();
      move_to.push_back(host_loc);
      auto r = crush_move(tmp, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
    }

    // build crush rule: take root, choose 4 racks, chooseleaf-indep
    // 3 hosts per rack, emit (4 x 3 = 12 shards)
    CrushWrapper crush;
    get_crush(tmp, crush);
    string rule_name = "rule_angel_1944";
    int rule_type = pg_pool_t::TYPE_ERASURE;
    ASSERT_TRUE(!crush.rule_exists(rule_name));
    int rno;
    // pick the first unused rule id
    for (rno = 0; rno < crush.get_max_rules(); rno++) {
      if (!crush.rule_exists(rno))
        break;
    }
    int steps = 6;
    string root_name = "default";
    int root = crush.get_item_id(root_name);
    crush_rule *rule = crush_make_rule(steps, rule_type);
    int step = 0;
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_FIRSTN, 4, 3 /* rack */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_INDEP, 3, 1 /* host */);
    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
    ASSERT_TRUE(step == steps);
    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
    ASSERT_TRUE(r >= 0);
    crush.set_rule_name(rno, rule_name);
    {
      // install the modified crush map into tmp
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.crush.clear();
      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
      tmp.apply_incremental(pending_inc);
    }
    {
      // dump the tree and the rules for debugging
      stringstream oss;
      crush.dump_tree(&oss, NULL);
      std::cout << oss.str() << std::endl;
      Formatter *f = Formatter::create("json-pretty");
      f->open_object_section("crush_rules");
      crush.dump_rules(f);
      f->close_section();
      f->flush(cout);
      delete f;
    }
    // create an erasure-coded pool referencing the above rule
    int64_t pool_angel_1944;
    {
      OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
      new_pool_inc.new_pool_max = tmp.get_pool_max();
      new_pool_inc.fsid = tmp.get_fsid();
      pg_pool_t empty;
      pool_angel_1944 = ++new_pool_inc.new_pool_max;
      pg_pool_t *p = new_pool_inc.get_new_pool(pool_angel_1944, &empty);
      p->size = 12;
      p->set_pg_num(4096);
      p->set_pgp_num(4096);
      p->type = pg_pool_t::TYPE_ERASURE;
      p->crush_rule = rno;
      p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
      new_pool_inc.new_pool_names[pool_angel_1944] = "pool_angel_1944";
      tmp.apply_incremental(new_pool_inc);
    }

    pg_t rep_pg(0, pool_angel_1944);
    pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
    {
      // insert a pg_upmap_item
      int from = -1;
      int to = -1;
      vector<int> rep_up;
      int rep_up_primary;
      tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary);
      std::cout << "pgid " << rep_pgid << " up " << rep_up << std::endl;
      ASSERT_TRUE(rep_up.size() == 12);
      from = *(rep_up.begin());
      ASSERT_TRUE(from >= 0);
      auto from_rack = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno);
      // hosts already used by the current up set
      set<int> failure_domains;
      for (auto &osd : rep_up) {
        failure_domains.insert(tmp.crush->get_parent_of_type(osd, 1 /* host */, rno));
      }
      // pick a target osd in a different rack whose host is unused, so
      // the resulting mapping remains rule-compliant
      for (int i = 0; i < (int)get_num_osds(); i++) {
        if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) {
          auto to_rack = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno);
          auto to_host = tmp.crush->get_parent_of_type(i, 1 /* host */, rno);
          if (to_rack != from_rack && failure_domains.count(to_host) == 0) {
            to = i;
            break;
          }
        }
      }
      ASSERT_TRUE(to >= 0);
      ASSERT_TRUE(from != to);
      std::cout << "from " << from << " to " << to << std::endl;
      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
      new_pg_upmap_items.push_back(make_pair(from, to));
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      pending_inc.new_pg_upmap_items[rep_pgid] =
        mempool::osdmap::vector<pair<int32_t,int32_t>>(
          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
    }
    {
      // *maybe_remove_pg_upmaps* should not remove the above upmap_item
      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
      clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
      tmp.apply_incremental(pending_inc);
      ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
    }
  }
}
f67539c2 TL |
1895 | |
// Reproducer for https://tracker.ceph.com/issues/48884: bucket summaries in
// `osd df tree` output must skip OSDs that report all-zero statfs data
// instead of skewing the aggregated kb/kb_used/kb_avail numbers.
TEST_F(OSDMapTest, BUG_48884)
{

  set_up_map(12);

  unsigned int host_index = 1;
  for (unsigned int x=0; x < get_num_osds();) {
    // Create three hosts with four osds each
    for (unsigned int y=0; y < 4; y++) {
      stringstream osd_name;
      stringstream host_name;
      vector<string> move_to;
      osd_name << "osd." << x;
      host_name << "host-" << host_index;
      move_to.push_back("root=default");
      move_to.push_back("rack=localrack");
      string host_loc = "host=" + host_name.str();
      move_to.push_back(host_loc);
      int r = crush_move(osdmap, osd_name.str(), move_to);
      ASSERT_EQ(0, r);
      x++;
    }
    host_index++;
  }

  CrushWrapper crush;
  get_crush(osdmap, crush);
  // NOTE(review): presumably set_up_map() placed all osds under a default
  // "localhost" bucket, now empty after the moves above; drop it so it does
  // not appear in the utilization dump — confirm against set_up_map().
  auto host_id = crush.get_item_id("localhost");
  crush.remove_item(g_ceph_context, host_id, false);
  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
  pending_inc.crush.clear();
  crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
  osdmap.apply_incremental(pending_inc);

  PGMap pgmap;
  osd_stat_t stats, stats_null;
  // osds 0-3 and 8-11 report real usage; osds 4-7 report all-zero stats
  stats.statfs.total = 500000;
  stats.statfs.available = 50000;
  stats.statfs.omap_allocated = 50000;
  stats.statfs.internal_metadata = 50000;
  stats_null.statfs.total = 0;
  stats_null.statfs.available = 0;
  stats_null.statfs.omap_allocated = 0;
  stats_null.statfs.internal_metadata = 0;
  for (unsigned int x=0; x < get_num_osds(); x++) {
    if (x > 3 && x < 8) {
      pgmap.osd_stat.insert({x,stats_null});
    } else {
      pgmap.osd_stat.insert({x,stats});
    }
  }

  // render the utilization report as JSON and verify the aggregated
  // values on the "localrack" bucket
  stringstream ss;
  boost::scoped_ptr<Formatter> f(Formatter::create("json-pretty"));
  print_osd_utilization(osdmap, pgmap, ss, f.get(), true, "root");
  JSONParser parser;
  parser.parse(ss.str().c_str(), static_cast<int>(ss.str().size()));
  auto iter = parser.find_first();
  for (const auto& bucket : (*iter)->get_array_elements()) {
    JSONParser parser2;
    parser2.parse(bucket.c_str(), static_cast<int>(bucket.size()));
    auto* obj = parser2.find_obj("name");
    if (obj->get_data().compare("localrack") == 0) {
      // expected totals for the 8 osds with non-null stats
      obj = parser2.find_obj("kb");
      ASSERT_EQ(obj->get_data(), "3904");
      obj = parser2.find_obj("kb_used");
      ASSERT_EQ(obj->get_data(), "3512");
      obj = parser2.find_obj("kb_used_omap");
      ASSERT_EQ(obj->get_data(), "384");
      obj = parser2.find_obj("kb_used_meta");
      ASSERT_EQ(obj->get_data(), "384");
      obj = parser2.find_obj("kb_avail");
      ASSERT_EQ(obj->get_data(), "384");
    }
  }
}
a4b75251 TL |
1972 | |
// Reproducer for https://tracker.ceph.com/issues/51842, parameterized over
// a {replica count, failure-domain type} pair (see WithParamInterface on the
// fixture): after the pool size changes, clean_pg_upmaps() must cancel the
// explicit pg_upmap entries whose width no longer matches the pool.
TEST_P(OSDMapTest, BUG_51842) {
  set_up_map(3, true);
  OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
  tmp.deepish_copy_from(osdmap);
  // one host per osd, all under a dedicated root "infra-1706"
  for (int i = 0; i < (int)get_num_osds(); i++) {
    stringstream osd_name;
    stringstream host_name;
    vector<string> move_to;
    osd_name << "osd." << i;
    host_name << "host=host-" << i;
    move_to.push_back("root=infra-1706");
    move_to.push_back(host_name.str());
    auto r = crush_move(tmp, osd_name.str(), move_to);
    ASSERT_EQ(0, r);
  }

  // build crush rule
  CrushWrapper crush;
  get_crush(tmp, crush);
  string rule_name = "infra-1706";
  int rule_type = pg_pool_t::TYPE_REPLICATED;
  ASSERT_TRUE(!crush.rule_exists(rule_name));
  int rno;
  // pick the first unused rule id
  for (rno = 0; rno < crush.get_max_rules(); rno++) {
    if (!crush.rule_exists(rno))
      break;
  }
  string root_bucket = "infra-1706";
  int root = crush.get_item_id(root_bucket);
  int steps = 5;
  crush_rule *rule = crush_make_rule(steps, rule_type);
  int step = 0;
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
  crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
  // note: it's ok to set like 'step chooseleaf_firstn 0 host'
  std::pair<int, int> param = GetParam();
  int rep_num = std::get<0>(param);   // replicas chosen by the rule
  int domain = std::get<1>(param);    // crush bucket type of failure domain
  crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, rep_num, domain);
  crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
  ASSERT_TRUE(step == steps);
  auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
  ASSERT_TRUE(r >= 0);
  crush.set_rule_name(rno, rule_name);
  {
    // install the modified crush map into tmp
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.crush.clear();
    crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
    tmp.apply_incremental(pending_inc);
  }
  {
    // dump the tree and the rules for debugging
    stringstream oss;
    crush.dump_tree(&oss, NULL);
    std::cout << oss.str() << std::endl;
    Formatter *f = Formatter::create("json-pretty");
    f->open_object_section("crush_rules");
    crush.dump_rules(f);
    f->close_section();
    f->flush(cout);
    delete f;
  }
  // create a replicated pool referencing the above rule
  int64_t pool_infra_1706;
  {
    OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
    new_pool_inc.new_pool_max = tmp.get_pool_max();
    new_pool_inc.fsid = tmp.get_fsid();
    pg_pool_t empty;
    pool_infra_1706 = ++new_pool_inc.new_pool_max;
    pg_pool_t *p = new_pool_inc.get_new_pool(pool_infra_1706, &empty);
    p->size = 3;
    p->min_size = 1;
    p->set_pg_num(256);
    p->set_pgp_num(256);
    p->type = pg_pool_t::TYPE_REPLICATED;
    p->crush_rule = rno;
    p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
    new_pool_inc.new_pool_names[pool_infra_1706] = "pool_infra_1706";
    tmp.apply_incremental(new_pool_inc);
  }

  // add upmaps
  pg_t rep_pg(3, pool_infra_1706);
  pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg);
  pg_t rep_pg2(4, pool_infra_1706);
  pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2);
  pg_t rep_pg3(6, pool_infra_1706);
  pg_t rep_pgid3 = tmp.raw_pg_to_pg(rep_pg3);
  {
    // three explicit full-width (size 3) pg_upmap entries
    OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
    pending_inc.new_pg_upmap[rep_pgid] = mempool::osdmap::vector<int32_t>({1,0,2});
    pending_inc.new_pg_upmap[rep_pgid2] = mempool::osdmap::vector<int32_t>({1,2,0});
    pending_inc.new_pg_upmap[rep_pgid3] = mempool::osdmap::vector<int32_t>({1,2,0});
    tmp.apply_incremental(pending_inc);
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid));
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid2));
    ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid3));
  }

  {
    // now, set pool size to 1
    OSDMap tmpmap;
    tmpmap.deepish_copy_from(tmp);
    OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1);
    pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706);
    p.size = 1;
    p.last_change = new_pool_inc.epoch;
    new_pool_inc.new_pools[pool_infra_1706] = p;
    tmpmap.apply_incremental(new_pool_inc);

    // shrinking the pool invalidates the size-3 upmaps; they must be cleaned
    OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc);
    tmpmap.apply_incremental(new_pending_inc);
    // check pg upmaps
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3));
  }
  {
    // now, set pool size to 4
    OSDMap tmpmap;
    tmpmap.deepish_copy_from(tmp);
    OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1);
    pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706);
    p.size = 4;
    p.last_change = new_pool_inc.epoch;
    new_pool_inc.new_pools[pool_infra_1706] = p;
    tmpmap.apply_incremental(new_pool_inc);

    // growing the pool also invalidates the size-3 upmaps
    OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1);
    clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc);
    tmpmap.apply_incremental(new_pending_inc);
    // check pg upmaps
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2));
    ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3));
  }
}
2112 | ||
33c7a0ef TL |
// CIDR ranges installed as range-blocklist entries; mixes IPv4 and IPv6
// prefixes of varying widths, including the /32, /127 and /128 edge cases.
const string OSDMapTest::range_addrs[] = {"198.51.100.0/22", "10.2.5.102/32", "2001:db8::/48",
  "3001:db8::/72", "4001:db8::/30", "5001:db8::/64", "6001:db8::/128", "7001:db8::/127"};
// Addresses that fall inside one of range_addrs (first/last/interior
// addresses of each range) — these must be reported as blocklisted.
const string OSDMapTest::ip_addrs[] = {"198.51.100.14", "198.51.100.0", "198.51.103.255",
  "10.2.5.102",
  "2001:db8:0:0:0:0:0:0", "2001:db8:0:0:0:0001:ffff:ffff",
  "2001:db8:0:ffff:ffff:ffff:ffff:ffff",
  "3001:db8:0:0:0:0:0:0", "3001:db8:0:0:0:0001:ffff:ffff",
  "3001:db8:0:0:00ff:ffff:ffff:ffff",
  "4001:db8::", "4001:db8:0:0:0:0001:ffff:ffff",
  "4001:dbb:ffff:ffff:ffff:ffff:ffff:ffff",
  "5001:db8:0:0:0:0:0:0", "5001:db8:0:0:0:0:ffff:ffff",
  "5001:db8:0:0:ffff:ffff:ffff:ffff",
  "6001:db8:0:0:0:0:0:0",
  "7001:db8:0:0:0:0:0:0", "7001:db8:0:0:0:0:0:0001"
};
// Addresses just *outside* each range (one-off boundary neighbours) —
// these must never be reported as blocklisted.
const string OSDMapTest::unblocked_ip_addrs[] = { "0.0.0.0", "1.1.1.1", "192.168.1.1",
  "198.51.99.255", "198.51.104.0",
  "10.2.5.101", "10.2.5.103",
  "2001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "2001:db8:0001::",
  "3001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "3001:db8:0:0:0100::",
  "4001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "4001:dbc::",
  "5001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "5001:db8:0:0001:0:0:0:0",
  "6001:db8:0:0:0:0:0:0001",
  "7001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "7001:db8:0:0:0:0:0:0002"
};
2138 | ||
2139 | TEST_F(OSDMapTest, blocklisting_ips) { | |
2140 | set_up_map(6); //whatever | |
2141 | ||
2142 | OSDMap::Incremental new_blocklist_inc(osdmap.get_epoch() + 1); | |
2143 | for (const auto& a : ip_addrs) { | |
2144 | entity_addr_t addr; | |
2145 | addr.parse(a); | |
2146 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2147 | new_blocklist_inc.new_blocklist[addr] = ceph_clock_now(); | |
2148 | } | |
2149 | osdmap.apply_incremental(new_blocklist_inc); | |
2150 | ||
2151 | for (const auto& a: ip_addrs) { | |
2152 | entity_addr_t addr; | |
2153 | addr.parse(a); | |
2154 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2155 | ASSERT_TRUE(osdmap.is_blocklisted(addr, g_ceph_context)); | |
2156 | } | |
2157 | for (const auto& a: unblocked_ip_addrs) { | |
2158 | entity_addr_t addr; | |
2159 | addr.parse(a); | |
2160 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2161 | ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); | |
2162 | } | |
2163 | ||
2164 | OSDMap::Incremental rm_blocklist_inc(osdmap.get_epoch() + 1); | |
2165 | for (const auto& a : ip_addrs) { | |
2166 | entity_addr_t addr; | |
2167 | addr.parse(a); | |
2168 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2169 | rm_blocklist_inc.old_blocklist.push_back(addr); | |
2170 | } | |
2171 | osdmap.apply_incremental(rm_blocklist_inc); | |
2172 | for (const auto& a: ip_addrs) { | |
2173 | entity_addr_t addr; | |
2174 | addr.parse(a); | |
2175 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2176 | ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); | |
2177 | } | |
2178 | for (const auto& a: unblocked_ip_addrs) { | |
2179 | entity_addr_t addr; | |
2180 | addr.parse(a); | |
2181 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2182 | bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); | |
2183 | if (blocklisted) { | |
2184 | cout << "erroneously blocklisted " << addr << std::endl; | |
2185 | } | |
2186 | EXPECT_FALSE(blocklisted); | |
2187 | } | |
2188 | } | |
2189 | ||
// Install every CIDR in range_addrs as a range-blocklist entry; addresses
// inside the ranges (ip_addrs) must be blocklisted, boundary neighbours
// (unblocked_ip_addrs) must not.  Then remove the ranges and verify no
// address remains blocklisted.
TEST_F(OSDMapTest, blocklisting_ranges) {
  set_up_map(6); //whatever
  OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1);
  for (const auto& a : range_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.type = entity_addr_t::TYPE_CIDR;  // mark entry as a range
    range_blocklist_inc.new_range_blocklist[addr] = ceph_clock_now();
  }
  osdmap.apply_incremental(range_blocklist_inc);

  // every address inside a blocked range must be blocklisted
  for (const auto& a: ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (!blocklisted) {
      cout << "erroneously not blocklisted " << addr << std::endl;
    }
    ASSERT_TRUE(blocklisted);
  }
  // addresses just outside the ranges must not be blocklisted
  for (const auto& a: unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (blocklisted) {
      cout << "erroneously blocklisted " << addr << std::endl;
    }
    EXPECT_FALSE(blocklisted);
  }

  // remove all the range entries again
  OSDMap::Incremental rm_range_blocklist(osdmap.get_epoch() + 1);
  for (const auto& a : range_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.type = entity_addr_t::TYPE_CIDR;
    rm_range_blocklist.old_range_blocklist.push_back(addr);
  }
  osdmap.apply_incremental(rm_range_blocklist);

  // nothing may remain blocklisted after removal
  for (const auto& a: ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context));
  }
  for (const auto& a: unblocked_ip_addrs) {
    entity_addr_t addr;
    addr.parse(a);
    addr.set_type(entity_addr_t::TYPE_LEGACY);
    bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context);
    if (blocklisted) {
      cout << "erroneously blocklisted " << addr << std::endl;
    }
    EXPECT_FALSE(blocklisted);
  }
}
2248 | ||
2249 | TEST_F(OSDMapTest, blocklisting_everything) { | |
2250 | set_up_map(6); //whatever | |
2251 | OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1); | |
2252 | entity_addr_t baddr; | |
2253 | baddr.parse("2001:db8::/0"); | |
2254 | baddr.type = entity_addr_t::TYPE_CIDR; | |
2255 | range_blocklist_inc.new_range_blocklist[baddr] = ceph_clock_now(); | |
2256 | osdmap.apply_incremental(range_blocklist_inc); | |
2257 | ||
2258 | for (const auto& a: ip_addrs) { | |
2259 | entity_addr_t addr; | |
2260 | addr.parse(a); | |
2261 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2262 | if (addr.is_ipv4()) continue; | |
2263 | bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); | |
2264 | if (!blocklisted) { | |
2265 | cout << "erroneously not blocklisted " << addr << std::endl; | |
2266 | } | |
2267 | ASSERT_TRUE(blocklisted); | |
2268 | } | |
2269 | for (const auto& a: unblocked_ip_addrs) { | |
2270 | entity_addr_t addr; | |
2271 | addr.parse(a); | |
2272 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2273 | if (addr.is_ipv4()) continue; | |
2274 | bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); | |
2275 | if (!blocklisted) { | |
2276 | cout << "erroneously not blocklisted " << addr << std::endl; | |
2277 | } | |
2278 | ASSERT_TRUE(blocklisted); | |
2279 | } | |
2280 | ||
2281 | OSDMap::Incremental swap_blocklist_inc(osdmap.get_epoch()+1); | |
2282 | swap_blocklist_inc.old_range_blocklist.push_back(baddr); | |
2283 | ||
2284 | entity_addr_t caddr; | |
2285 | caddr.parse("1.1.1.1/0"); | |
2286 | caddr.type = entity_addr_t::TYPE_CIDR; | |
2287 | swap_blocklist_inc.new_range_blocklist[caddr] = ceph_clock_now(); | |
2288 | osdmap.apply_incremental(swap_blocklist_inc); | |
2289 | ||
2290 | for (const auto& a: ip_addrs) { | |
2291 | entity_addr_t addr; | |
2292 | addr.parse(a); | |
2293 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2294 | if (!addr.is_ipv4()) continue; | |
2295 | bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); | |
2296 | if (!blocklisted) { | |
2297 | cout << "erroneously not blocklisted " << addr << std::endl; | |
2298 | } | |
2299 | ASSERT_TRUE(blocklisted); | |
2300 | } | |
2301 | for (const auto& a: unblocked_ip_addrs) { | |
2302 | entity_addr_t addr; | |
2303 | addr.parse(a); | |
2304 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
2305 | if (!addr.is_ipv4()) continue; | |
2306 | bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); | |
2307 | if (!blocklisted) { | |
2308 | cout << "erroneously not blocklisted " << addr << std::endl; | |
2309 | } | |
2310 | ASSERT_TRUE(blocklisted); | |
2311 | } | |
2312 | } | |
2313 | ||
1e59de90 TL |
// Sanity-check calc_read_balance_score() on every replicated pool of a
// randomly sized map: a healthy pool scores in the normal range, a pool
// with zero total primary affinity fails with zeroed scores, and with
// fewer "affinity 1" OSDs than replicas the call must keep failing.
TEST_F(OSDMapTest, ReadBalanceScore1) {
  std::srand ( unsigned ( std::time(0) ) );
  uint osd_rand = rand() % 13;
  set_up_map(6 + osd_rand); //whatever
  auto pools = osdmap.get_pools();
  for (auto &[pid, pg_pool] : pools) {
    const pg_pool_t *pi = osdmap.get_pg_pool(pid);
    if (pi->is_replicated()) {
      //cout << "pool " << pid << " " << pg_pool << std::endl;
      auto replica_count = pi->get_size();
      OSDMap::read_balance_info_t rbi;
      auto rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi);

      // "Normal" score is between 1 and num_osds
      ASSERT_TRUE(rc == 0);
      ASSERT_TRUE(score_in_range(rbi.adjusted_score));
      ASSERT_TRUE(score_in_range(rbi.acting_adj_score));
      ASSERT_TRUE(rbi.err_msg.empty());

      // When all OSDs have primary_affinity 0, score should be 0
      auto num_osds = get_num_osds();
      set_primary_affinity_all(0.);

      // With no eligible primaries the call must fail (rc < 0), zero both
      // scores, and report why in err_msg.
      rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi);
      ASSERT_TRUE(rc < 0);
      ASSERT_TRUE(rbi.adjusted_score == 0.);
      ASSERT_TRUE(rbi.acting_adj_score == 0.);
      ASSERT_FALSE(rbi.err_msg.empty());

      std::vector<uint> osds;
      for (uint i = 0 ; i < num_osds ; i++) {
	osds.push_back(i);
      }

      // Change primary_affinity of some OSDs to 1 others are 0
      // fratio = 1/size is the fraction of OSDs that must carry affinity
      // for the pool's primaries to be placeable at all.
      float fratio = 1. / (float)replica_count;
      for (int iter = 0 ; iter < 100 ; iter++) { // run the test 100 times
	// Create random shuffle of OSDs
	std::random_device seed;
	std::default_random_engine generator(seed());
	std::shuffle(osds.begin(), osds.end(), generator);
	// Raise affinity on one shuffled OSD at a time and re-score.
	for (uint i = 0 ; i < num_osds ; i++) {
	  if ((float(i + 1) / float(num_osds)) < fratio) {
	    // Still below the 1/size threshold: scoring must keep failing
	    // with zeroed results and a non-empty error message.
	    ASSERT_TRUE(osds[i] < num_osds);
	    osdmap.set_primary_affinity(osds[i], CEPH_OSD_MAX_PRIMARY_AFFINITY);
	    rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi);

	    ASSERT_TRUE(rc < 0);
	    ASSERT_TRUE(rbi.adjusted_score == 0.);
	    ASSERT_TRUE(rbi.acting_adj_score == 0.);
	    ASSERT_FALSE(rbi.err_msg.empty());
	  }
	  else {
	    // At or above the threshold the call may still fail
	    // (failure must be consistent: zero scores + message),
	    // otherwise the acting score must fall in range for the
	    // i + 1 OSDs enabled so far.
	    if (rc < 0) {
	      ASSERT_TRUE(rbi.adjusted_score == 0.);
	      ASSERT_TRUE(rbi.acting_adj_score == 0.);
	      ASSERT_FALSE(rbi.err_msg.empty());
	    }
	    else {
	      ASSERT_TRUE(score_in_range(rbi.acting_adj_score, i + 1));
	      ASSERT_TRUE(rbi.err_msg.empty());
	    }
	  }
	}
	// Reset for the next iteration.
	set_primary_affinity_all(0.);
      }
    }
  }

}
2384 | ||
// Exercise calc_read_balance_score() under randomized primary-affinity
// assignments: when the aggregate affinity ratio is below 1/pool_size the
// call must fail with zeroed scores; otherwise a success must yield a
// score in range for the number of affinity-carrying OSDs.
TEST_F(OSDMapTest, ReadBalanceScore2) {
  std::srand ( unsigned ( std::time(0) ) );
  uint osd_num = 6 + rand() % 13;  // random cluster size: 6..18 OSDs
  set_up_map(osd_num, true);
  for (int i = 0 ; i < 100 ; i++) { //running 100 random tests
    uint num_pa_osds = 0;  // OSDs left with non-zero primary affinity
    float pa_sum = 0.;     // sum of the affinities actually applied
    OSDMap::read_balance_info_t rbi;

    // set pa for all osds
    for (uint j = 0 ; j < osd_num ; j++) {
      // Random affinity in [1,100], snapped to the extremes (>80 -> 100,
      // <20 -> 0) so that both fully-on and fully-off OSDs occur often.
      uint pa = 1 + rand() % 100;
      if (pa > 80)
	pa = 100;
      if (pa < 20)
	pa = 0;
      float fpa = (float)pa / 100.;
      if (pa > 0) {
	num_pa_osds++;
	pa_sum += fpa;
      }
      osdmap.set_primary_affinity(j, int(fpa * CEPH_OSD_MAX_PRIMARY_AFFINITY));
    }
    // Average affinity across the whole cluster for this iteration.
    float pa_ratio = pa_sum / (float) osd_num;

    // create a pool with the current osdmap configuration
    OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
    new_pool_inc.new_pool_max = osdmap.get_pool_max();
    new_pool_inc.fsid = osdmap.get_fsid();
    string pool_name = "rep_pool" + stringify(i);
    uint64_t new_pid = set_rep_pool(pool_name, new_pool_inc, false);
    ASSERT_TRUE(new_pid > 0);
    osdmap.apply_incremental(new_pool_inc);

    // now run the test on the pool.
    const pg_pool_t *pi = osdmap.get_pg_pool(new_pid);
    ASSERT_NE(pi, nullptr);
    ASSERT_TRUE(pi->is_replicated());
    // fratio = 1/size: minimum average affinity needed for placement.
    float fratio = 1. / (float)pi->get_size();
    auto rc = osdmap.calc_read_balance_score(g_ceph_context, new_pid, &rbi);
    if (pa_ratio < fratio) {
      // Not enough aggregate affinity: the call must fail, zero both
      // scores, and explain itself in err_msg.
      ASSERT_TRUE(rc < 0);
      ASSERT_FALSE(rbi.err_msg.empty());
      ASSERT_TRUE(rbi.acting_adj_score == 0.);
      ASSERT_TRUE(rbi.adjusted_score == 0.);
    }
    else {
      if (rc < 0) {
	// Failure is still possible above the threshold; it must be
	// reported consistently (zero scores + message).
	ASSERT_TRUE(rbi.adjusted_score == 0.);
	ASSERT_TRUE(rbi.acting_adj_score == 0.);
	ASSERT_FALSE(rbi.err_msg.empty());
      }
      else {
	if (rbi.err_msg.empty()) {
	  ASSERT_TRUE(score_in_range(rbi.acting_adj_score, num_pa_osds));
	}
      }
    }

  }
  //TODO add ReadBalanceScore3 - with weighted osds.

}
2448 | ||
2449 | TEST_F(OSDMapTest, read_balance_small_map) { | |
2450 | // Set up a map with 4 OSDs and default pools | |
2451 | set_up_map(4); | |
2452 | ||
2453 | const vector<string> test_cases = {"basic", "prim_affinity"}; | |
2454 | for (const auto & test : test_cases) { | |
2455 | if (test == "prim_affinity") { | |
2456 | // Make osd.0 off-limits for primaries by giving it prim affinity 0 | |
2457 | OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1); | |
2458 | pending_inc0.new_primary_affinity[0] = 0; | |
2459 | osdmap.apply_incremental(pending_inc0); | |
2460 | ||
2461 | // Ensure osd.0 has no primaries assigned to it | |
2462 | map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd; | |
2463 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd); | |
2464 | ASSERT_TRUE(prim_pgs_by_osd[0].size() == 0); | |
2465 | ASSERT_TRUE(acting_prims_by_osd[0].size() == 0); | |
2466 | } | |
2467 | ||
2468 | // Make sure capacity is balanced first | |
2469 | set<int64_t> only_pools; | |
2470 | only_pools.insert(my_rep_pool); | |
2471 | OSDMap::Incremental pending_inc(osdmap.get_epoch()+1); | |
2472 | osdmap.calc_pg_upmaps(g_ceph_context, | |
2473 | 0, | |
2474 | 100, | |
2475 | only_pools, | |
2476 | &pending_inc); | |
2477 | osdmap.apply_incremental(pending_inc); | |
2478 | ||
2479 | // Get read balance score before balancing | |
2480 | OSDMap::read_balance_info_t rb_info; | |
2481 | auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); | |
2482 | ASSERT_TRUE(rc >= 0); | |
2483 | float read_balance_score_before = rb_info.adjusted_score; | |
2484 | ||
2485 | // Calculate desired prim distributions to verify later | |
2486 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2; | |
2487 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2); | |
2488 | vector<uint64_t> osds_to_check; | |
2489 | for (const auto & [osd, pgs] : prim_pgs_by_osd_2) { | |
2490 | osds_to_check.push_back(osd); | |
2491 | } | |
2492 | map<uint64_t,float> desired_prim_dist; | |
2493 | rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool, | |
2494 | osds_to_check, desired_prim_dist); | |
2495 | ASSERT_TRUE(rc >= 0); | |
2496 | ||
2497 | // Balance reads | |
2498 | OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1); | |
2499 | int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap); | |
2500 | osdmap.apply_incremental(pending_inc_2); | |
2501 | ||
2502 | if (test == "prim_affinity") { | |
2503 | // Ensure osd.0 still has no primaries assigned to it | |
2504 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3; | |
2505 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3); | |
2506 | ASSERT_TRUE(prim_pgs_by_osd_3[0].size() == 0); | |
2507 | ASSERT_TRUE(acting_prims_by_osd_3[0].size() == 0); | |
2508 | } | |
2509 | ||
2510 | // Get read balance score after balancing | |
2511 | rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); | |
2512 | ASSERT_TRUE(rc >= 0); | |
2513 | float read_balance_score_after = rb_info.adjusted_score; | |
2514 | ||
2515 | // Ensure the score hasn't gotten worse | |
2516 | ASSERT_TRUE(read_balance_score_after <= read_balance_score_before); | |
2517 | ||
2518 | // Check for improvements | |
2519 | if (num_changes > 0) { | |
2520 | ASSERT_TRUE(read_balance_score_after < read_balance_score_before); | |
2521 | ||
2522 | // Check num primaries for each OSD is within range | |
2523 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4; | |
2524 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4); | |
2525 | for (const auto & [osd, primaries] : prim_pgs_by_osd_4) { | |
2526 | ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); | |
2527 | ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); | |
2528 | } | |
2529 | } | |
2530 | } | |
2531 | } | |
2532 | ||
2533 | TEST_F(OSDMapTest, read_balance_large_map) { | |
2534 | // Set up a map with 60 OSDs and default pools | |
2535 | set_up_map(60); | |
2536 | ||
2537 | const vector<string> test_cases = {"basic", "prim_affinity"}; | |
2538 | for (const auto & test : test_cases) { | |
2539 | if (test == "prim_affinity") { | |
2540 | // Make osd.0 off-limits for primaries by giving it prim affinity 0 | |
2541 | OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1); | |
2542 | pending_inc0.new_primary_affinity[0] = 0; | |
2543 | osdmap.apply_incremental(pending_inc0); | |
2544 | ||
2545 | // Ensure osd.0 has no primaries assigned to it | |
2546 | map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd; | |
2547 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd); | |
2548 | ASSERT_TRUE(prim_pgs_by_osd[0].size() == 0); | |
2549 | ASSERT_TRUE(acting_prims_by_osd[0].size() == 0); | |
2550 | } | |
2551 | ||
2552 | // Make sure capacity is balanced first | |
2553 | set<int64_t> only_pools; | |
2554 | only_pools.insert(my_rep_pool); | |
2555 | OSDMap::Incremental pending_inc(osdmap.get_epoch()+1); | |
2556 | osdmap.calc_pg_upmaps(g_ceph_context, | |
2557 | 0, | |
2558 | 100, | |
2559 | only_pools, | |
2560 | &pending_inc); | |
2561 | osdmap.apply_incremental(pending_inc); | |
2562 | ||
2563 | // Get read balance score before balancing | |
2564 | OSDMap::read_balance_info_t rb_info; | |
2565 | auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); | |
2566 | ASSERT_TRUE(rc >= 0); | |
2567 | float read_balance_score_before = rb_info.adjusted_score; | |
2568 | ||
2569 | // Calculate desired prim distributions to verify later | |
2570 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2; | |
2571 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2); | |
2572 | vector<uint64_t> osds_to_check; | |
2573 | for (auto [osd, pgs] : prim_pgs_by_osd_2) { | |
2574 | osds_to_check.push_back(osd); | |
2575 | } | |
2576 | map<uint64_t,float> desired_prim_dist; | |
2577 | rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool, | |
2578 | osds_to_check, desired_prim_dist); | |
2579 | ASSERT_TRUE(rc >= 0); | |
2580 | ||
2581 | // Balance reads | |
2582 | OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1); | |
2583 | int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap); | |
2584 | osdmap.apply_incremental(pending_inc_2); | |
2585 | ||
2586 | if (test == "prim_affinity") { | |
2587 | // Ensure osd.0 still has no primaries assigned to it | |
2588 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3; | |
2589 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3); | |
2590 | ASSERT_TRUE(prim_pgs_by_osd_3[0].size() == 0); | |
2591 | ASSERT_TRUE(acting_prims_by_osd_3[0].size() == 0); | |
2592 | } | |
2593 | ||
2594 | // Get read balance score after balancing | |
2595 | rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); | |
2596 | ASSERT_TRUE(rc >= 0); | |
2597 | float read_balance_score_after = rb_info.adjusted_score; | |
2598 | ||
2599 | // Ensure the score hasn't gotten worse | |
2600 | ASSERT_TRUE(read_balance_score_after <= read_balance_score_before); | |
2601 | ||
2602 | // Check for improvements | |
2603 | if (num_changes > 0) { | |
2604 | ASSERT_TRUE(read_balance_score_after < read_balance_score_before); | |
2605 | ||
2606 | // Check num primaries for each OSD is within range | |
2607 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4; | |
2608 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4); | |
2609 | for (const auto & [osd, primaries] : prim_pgs_by_osd_4) { | |
2610 | ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); | |
2611 | ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); | |
2612 | } | |
2613 | } | |
2614 | } | |
2615 | } | |
2616 | ||
2617 | TEST_F(OSDMapTest, read_balance_random_map) { | |
2618 | // Set up map with random number of OSDs | |
2619 | std::srand ( unsigned ( std::time(0) ) ); | |
2620 | uint num_osds = 3 + (rand() % 10); | |
2621 | ASSERT_TRUE(num_osds >= 3); | |
2622 | set_up_map(num_osds); | |
2623 | ||
2624 | const vector<string> test_cases = {"basic", "prim_affinity"}; | |
2625 | for (const auto & test : test_cases) { | |
2626 | uint rand_osd = rand() % num_osds; | |
2627 | if (test == "prim_affinity") { | |
2628 | // Make a random OSD off-limits for primaries by giving it prim affinity 0 | |
2629 | ASSERT_TRUE(rand_osd < num_osds); | |
2630 | OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1); | |
2631 | pending_inc0.new_primary_affinity[rand_osd] = 0; | |
2632 | osdmap.apply_incremental(pending_inc0); | |
2633 | ||
2634 | // Ensure the random OSD has no primaries assigned to it | |
2635 | map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd; | |
2636 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd); | |
2637 | ASSERT_TRUE(prim_pgs_by_osd[rand_osd].size() == 0); | |
2638 | ASSERT_TRUE(acting_prims_by_osd[rand_osd].size() == 0); | |
2639 | } | |
2640 | ||
2641 | // Make sure capacity is balanced first | |
2642 | set<int64_t> only_pools; | |
2643 | only_pools.insert(my_rep_pool); | |
2644 | OSDMap::Incremental pending_inc(osdmap.get_epoch()+1); | |
2645 | osdmap.calc_pg_upmaps(g_ceph_context, | |
2646 | 0, | |
2647 | 100, | |
2648 | only_pools, | |
2649 | &pending_inc); | |
2650 | osdmap.apply_incremental(pending_inc); | |
2651 | ||
2652 | // Get read balance score before balancing | |
2653 | OSDMap::read_balance_info_t rb_info; | |
2654 | auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); | |
2655 | ASSERT_TRUE(rc >= 0); | |
2656 | float read_balance_score_before = rb_info.adjusted_score; | |
2657 | ||
2658 | // Calculate desired prim distributions to verify later | |
2659 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2; | |
2660 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2); | |
2661 | vector<uint64_t> osds_to_check; | |
2662 | for (const auto & [osd, pgs] : prim_pgs_by_osd_2) { | |
2663 | osds_to_check.push_back(osd); | |
2664 | } | |
2665 | map<uint64_t,float> desired_prim_dist; | |
2666 | rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool, | |
2667 | osds_to_check, desired_prim_dist); | |
2668 | ASSERT_TRUE(rc >= 0); | |
2669 | ||
2670 | // Balance reads | |
2671 | OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1); | |
2672 | int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap); | |
2673 | osdmap.apply_incremental(pending_inc_2); | |
2674 | ||
2675 | if (test == "prim_affinity") { | |
2676 | // Ensure the random OSD still has no primaries assigned to it | |
2677 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3; | |
2678 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3); | |
2679 | ASSERT_TRUE(prim_pgs_by_osd_3[rand_osd].size() == 0); | |
2680 | ASSERT_TRUE(acting_prims_by_osd_3[rand_osd].size() == 0); | |
2681 | } | |
2682 | ||
2683 | // Get read balance score after balancing | |
2684 | rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); | |
2685 | ASSERT_TRUE(rc >= 0); | |
2686 | float read_balance_score_after = rb_info.adjusted_score; | |
2687 | ||
2688 | // Ensure the score hasn't gotten worse | |
2689 | ASSERT_TRUE(read_balance_score_after <= read_balance_score_before); | |
2690 | ||
2691 | // Check for improvements | |
2692 | if (num_changes > 0) { | |
2693 | ASSERT_TRUE(read_balance_score_after < read_balance_score_before); | |
2694 | ||
2695 | // Check num primaries for each OSD is within range | |
2696 | map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4; | |
2697 | osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4); | |
2698 | for (auto [osd, primaries] : prim_pgs_by_osd_4) { | |
2699 | ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); | |
2700 | ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); | |
2701 | } | |
2702 | for (auto [osd, primaries] : prim_pgs_by_osd_4) { | |
2703 | ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); | |
2704 | ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); | |
2705 | } | |
2706 | } | |
2707 | } | |
2708 | } | |
2709 | ||
20effc67 | 2710 | INSTANTIATE_TEST_SUITE_P( |
a4b75251 TL |
2711 | OSDMap, |
2712 | OSDMapTest, | |
2713 | ::testing::Values( | |
2714 | std::make_pair<int, int>(0, 1), // chooseleaf firstn 0 host | |
1e59de90 | 2715 | std::make_pair<int, int>(3, 1), // chooseleaf firstn 3 host |
a4b75251 TL |
2716 | std::make_pair<int, int>(0, 0), // chooseleaf firstn 0 osd |
2717 | std::make_pair<int, int>(3, 0) // chooseleaf firstn 3 osd | |
2718 | ) | |
2719 | ); |