]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/crush/crush.cc
2d87958b383c8d2a0d067d6ee1ae63af02d48677
[ceph.git] / ceph / src / test / crush / crush.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank <info@inktank.com>
7 *
8 * LGPL-2.1 (see COPYING-LGPL2.1) or later
9 */
10
11 #include <gtest/gtest.h>
12 #include <iostream>
13 #include <memory>
14 #include <set>
15
16 #include "common/ceph_argparse.h"
17 #include "common/common_init.h"
18 #include "include/stringify.h"
19
20 #include "crush/CrushWrapper.h"
21 #include "osd/osd_types.h"
22
23 using namespace std;
24
25 std::unique_ptr<CrushWrapper> build_indep_map(CephContext *cct, int num_rack,
26 int num_host, int num_osd)
27 {
28 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
29 c->create();
30
31 c->set_type_name(5, "root");
32 c->set_type_name(4, "row");
33 c->set_type_name(3, "rack");
34 c->set_type_name(2, "chasis");
35 c->set_type_name(1, "host");
36 c->set_type_name(0, "osd");
37
38 int rootno;
39 c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
40 5, 0, NULL, NULL, &rootno);
41 c->set_item_name(rootno, "default");
42
43 map<string,string> loc;
44 loc["root"] = "default";
45
46 int osd = 0;
47 for (int r=0; r<num_rack; ++r) {
48 loc["rack"] = string("rack-") + stringify(r);
49 for (int h=0; h<num_host; ++h) {
50 loc["host"] = string("host-") + stringify(r) + string("-") + stringify(h);
51 for (int o=0; o<num_osd; ++o, ++osd) {
52 c->insert_item(cct, osd, 1.0, string("osd.") + stringify(osd), loc);
53 }
54 }
55 }
56 int ret;
57 int ruleno = 0;
58 ret = c->add_rule(ruleno, 4, 123);
59 ceph_assert(ret == ruleno);
60 ret = c->set_rule_step(ruleno, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
61 ceph_assert(ret == 0);
62 ret = c->set_rule_step(ruleno, 1, CRUSH_RULE_TAKE, rootno, 0);
63 ceph_assert(ret == 0);
64 ret = c->set_rule_step(ruleno, 2, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1);
65 ceph_assert(ret == 0);
66 ret = c->set_rule_step(ruleno, 3, CRUSH_RULE_EMIT, 0, 0);
67 ceph_assert(ret == 0);
68 c->set_rule_name(ruleno, "data");
69
70 c->finalize();
71
72 if (false) {
73 Formatter *f = Formatter::create("json-pretty");
74 f->open_object_section("crush_map");
75 c->dump(f);
76 f->close_section();
77 f->flush(cout);
78 delete f;
79 }
80
81 return c;
82 }
83
84 int get_num_dups(const vector<int>& v)
85 {
86 std::set<int> s;
87 int dups = 0;
88 for (auto n : v) {
89 if (s.count(n))
90 ++dups;
91 else if (n != CRUSH_ITEM_NONE)
92 s.insert(n);
93 }
94 return dups;
95 }
96
97 class CRUSHTest : public ::testing::Test
98 {
99 public:
100 void SetUp() final
101 {
102 CephInitParameters params(CEPH_ENTITY_TYPE_CLIENT);
103 cct = common_preinit(params, CODE_ENVIRONMENT_UTILITY,
104 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
105 }
106 void TearDown() final
107 {
108 cct->put();
109 cct = nullptr;
110 }
111 protected:
112 CephContext *cct = nullptr;
113 };
114
115 TEST_F(CRUSHTest, indep_toosmall) {
116 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 1, 3, 1));
117 vector<__u32> weight(c->get_max_devices(), 0x10000);
118 c->dump_tree(&cout, NULL);
119
120 for (int x = 0; x < 100; ++x) {
121 vector<int> out;
122 c->do_rule(0, x, out, 5, weight, 0);
123 cout << x << " -> " << out << std::endl;
124 int num_none = 0;
125 for (unsigned i=0; i<out.size(); ++i) {
126 if (out[i] == CRUSH_ITEM_NONE)
127 num_none++;
128 }
129 ASSERT_EQ(2, num_none);
130 ASSERT_EQ(0, get_num_dups(out));
131 }
132 }
133
134 TEST_F(CRUSHTest, indep_basic) {
135 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
136 vector<__u32> weight(c->get_max_devices(), 0x10000);
137 c->dump_tree(&cout, NULL);
138
139 for (int x = 0; x < 100; ++x) {
140 vector<int> out;
141 c->do_rule(0, x, out, 5, weight, 0);
142 cout << x << " -> " << out << std::endl;
143 int num_none = 0;
144 for (unsigned i=0; i<out.size(); ++i) {
145 if (out[i] == CRUSH_ITEM_NONE)
146 num_none++;
147 }
148 ASSERT_EQ(0, num_none);
149 ASSERT_EQ(0, get_num_dups(out));
150 }
151 }
152
153 TEST_F(CRUSHTest, indep_out_alt) {
154 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
155 vector<__u32> weight(c->get_max_devices(), 0x10000);
156
157 // mark a bunch of osds out
158 int num = 3*3*3;
159 for (int i=0; i<num / 2; ++i)
160 weight[i*2] = 0;
161 c->dump_tree(&cout, NULL);
162
163 // need more retries to get 9/9 hosts for x in 0..99
164 c->set_choose_total_tries(100);
165 for (int x = 0; x < 100; ++x) {
166 vector<int> out;
167 c->do_rule(0, x, out, 9, weight, 0);
168 cout << x << " -> " << out << std::endl;
169 int num_none = 0;
170 for (unsigned i=0; i<out.size(); ++i) {
171 if (out[i] == CRUSH_ITEM_NONE)
172 num_none++;
173 }
174 ASSERT_EQ(0, num_none);
175 ASSERT_EQ(0, get_num_dups(out));
176 }
177 }
178
179 TEST_F(CRUSHTest, indep_out_contig) {
180 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
181 vector<__u32> weight(c->get_max_devices(), 0x10000);
182
183 // mark a bunch of osds out
184 int num = 3*3*3;
185 for (int i=0; i<num / 3; ++i)
186 weight[i] = 0;
187 c->dump_tree(&cout, NULL);
188
189 c->set_choose_total_tries(100);
190 for (int x = 0; x < 100; ++x) {
191 vector<int> out;
192 c->do_rule(0, x, out, 7, weight, 0);
193 cout << x << " -> " << out << std::endl;
194 int num_none = 0;
195 for (unsigned i=0; i<out.size(); ++i) {
196 if (out[i] == CRUSH_ITEM_NONE)
197 num_none++;
198 }
199 ASSERT_EQ(1, num_none);
200 ASSERT_EQ(0, get_num_dups(out));
201 }
202 }
203
204
205 TEST_F(CRUSHTest, indep_out_progressive) {
206 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
207 c->set_choose_total_tries(100);
208 vector<__u32> tweight(c->get_max_devices(), 0x10000);
209 c->dump_tree(&cout, NULL);
210
211 int tchanged = 0;
212 for (int x = 1; x < 5; ++x) {
213 vector<__u32> weight(c->get_max_devices(), 0x10000);
214
215 std::map<int,unsigned> pos;
216 vector<int> prev;
217 for (unsigned i=0; i<weight.size(); ++i) {
218 vector<int> out;
219 c->do_rule(0, x, out, 7, weight, 0);
220 cout << "(" << i << "/" << weight.size() << " out) "
221 << x << " -> " << out << std::endl;
222 int num_none = 0;
223 for (unsigned k=0; k<out.size(); ++k) {
224 if (out[k] == CRUSH_ITEM_NONE)
225 num_none++;
226 }
227 ASSERT_EQ(0, get_num_dups(out));
228
229 // make sure nothing moved
230 int moved = 0;
231 int changed = 0;
232 for (unsigned j=0; j<out.size(); ++j) {
233 if (i && out[j] != prev[j]) {
234 ++changed;
235 ++tchanged;
236 }
237 if (out[j] == CRUSH_ITEM_NONE) {
238 continue;
239 }
240 if (i && pos.count(out[j])) {
241 // result shouldn't have moved position
242 if (j != pos[out[j]]) {
243 cout << " " << out[j] << " moved from " << pos[out[j]] << " to " << j << std::endl;
244 ++moved;
245 }
246 //ASSERT_EQ(j, pos[out[j]]);
247 }
248 }
249 if (moved || changed)
250 cout << " " << moved << " moved, " << changed << " changed" << std::endl;
251 ASSERT_LE(moved, 1);
252 ASSERT_LE(changed, 3);
253
254 // mark another osd out
255 weight[i] = 0;
256 prev = out;
257 pos.clear();
258 for (unsigned j=0; j<out.size(); ++j) {
259 if (out[j] != CRUSH_ITEM_NONE)
260 pos[out[j]] = j;
261 }
262 }
263 }
264 cout << tchanged << " total changed" << std::endl;
265
266 }
267
268 TEST_F(CRUSHTest, straw_zero) {
269 // zero weight items should have no effect on placement.
270
271 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
272 const int ROOT_TYPE = 1;
273 c->set_type_name(ROOT_TYPE, "root");
274 const int OSD_TYPE = 0;
275 c->set_type_name(OSD_TYPE, "osd");
276
277 int n = 5;
278 int items[n], weights[n];
279 for (int i=0; i <n; ++i) {
280 items[i] = i;
281 weights[i] = 0x10000 * (n-i-1);
282 }
283
284 c->set_max_devices(n);
285
286 string root_name0("root0");
287 int root0;
288 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
289 ROOT_TYPE, n, items, weights, &root0));
290 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
291
292 string name0("rule0");
293 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
294 "firstn", pg_pool_t::TYPE_REPLICATED);
295 EXPECT_EQ(0, rule0);
296
297 string root_name1("root1");
298 int root1;
299 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
300 ROOT_TYPE, n-1, items, weights, &root1));
301 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
302
303 string name1("rule1");
304 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
305 "firstn", pg_pool_t::TYPE_REPLICATED);
306 EXPECT_EQ(1, rule1);
307
308 c->finalize();
309
310 vector<unsigned> reweight(n, 0x10000);
311 for (int i=0; i<10000; ++i) {
312 vector<int> out0, out1;
313 c->do_rule(rule0, i, out0, 1, reweight, 0);
314 ASSERT_EQ(1u, out0.size());
315 c->do_rule(rule1, i, out1, 1, reweight, 0);
316 ASSERT_EQ(1u, out1.size());
317 ASSERT_EQ(out0[0], out1[0]);
318 //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
319 }
320 }
321
322 TEST_F(CRUSHTest, straw_same) {
323 // items with the same weight should map about the same as items
324 // with very similar weights.
325 //
326 // give the 0 vector a paired stair pattern, with dup weights. note
327 // that the original straw flaw does not appear when there are 2 of
328 // the initial weight, but it does when there is just 1.
329 //
330 // give the 1 vector a similar stair pattern, but make the same
331 // steps weights slightly different (no dups). this works.
332 //
333 // compare the result and verify that the resulting mapping is
334 // almost identical.
335
336 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
337 const int ROOT_TYPE = 1;
338 c->set_type_name(ROOT_TYPE, "root");
339 const int OSD_TYPE = 0;
340 c->set_type_name(OSD_TYPE, "osd");
341
342 int n = 10;
343 int items[n], weights[n];
344 for (int i=0; i <n; ++i) {
345 items[i] = i;
346 weights[i] = 0x10000 * ((i+1)/2 + 1);
347 }
348
349 c->set_max_devices(n);
350
351 string root_name0("root0");
352 int root0;
353 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
354 ROOT_TYPE, n, items, weights, &root0));
355 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
356
357 string name0("rule0");
358 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
359 "firstn", pg_pool_t::TYPE_REPLICATED);
360 EXPECT_EQ(0, rule0);
361
362 for (int i=0; i <n; ++i) {
363 items[i] = i;
364 weights[i] = 0x10000 * ((i+1)/2 + 1) + (i%2)*100;
365 }
366
367 string root_name1("root1");
368 int root1;
369 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
370 ROOT_TYPE, n, items, weights, &root1));
371 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
372
373 string name1("rule1");
374 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
375 "firstn", pg_pool_t::TYPE_REPLICATED);
376 EXPECT_EQ(1, rule1);
377
378 if (0) {
379 crush_bucket_straw *sb0 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root0]);
380 crush_bucket_straw *sb1 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root1]);
381
382 for (int i=0; i<n; ++i) {
383 cout << i
384 << "\t" << sb0->item_weights[i]
385 << "\t" << sb1->item_weights[i]
386 << "\t"
387 << "\t" << sb0->straws[i]
388 << "\t" << sb1->straws[i]
389 << std::endl;
390 }
391 }
392
393 if (0) {
394 JSONFormatter jf(true);
395 jf.open_object_section("crush");
396 c->dump(&jf);
397 jf.close_section();
398 jf.flush(cout);
399 }
400
401 c->finalize();
402
403 vector<int> sum0(n, 0), sum1(n, 0);
404 vector<unsigned> reweight(n, 0x10000);
405 int different = 0;
406 int max = 100000;
407 for (int i=0; i<max; ++i) {
408 vector<int> out0, out1;
409 c->do_rule(rule0, i, out0, 1, reweight, 0);
410 ASSERT_EQ(1u, out0.size());
411 c->do_rule(rule1, i, out1, 1, reweight, 0);
412 ASSERT_EQ(1u, out1.size());
413 sum0[out0[0]]++;
414 sum1[out1[0]]++;
415 if (out0[0] != out1[0])
416 different++;
417 }
418 for (int i=0; i<n; ++i) {
419 cout << i
420 << "\t" << ((double)weights[i] / (double)weights[0])
421 << "\t" << sum0[i] << "\t" << ((double)sum0[i]/(double)sum0[0])
422 << "\t" << sum1[i] << "\t" << ((double)sum1[i]/(double)sum1[0])
423 << std::endl;
424 }
425 double ratio = ((double)different / (double)max);
426 cout << different << " of " << max << " = "
427 << ratio
428 << " different" << std::endl;
429 ASSERT_LT(ratio, .001);
430 }
431
432 double calc_straw2_stddev(int *weights, int n, bool verbose)
433 {
434 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
435 const int ROOT_TYPE = 2;
436 c->set_type_name(ROOT_TYPE, "root");
437 const int HOST_TYPE = 1;
438 c->set_type_name(HOST_TYPE, "host");
439 const int OSD_TYPE = 0;
440 c->set_type_name(OSD_TYPE, "osd");
441
442 int items[n];
443 for (int i=0; i <n; ++i) {
444 items[i] = i;
445 }
446
447 c->set_max_devices(n);
448
449 string root_name0("root0");
450 int root0;
451 crush_bucket *b0 = crush_make_bucket(c->get_crush_map(),
452 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
453 ROOT_TYPE, n, items, weights);
454 crush_add_bucket(c->get_crush_map(), 0, b0, &root0);
455 c->set_item_name(root0, root_name0);
456
457 string name0("rule0");
458 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
459 "firstn", pg_pool_t::TYPE_REPLICATED);
460
461 int sum[n];
462 double totalweight = 0;
463 vector<unsigned> reweight(n);
464 for (int i=0; i<n; ++i) {
465 sum[i] = 0;
466 reweight[i] = 0x10000;
467 totalweight += weights[i];
468 }
469 totalweight /= (double)0x10000;
470 double avgweight = totalweight / n;
471
472 c->finalize();
473
474 int total = 1000000;
475 for (int i=0; i<total; ++i) {
476 vector<int> out;
477 c->do_rule(rule0, i, out, 1, reweight, 0);
478 sum[out[0]]++;
479 }
480
481 double expected = (double)total / (double)n;
482 if (verbose)
483 cout << "expect\t\t\t" << expected << std::endl;
484 double stddev = 0;
485 double exptotal = 0;
486 if (verbose)
487 cout << "osd\tweight\tcount\tadjusted\n";
488 std::streamsize p = cout.precision();
489 cout << std::setprecision(4);
490 for (int i=0; i<n; ++i) {
491 double w = (double)weights[i] / (double)0x10000;
492 double adj = (double)sum[i] * avgweight / w;
493 stddev += (adj - expected) * (adj - expected);
494 exptotal += adj;
495 if (verbose)
496 cout << i
497 << "\t" << w
498 << "\t" << sum[i]
499 << "\t" << (int)adj
500 << std::endl;
501 }
502 cout << std::setprecision(p);
503 {
504 stddev = sqrt(stddev / (double)n);
505 if (verbose)
506 cout << "std dev " << stddev << std::endl;
507
508 double p = 1.0 / (double)n;
509 double estddev = sqrt(exptotal * p * (1.0 - p));
510 if (verbose)
511 cout << " vs " << estddev << "\t(expected)" << std::endl;
512 }
513 return stddev;
514 }
515
516 TEST_F(CRUSHTest, straw2_stddev)
517 {
518 int n = 15;
519 int weights[n];
520 cout << "maxskew\tstddev\n";
521 for (double step = 1.0; step < 2; step += .25) {
522 int w = 0x10000;
523 for (int i = 0; i < n; ++i) {
524 weights[i] = w;
525 w *= step;
526 }
527 double stddev = calc_straw2_stddev(weights, n, true);
528 cout << ((double)weights[n-1]/(double)weights[0])
529 << "\t" << stddev << std::endl;
530 }
531 }
532
533 TEST_F(CRUSHTest, straw2_reweight) {
534 // when we adjust the weight of an item in a straw2 bucket,
535 // we should *only* see movement from or to that item, never
536 // between other items.
537 int weights[] = {
538 0x10000,
539 0x10000,
540 0x20000,
541 0x20000,
542 0x30000,
543 0x50000,
544 0x8000,
545 0x20000,
546 0x10000,
547 0x10000,
548 0x20000,
549 0x10000,
550 0x10000,
551 0x20000,
552 0x300000,
553 0x10000,
554 0x20000
555 };
556 int n = 15;
557
558 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
559 const int ROOT_TYPE = 2;
560 c->set_type_name(ROOT_TYPE, "root");
561 const int HOST_TYPE = 1;
562 c->set_type_name(HOST_TYPE, "host");
563 const int OSD_TYPE = 0;
564 c->set_type_name(OSD_TYPE, "osd");
565
566 int items[n];
567 for (int i=0; i <n; ++i) {
568 items[i] = i;
569 //weights[i] = 0x10000;
570 }
571
572 c->set_max_devices(n);
573
574 string root_name0("root0");
575 int root0;
576 crush_bucket *b0 = crush_make_bucket(c->get_crush_map(),
577 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
578 ROOT_TYPE, n, items, weights);
579 EXPECT_EQ(0, crush_add_bucket(c->get_crush_map(), 0, b0, &root0));
580 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
581
582 string name0("rule0");
583 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
584 "firstn", pg_pool_t::TYPE_REPLICATED);
585 EXPECT_EQ(0, rule0);
586
587 int changed = 1;
588 weights[changed] = weights[changed] / 10 * (rand() % 10);
589
590 string root_name1("root1");
591 int root1;
592 crush_bucket *b1 = crush_make_bucket(c->get_crush_map(),
593 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
594 ROOT_TYPE, n, items, weights);
595 EXPECT_EQ(0, crush_add_bucket(c->get_crush_map(), 0, b1, &root1));
596 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
597
598 string name1("rule1");
599 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
600 "firstn", pg_pool_t::TYPE_REPLICATED);
601 EXPECT_EQ(1, rule1);
602
603 int sum[n];
604 double totalweight = 0;
605 vector<unsigned> reweight(n);
606 for (int i=0; i<n; ++i) {
607 sum[i] = 0;
608 reweight[i] = 0x10000;
609 totalweight += weights[i];
610 }
611 totalweight /= (double)0x10000;
612 double avgweight = totalweight / n;
613
614 c->finalize();
615
616 int total = 1000000;
617 for (int i=0; i<total; ++i) {
618 vector<int> out0, out1;
619 c->do_rule(rule0, i, out0, 1, reweight, 0);
620 ASSERT_EQ(1u, out0.size());
621
622 c->do_rule(rule1, i, out1, 1, reweight, 0);
623 ASSERT_EQ(1u, out1.size());
624
625 sum[out1[0]]++;
626 //sum[rand()%n]++;
627
628 if (out1[0] == changed) {
629 ASSERT_EQ(changed, out0[0]);
630 } else if (out0[0] != changed) {
631 ASSERT_EQ(out0[0], out1[0]);
632 }
633 }
634
635 double expected = (double)total / (double)n;
636 cout << "expect\t\t\t" << expected << std::endl;
637 double stddev = 0;
638 cout << "osd\tweight\tcount\tadjusted\n";
639 std::streamsize p = cout.precision();
640 cout << std::setprecision(4);
641 for (int i=0; i<n; ++i) {
642 double w = (double)weights[i] / (double)0x10000;
643 double adj = (double)sum[i] * avgweight / w;
644 stddev += (adj - expected) * (adj - expected);
645 cout << i
646 << "\t" << w
647 << "\t" << sum[i]
648 << "\t" << (int)adj
649 << std::endl;
650 }
651 cout << std::setprecision(p);
652 {
653 stddev = sqrt(stddev / (double)n);
654 cout << "std dev " << stddev << std::endl;
655
656 double p = 1.0 / (double)n;
657 double estddev = sqrt((double)total * p * (1.0 - p));
658 cout << " vs " << estddev << std::endl;
659 }
660 }