]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/crush/crush.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / test / crush / crush.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank <info@inktank.com>
7 *
8 * LGPL-2.1 (see COPYING-LGPL2.1) or later
9 */
10
11 #include <gtest/gtest.h>
12 #include <iostream>
13 #include <memory>
14 #include <set>
15
16 #include "common/ceph_argparse.h"
17 #include "common/common_init.h"
18 #include "include/stringify.h"
19
20 #include "crush/CrushWrapper.h"
21 #include "osd/osd_types.h"
22
23 std::unique_ptr<CrushWrapper> build_indep_map(CephContext *cct, int num_rack,
24 int num_host, int num_osd)
25 {
26 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
27 c->create();
28
29 c->set_type_name(5, "root");
30 c->set_type_name(4, "row");
31 c->set_type_name(3, "rack");
32 c->set_type_name(2, "chasis");
33 c->set_type_name(1, "host");
34 c->set_type_name(0, "osd");
35
36 int rootno;
37 c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
38 5, 0, NULL, NULL, &rootno);
39 c->set_item_name(rootno, "default");
40
41 map<string,string> loc;
42 loc["root"] = "default";
43
44 int osd = 0;
45 for (int r=0; r<num_rack; ++r) {
46 loc["rack"] = string("rack-") + stringify(r);
47 for (int h=0; h<num_host; ++h) {
48 loc["host"] = string("host-") + stringify(r) + string("-") + stringify(h);
49 for (int o=0; o<num_osd; ++o, ++osd) {
50 c->insert_item(cct, osd, 1.0, string("osd.") + stringify(osd), loc);
51 }
52 }
53 }
54 int ret;
55 int ruleno = 0;
56 ret = c->add_rule(ruleno, 4, 123, 1, 20);
57 ceph_assert(ret == ruleno);
58 ret = c->set_rule_step(ruleno, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
59 ceph_assert(ret == 0);
60 ret = c->set_rule_step(ruleno, 1, CRUSH_RULE_TAKE, rootno, 0);
61 ceph_assert(ret == 0);
62 ret = c->set_rule_step(ruleno, 2, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1);
63 ceph_assert(ret == 0);
64 ret = c->set_rule_step(ruleno, 3, CRUSH_RULE_EMIT, 0, 0);
65 ceph_assert(ret == 0);
66 c->set_rule_name(ruleno, "data");
67
68 c->finalize();
69
70 if (false) {
71 Formatter *f = Formatter::create("json-pretty");
72 f->open_object_section("crush_map");
73 c->dump(f);
74 f->close_section();
75 f->flush(cout);
76 delete f;
77 }
78
79 return c;
80 }
81
82 int get_num_dups(const vector<int>& v)
83 {
84 std::set<int> s;
85 int dups = 0;
86 for (auto n : v) {
87 if (s.count(n))
88 ++dups;
89 else if (n != CRUSH_ITEM_NONE)
90 s.insert(n);
91 }
92 return dups;
93 }
94
95 class CRUSHTest : public ::testing::Test
96 {
97 public:
98 void SetUp() final
99 {
100 CephInitParameters params(CEPH_ENTITY_TYPE_CLIENT);
101 cct = common_preinit(params, CODE_ENVIRONMENT_UTILITY,
102 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
103 }
104 void TearDown() final
105 {
106 cct->put();
107 cct = nullptr;
108 }
109 protected:
110 CephContext *cct = nullptr;
111 };
112
113 TEST_F(CRUSHTest, indep_toosmall) {
114 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 1, 3, 1));
115 vector<__u32> weight(c->get_max_devices(), 0x10000);
116 c->dump_tree(&cout, NULL);
117
118 for (int x = 0; x < 100; ++x) {
119 vector<int> out;
120 c->do_rule(0, x, out, 5, weight, 0);
121 cout << x << " -> " << out << std::endl;
122 int num_none = 0;
123 for (unsigned i=0; i<out.size(); ++i) {
124 if (out[i] == CRUSH_ITEM_NONE)
125 num_none++;
126 }
127 ASSERT_EQ(2, num_none);
128 ASSERT_EQ(0, get_num_dups(out));
129 }
130 }
131
132 TEST_F(CRUSHTest, indep_basic) {
133 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
134 vector<__u32> weight(c->get_max_devices(), 0x10000);
135 c->dump_tree(&cout, NULL);
136
137 for (int x = 0; x < 100; ++x) {
138 vector<int> out;
139 c->do_rule(0, x, out, 5, weight, 0);
140 cout << x << " -> " << out << std::endl;
141 int num_none = 0;
142 for (unsigned i=0; i<out.size(); ++i) {
143 if (out[i] == CRUSH_ITEM_NONE)
144 num_none++;
145 }
146 ASSERT_EQ(0, num_none);
147 ASSERT_EQ(0, get_num_dups(out));
148 }
149 }
150
151 TEST_F(CRUSHTest, indep_out_alt) {
152 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
153 vector<__u32> weight(c->get_max_devices(), 0x10000);
154
155 // mark a bunch of osds out
156 int num = 3*3*3;
157 for (int i=0; i<num / 2; ++i)
158 weight[i*2] = 0;
159 c->dump_tree(&cout, NULL);
160
161 // need more retries to get 9/9 hosts for x in 0..99
162 c->set_choose_total_tries(100);
163 for (int x = 0; x < 100; ++x) {
164 vector<int> out;
165 c->do_rule(0, x, out, 9, weight, 0);
166 cout << x << " -> " << out << std::endl;
167 int num_none = 0;
168 for (unsigned i=0; i<out.size(); ++i) {
169 if (out[i] == CRUSH_ITEM_NONE)
170 num_none++;
171 }
172 ASSERT_EQ(0, num_none);
173 ASSERT_EQ(0, get_num_dups(out));
174 }
175 }
176
177 TEST_F(CRUSHTest, indep_out_contig) {
178 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
179 vector<__u32> weight(c->get_max_devices(), 0x10000);
180
181 // mark a bunch of osds out
182 int num = 3*3*3;
183 for (int i=0; i<num / 3; ++i)
184 weight[i] = 0;
185 c->dump_tree(&cout, NULL);
186
187 c->set_choose_total_tries(100);
188 for (int x = 0; x < 100; ++x) {
189 vector<int> out;
190 c->do_rule(0, x, out, 7, weight, 0);
191 cout << x << " -> " << out << std::endl;
192 int num_none = 0;
193 for (unsigned i=0; i<out.size(); ++i) {
194 if (out[i] == CRUSH_ITEM_NONE)
195 num_none++;
196 }
197 ASSERT_EQ(1, num_none);
198 ASSERT_EQ(0, get_num_dups(out));
199 }
200 }
201
202
203 TEST_F(CRUSHTest, indep_out_progressive) {
204 std::unique_ptr<CrushWrapper> c(build_indep_map(cct, 3, 3, 3));
205 c->set_choose_total_tries(100);
206 vector<__u32> tweight(c->get_max_devices(), 0x10000);
207 c->dump_tree(&cout, NULL);
208
209 int tchanged = 0;
210 for (int x = 1; x < 5; ++x) {
211 vector<__u32> weight(c->get_max_devices(), 0x10000);
212
213 std::map<int,unsigned> pos;
214 vector<int> prev;
215 for (unsigned i=0; i<weight.size(); ++i) {
216 vector<int> out;
217 c->do_rule(0, x, out, 7, weight, 0);
218 cout << "(" << i << "/" << weight.size() << " out) "
219 << x << " -> " << out << std::endl;
220 int num_none = 0;
221 for (unsigned k=0; k<out.size(); ++k) {
222 if (out[k] == CRUSH_ITEM_NONE)
223 num_none++;
224 }
225 ASSERT_EQ(0, get_num_dups(out));
226
227 // make sure nothing moved
228 int moved = 0;
229 int changed = 0;
230 for (unsigned j=0; j<out.size(); ++j) {
231 if (i && out[j] != prev[j]) {
232 ++changed;
233 ++tchanged;
234 }
235 if (out[j] == CRUSH_ITEM_NONE) {
236 continue;
237 }
238 if (i && pos.count(out[j])) {
239 // result shouldn't have moved position
240 if (j != pos[out[j]]) {
241 cout << " " << out[j] << " moved from " << pos[out[j]] << " to " << j << std::endl;
242 ++moved;
243 }
244 //ASSERT_EQ(j, pos[out[j]]);
245 }
246 }
247 if (moved || changed)
248 cout << " " << moved << " moved, " << changed << " changed" << std::endl;
249 ASSERT_LE(moved, 1);
250 ASSERT_LE(changed, 3);
251
252 // mark another osd out
253 weight[i] = 0;
254 prev = out;
255 pos.clear();
256 for (unsigned j=0; j<out.size(); ++j) {
257 if (out[j] != CRUSH_ITEM_NONE)
258 pos[out[j]] = j;
259 }
260 }
261 }
262 cout << tchanged << " total changed" << std::endl;
263
264 }
265
266 TEST_F(CRUSHTest, straw_zero) {
267 // zero weight items should have no effect on placement.
268
269 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
270 const int ROOT_TYPE = 1;
271 c->set_type_name(ROOT_TYPE, "root");
272 const int OSD_TYPE = 0;
273 c->set_type_name(OSD_TYPE, "osd");
274
275 int n = 5;
276 int items[n], weights[n];
277 for (int i=0; i <n; ++i) {
278 items[i] = i;
279 weights[i] = 0x10000 * (n-i-1);
280 }
281
282 c->set_max_devices(n);
283
284 string root_name0("root0");
285 int root0;
286 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
287 ROOT_TYPE, n, items, weights, &root0));
288 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
289
290 string name0("rule0");
291 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
292 "firstn", pg_pool_t::TYPE_REPLICATED);
293 EXPECT_EQ(0, rule0);
294
295 string root_name1("root1");
296 int root1;
297 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
298 ROOT_TYPE, n-1, items, weights, &root1));
299 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
300
301 string name1("rule1");
302 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
303 "firstn", pg_pool_t::TYPE_REPLICATED);
304 EXPECT_EQ(1, rule1);
305
306 c->finalize();
307
308 vector<unsigned> reweight(n, 0x10000);
309 for (int i=0; i<10000; ++i) {
310 vector<int> out0, out1;
311 c->do_rule(rule0, i, out0, 1, reweight, 0);
312 ASSERT_EQ(1u, out0.size());
313 c->do_rule(rule1, i, out1, 1, reweight, 0);
314 ASSERT_EQ(1u, out1.size());
315 ASSERT_EQ(out0[0], out1[0]);
316 //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
317 }
318 }
319
320 TEST_F(CRUSHTest, straw_same) {
321 // items with the same weight should map about the same as items
322 // with very similar weights.
323 //
324 // give the 0 vector a paired stair pattern, with dup weights. note
325 // that the original straw flaw does not appear when there are 2 of
326 // the initial weight, but it does when there is just 1.
327 //
328 // give the 1 vector a similar stair pattern, but make the same
329 // steps weights slightly different (no dups). this works.
330 //
331 // compare the result and verify that the resulting mapping is
332 // almost identical.
333
334 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
335 const int ROOT_TYPE = 1;
336 c->set_type_name(ROOT_TYPE, "root");
337 const int OSD_TYPE = 0;
338 c->set_type_name(OSD_TYPE, "osd");
339
340 int n = 10;
341 int items[n], weights[n];
342 for (int i=0; i <n; ++i) {
343 items[i] = i;
344 weights[i] = 0x10000 * ((i+1)/2 + 1);
345 }
346
347 c->set_max_devices(n);
348
349 string root_name0("root0");
350 int root0;
351 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
352 ROOT_TYPE, n, items, weights, &root0));
353 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
354
355 string name0("rule0");
356 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
357 "firstn", pg_pool_t::TYPE_REPLICATED);
358 EXPECT_EQ(0, rule0);
359
360 for (int i=0; i <n; ++i) {
361 items[i] = i;
362 weights[i] = 0x10000 * ((i+1)/2 + 1) + (i%2)*100;
363 }
364
365 string root_name1("root1");
366 int root1;
367 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
368 ROOT_TYPE, n, items, weights, &root1));
369 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
370
371 string name1("rule1");
372 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
373 "firstn", pg_pool_t::TYPE_REPLICATED);
374 EXPECT_EQ(1, rule1);
375
376 if (0) {
377 crush_bucket_straw *sb0 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root0]);
378 crush_bucket_straw *sb1 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root1]);
379
380 for (int i=0; i<n; ++i) {
381 cout << i
382 << "\t" << sb0->item_weights[i]
383 << "\t" << sb1->item_weights[i]
384 << "\t"
385 << "\t" << sb0->straws[i]
386 << "\t" << sb1->straws[i]
387 << std::endl;
388 }
389 }
390
391 if (0) {
392 JSONFormatter jf(true);
393 jf.open_object_section("crush");
394 c->dump(&jf);
395 jf.close_section();
396 jf.flush(cout);
397 }
398
399 c->finalize();
400
401 vector<int> sum0(n, 0), sum1(n, 0);
402 vector<unsigned> reweight(n, 0x10000);
403 int different = 0;
404 int max = 100000;
405 for (int i=0; i<max; ++i) {
406 vector<int> out0, out1;
407 c->do_rule(rule0, i, out0, 1, reweight, 0);
408 ASSERT_EQ(1u, out0.size());
409 c->do_rule(rule1, i, out1, 1, reweight, 0);
410 ASSERT_EQ(1u, out1.size());
411 sum0[out0[0]]++;
412 sum1[out1[0]]++;
413 if (out0[0] != out1[0])
414 different++;
415 }
416 for (int i=0; i<n; ++i) {
417 cout << i
418 << "\t" << ((double)weights[i] / (double)weights[0])
419 << "\t" << sum0[i] << "\t" << ((double)sum0[i]/(double)sum0[0])
420 << "\t" << sum1[i] << "\t" << ((double)sum1[i]/(double)sum1[0])
421 << std::endl;
422 }
423 double ratio = ((double)different / (double)max);
424 cout << different << " of " << max << " = "
425 << ratio
426 << " different" << std::endl;
427 ASSERT_LT(ratio, .001);
428 }
429
430 double calc_straw2_stddev(int *weights, int n, bool verbose)
431 {
432 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
433 const int ROOT_TYPE = 2;
434 c->set_type_name(ROOT_TYPE, "root");
435 const int HOST_TYPE = 1;
436 c->set_type_name(HOST_TYPE, "host");
437 const int OSD_TYPE = 0;
438 c->set_type_name(OSD_TYPE, "osd");
439
440 int items[n];
441 for (int i=0; i <n; ++i) {
442 items[i] = i;
443 }
444
445 c->set_max_devices(n);
446
447 string root_name0("root0");
448 int root0;
449 crush_bucket *b0 = crush_make_bucket(c->get_crush_map(),
450 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
451 ROOT_TYPE, n, items, weights);
452 crush_add_bucket(c->get_crush_map(), 0, b0, &root0);
453 c->set_item_name(root0, root_name0);
454
455 string name0("rule0");
456 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
457 "firstn", pg_pool_t::TYPE_REPLICATED);
458
459 int sum[n];
460 double totalweight = 0;
461 vector<unsigned> reweight(n);
462 for (int i=0; i<n; ++i) {
463 sum[i] = 0;
464 reweight[i] = 0x10000;
465 totalweight += weights[i];
466 }
467 totalweight /= (double)0x10000;
468 double avgweight = totalweight / n;
469
470 c->finalize();
471
472 int total = 1000000;
473 for (int i=0; i<total; ++i) {
474 vector<int> out;
475 c->do_rule(rule0, i, out, 1, reweight, 0);
476 sum[out[0]]++;
477 }
478
479 double expected = (double)total / (double)n;
480 if (verbose)
481 cout << "expect\t\t\t" << expected << std::endl;
482 double stddev = 0;
483 double exptotal = 0;
484 if (verbose)
485 cout << "osd\tweight\tcount\tadjusted\n";
486 std::streamsize p = cout.precision();
487 cout << std::setprecision(4);
488 for (int i=0; i<n; ++i) {
489 double w = (double)weights[i] / (double)0x10000;
490 double adj = (double)sum[i] * avgweight / w;
491 stddev += (adj - expected) * (adj - expected);
492 exptotal += adj;
493 if (verbose)
494 cout << i
495 << "\t" << w
496 << "\t" << sum[i]
497 << "\t" << (int)adj
498 << std::endl;
499 }
500 cout << std::setprecision(p);
501 {
502 stddev = sqrt(stddev / (double)n);
503 if (verbose)
504 cout << "std dev " << stddev << std::endl;
505
506 double p = 1.0 / (double)n;
507 double estddev = sqrt(exptotal * p * (1.0 - p));
508 if (verbose)
509 cout << " vs " << estddev << "\t(expected)" << std::endl;
510 }
511 return stddev;
512 }
513
514 TEST_F(CRUSHTest, straw2_stddev)
515 {
516 int n = 15;
517 int weights[n];
518 cout << "maxskew\tstddev\n";
519 for (double step = 1.0; step < 2; step += .25) {
520 int w = 0x10000;
521 for (int i = 0; i < n; ++i) {
522 weights[i] = w;
523 w *= step;
524 }
525 double stddev = calc_straw2_stddev(weights, n, true);
526 cout << ((double)weights[n-1]/(double)weights[0])
527 << "\t" << stddev << std::endl;
528 }
529 }
530
531 TEST_F(CRUSHTest, straw2_reweight) {
532 // when we adjust the weight of an item in a straw2 bucket,
533 // we should *only* see movement from or to that item, never
534 // between other items.
535 int weights[] = {
536 0x10000,
537 0x10000,
538 0x20000,
539 0x20000,
540 0x30000,
541 0x50000,
542 0x8000,
543 0x20000,
544 0x10000,
545 0x10000,
546 0x20000,
547 0x10000,
548 0x10000,
549 0x20000,
550 0x300000,
551 0x10000,
552 0x20000
553 };
554 int n = 15;
555
556 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
557 const int ROOT_TYPE = 2;
558 c->set_type_name(ROOT_TYPE, "root");
559 const int HOST_TYPE = 1;
560 c->set_type_name(HOST_TYPE, "host");
561 const int OSD_TYPE = 0;
562 c->set_type_name(OSD_TYPE, "osd");
563
564 int items[n];
565 for (int i=0; i <n; ++i) {
566 items[i] = i;
567 //weights[i] = 0x10000;
568 }
569
570 c->set_max_devices(n);
571
572 string root_name0("root0");
573 int root0;
574 crush_bucket *b0 = crush_make_bucket(c->get_crush_map(),
575 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
576 ROOT_TYPE, n, items, weights);
577 EXPECT_EQ(0, crush_add_bucket(c->get_crush_map(), 0, b0, &root0));
578 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
579
580 string name0("rule0");
581 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
582 "firstn", pg_pool_t::TYPE_REPLICATED);
583 EXPECT_EQ(0, rule0);
584
585 int changed = 1;
586 weights[changed] = weights[changed] / 10 * (rand() % 10);
587
588 string root_name1("root1");
589 int root1;
590 crush_bucket *b1 = crush_make_bucket(c->get_crush_map(),
591 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
592 ROOT_TYPE, n, items, weights);
593 EXPECT_EQ(0, crush_add_bucket(c->get_crush_map(), 0, b1, &root1));
594 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
595
596 string name1("rule1");
597 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
598 "firstn", pg_pool_t::TYPE_REPLICATED);
599 EXPECT_EQ(1, rule1);
600
601 int sum[n];
602 double totalweight = 0;
603 vector<unsigned> reweight(n);
604 for (int i=0; i<n; ++i) {
605 sum[i] = 0;
606 reweight[i] = 0x10000;
607 totalweight += weights[i];
608 }
609 totalweight /= (double)0x10000;
610 double avgweight = totalweight / n;
611
612 c->finalize();
613
614 int total = 1000000;
615 for (int i=0; i<total; ++i) {
616 vector<int> out0, out1;
617 c->do_rule(rule0, i, out0, 1, reweight, 0);
618 ASSERT_EQ(1u, out0.size());
619
620 c->do_rule(rule1, i, out1, 1, reweight, 0);
621 ASSERT_EQ(1u, out1.size());
622
623 sum[out1[0]]++;
624 //sum[rand()%n]++;
625
626 if (out1[0] == changed) {
627 ASSERT_EQ(changed, out0[0]);
628 } else if (out0[0] != changed) {
629 ASSERT_EQ(out0[0], out1[0]);
630 }
631 }
632
633 double expected = (double)total / (double)n;
634 cout << "expect\t\t\t" << expected << std::endl;
635 double stddev = 0;
636 cout << "osd\tweight\tcount\tadjusted\n";
637 std::streamsize p = cout.precision();
638 cout << std::setprecision(4);
639 for (int i=0; i<n; ++i) {
640 double w = (double)weights[i] / (double)0x10000;
641 double adj = (double)sum[i] * avgweight / w;
642 stddev += (adj - expected) * (adj - expected);
643 cout << i
644 << "\t" << w
645 << "\t" << sum[i]
646 << "\t" << (int)adj
647 << std::endl;
648 }
649 cout << std::setprecision(p);
650 {
651 stddev = sqrt(stddev / (double)n);
652 cout << "std dev " << stddev << std::endl;
653
654 double p = 1.0 / (double)n;
655 double estddev = sqrt((double)total * p * (1.0 - p));
656 cout << " vs " << estddev << std::endl;
657 }
658 }