1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * LGPL-2.1 (see COPYING-LGPL2.1) or later
11 #include <gtest/gtest.h>
16 #include "common/ceph_argparse.h"
17 #include "common/common_init.h"
18 #include "include/stringify.h"
20 #include "crush/CrushWrapper.h"
21 #include "osd/osd_types.h"
25 std::unique_ptr
<CrushWrapper
> build_indep_map(CephContext
*cct
, int num_rack
,
26 int num_host
, int num_osd
)
28 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
31 c
->set_type_name(5, "root");
32 c
->set_type_name(4, "row");
33 c
->set_type_name(3, "rack");
34 c
->set_type_name(2, "chasis");
35 c
->set_type_name(1, "host");
36 c
->set_type_name(0, "osd");
39 c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
40 5, 0, NULL
, NULL
, &rootno
);
41 c
->set_item_name(rootno
, "default");
43 map
<string
,string
> loc
;
44 loc
["root"] = "default";
47 for (int r
=0; r
<num_rack
; ++r
) {
48 loc
["rack"] = string("rack-") + stringify(r
);
49 for (int h
=0; h
<num_host
; ++h
) {
50 loc
["host"] = string("host-") + stringify(r
) + string("-") + stringify(h
);
51 for (int o
=0; o
<num_osd
; ++o
, ++osd
) {
52 c
->insert_item(cct
, osd
, 1.0, string("osd.") + stringify(osd
), loc
);
58 ret
= c
->add_rule(ruleno
, 4, 123);
59 ceph_assert(ret
== ruleno
);
60 ret
= c
->set_rule_step(ruleno
, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 10, 0);
61 ceph_assert(ret
== 0);
62 ret
= c
->set_rule_step(ruleno
, 1, CRUSH_RULE_TAKE
, rootno
, 0);
63 ceph_assert(ret
== 0);
64 ret
= c
->set_rule_step(ruleno
, 2, CRUSH_RULE_CHOOSELEAF_INDEP
, CRUSH_CHOOSE_N
, 1);
65 ceph_assert(ret
== 0);
66 ret
= c
->set_rule_step(ruleno
, 3, CRUSH_RULE_EMIT
, 0, 0);
67 ceph_assert(ret
== 0);
68 c
->set_rule_name(ruleno
, "data");
73 Formatter
*f
= Formatter::create("json-pretty");
74 f
->open_object_section("crush_map");
84 int get_num_dups(const vector
<int>& v
)
91 else if (n
!= CRUSH_ITEM_NONE
)
97 class CRUSHTest
: public ::testing::Test
102 CephInitParameters
params(CEPH_ENTITY_TYPE_CLIENT
);
103 cct
= common_preinit(params
, CODE_ENVIRONMENT_UTILITY
,
104 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
106 void TearDown() final
112 CephContext
*cct
= nullptr;
115 TEST_F(CRUSHTest
, indep_toosmall
) {
116 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 1, 3, 1));
117 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
118 c
->dump_tree(&cout
, NULL
);
120 for (int x
= 0; x
< 100; ++x
) {
122 c
->do_rule(0, x
, out
, 5, weight
, 0);
123 cout
<< x
<< " -> " << out
<< std::endl
;
125 for (unsigned i
=0; i
<out
.size(); ++i
) {
126 if (out
[i
] == CRUSH_ITEM_NONE
)
129 ASSERT_EQ(2, num_none
);
130 ASSERT_EQ(0, get_num_dups(out
));
134 TEST_F(CRUSHTest
, indep_basic
) {
135 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
136 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
137 c
->dump_tree(&cout
, NULL
);
139 for (int x
= 0; x
< 100; ++x
) {
141 c
->do_rule(0, x
, out
, 5, weight
, 0);
142 cout
<< x
<< " -> " << out
<< std::endl
;
144 for (unsigned i
=0; i
<out
.size(); ++i
) {
145 if (out
[i
] == CRUSH_ITEM_NONE
)
148 ASSERT_EQ(0, num_none
);
149 ASSERT_EQ(0, get_num_dups(out
));
153 TEST_F(CRUSHTest
, indep_out_alt
) {
154 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
155 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
157 // mark a bunch of osds out
159 for (int i
=0; i
<num
/ 2; ++i
)
161 c
->dump_tree(&cout
, NULL
);
163 // need more retries to get 9/9 hosts for x in 0..99
164 c
->set_choose_total_tries(100);
165 for (int x
= 0; x
< 100; ++x
) {
167 c
->do_rule(0, x
, out
, 9, weight
, 0);
168 cout
<< x
<< " -> " << out
<< std::endl
;
170 for (unsigned i
=0; i
<out
.size(); ++i
) {
171 if (out
[i
] == CRUSH_ITEM_NONE
)
174 ASSERT_EQ(0, num_none
);
175 ASSERT_EQ(0, get_num_dups(out
));
179 TEST_F(CRUSHTest
, indep_out_contig
) {
180 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
181 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
183 // mark a bunch of osds out
185 for (int i
=0; i
<num
/ 3; ++i
)
187 c
->dump_tree(&cout
, NULL
);
189 c
->set_choose_total_tries(100);
190 for (int x
= 0; x
< 100; ++x
) {
192 c
->do_rule(0, x
, out
, 7, weight
, 0);
193 cout
<< x
<< " -> " << out
<< std::endl
;
195 for (unsigned i
=0; i
<out
.size(); ++i
) {
196 if (out
[i
] == CRUSH_ITEM_NONE
)
199 ASSERT_EQ(1, num_none
);
200 ASSERT_EQ(0, get_num_dups(out
));
205 TEST_F(CRUSHTest
, indep_out_progressive
) {
206 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
207 c
->set_choose_total_tries(100);
208 vector
<__u32
> tweight(c
->get_max_devices(), 0x10000);
209 c
->dump_tree(&cout
, NULL
);
212 for (int x
= 1; x
< 5; ++x
) {
213 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
215 std::map
<int,unsigned> pos
;
217 for (unsigned i
=0; i
<weight
.size(); ++i
) {
219 c
->do_rule(0, x
, out
, 7, weight
, 0);
220 cout
<< "(" << i
<< "/" << weight
.size() << " out) "
221 << x
<< " -> " << out
<< std::endl
;
223 for (unsigned k
=0; k
<out
.size(); ++k
) {
224 if (out
[k
] == CRUSH_ITEM_NONE
)
227 ASSERT_EQ(0, get_num_dups(out
));
229 // make sure nothing moved
232 for (unsigned j
=0; j
<out
.size(); ++j
) {
233 if (i
&& out
[j
] != prev
[j
]) {
237 if (out
[j
] == CRUSH_ITEM_NONE
) {
240 if (i
&& pos
.count(out
[j
])) {
241 // result shouldn't have moved position
242 if (j
!= pos
[out
[j
]]) {
243 cout
<< " " << out
[j
] << " moved from " << pos
[out
[j
]] << " to " << j
<< std::endl
;
246 //ASSERT_EQ(j, pos[out[j]]);
249 if (moved
|| changed
)
250 cout
<< " " << moved
<< " moved, " << changed
<< " changed" << std::endl
;
252 ASSERT_LE(changed
, 3);
254 // mark another osd out
258 for (unsigned j
=0; j
<out
.size(); ++j
) {
259 if (out
[j
] != CRUSH_ITEM_NONE
)
264 cout
<< tchanged
<< " total changed" << std::endl
;
268 TEST_F(CRUSHTest
, straw_zero
) {
269 // zero weight items should have no effect on placement.
271 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
272 const int ROOT_TYPE
= 1;
273 c
->set_type_name(ROOT_TYPE
, "root");
274 const int OSD_TYPE
= 0;
275 c
->set_type_name(OSD_TYPE
, "osd");
278 int items
[n
], weights
[n
];
279 for (int i
=0; i
<n
; ++i
) {
281 weights
[i
] = 0x10000 * (n
-i
-1);
284 c
->set_max_devices(n
);
286 string
root_name0("root0");
288 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
289 ROOT_TYPE
, n
, items
, weights
, &root0
));
290 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
292 string
name0("rule0");
293 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
294 "firstn", pg_pool_t::TYPE_REPLICATED
);
297 string
root_name1("root1");
299 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
300 ROOT_TYPE
, n
-1, items
, weights
, &root1
));
301 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
303 string
name1("rule1");
304 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
305 "firstn", pg_pool_t::TYPE_REPLICATED
);
310 vector
<unsigned> reweight(n
, 0x10000);
311 for (int i
=0; i
<10000; ++i
) {
312 vector
<int> out0
, out1
;
313 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
314 ASSERT_EQ(1u, out0
.size());
315 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
316 ASSERT_EQ(1u, out1
.size());
317 ASSERT_EQ(out0
[0], out1
[0]);
318 //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
322 TEST_F(CRUSHTest
, straw_same
) {
323 // items with the same weight should map about the same as items
324 // with very similar weights.
326 // give the 0 vector a paired stair pattern, with dup weights. note
327 // that the original straw flaw does not appear when there are 2 of
328 // the initial weight, but it does when there is just 1.
330 // give the 1 vector a similar stair pattern, but make the same
331 // steps weights slightly different (no dups). this works.
333 // compare the result and verify that the resulting mapping is
336 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
337 const int ROOT_TYPE
= 1;
338 c
->set_type_name(ROOT_TYPE
, "root");
339 const int OSD_TYPE
= 0;
340 c
->set_type_name(OSD_TYPE
, "osd");
343 int items
[n
], weights
[n
];
344 for (int i
=0; i
<n
; ++i
) {
346 weights
[i
] = 0x10000 * ((i
+1)/2 + 1);
349 c
->set_max_devices(n
);
351 string
root_name0("root0");
353 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
354 ROOT_TYPE
, n
, items
, weights
, &root0
));
355 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
357 string
name0("rule0");
358 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
359 "firstn", pg_pool_t::TYPE_REPLICATED
);
362 for (int i
=0; i
<n
; ++i
) {
364 weights
[i
] = 0x10000 * ((i
+1)/2 + 1) + (i
%2)*100;
367 string
root_name1("root1");
369 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
370 ROOT_TYPE
, n
, items
, weights
, &root1
));
371 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
373 string
name1("rule1");
374 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
375 "firstn", pg_pool_t::TYPE_REPLICATED
);
379 crush_bucket_straw
*sb0
= reinterpret_cast<crush_bucket_straw
*>(c
->get_crush_map()->buckets
[-1-root0
]);
380 crush_bucket_straw
*sb1
= reinterpret_cast<crush_bucket_straw
*>(c
->get_crush_map()->buckets
[-1-root1
]);
382 for (int i
=0; i
<n
; ++i
) {
384 << "\t" << sb0
->item_weights
[i
]
385 << "\t" << sb1
->item_weights
[i
]
387 << "\t" << sb0
->straws
[i
]
388 << "\t" << sb1
->straws
[i
]
394 JSONFormatter
jf(true);
395 jf
.open_object_section("crush");
403 vector
<int> sum0(n
, 0), sum1(n
, 0);
404 vector
<unsigned> reweight(n
, 0x10000);
407 for (int i
=0; i
<max
; ++i
) {
408 vector
<int> out0
, out1
;
409 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
410 ASSERT_EQ(1u, out0
.size());
411 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
412 ASSERT_EQ(1u, out1
.size());
415 if (out0
[0] != out1
[0])
418 for (int i
=0; i
<n
; ++i
) {
420 << "\t" << ((double)weights
[i
] / (double)weights
[0])
421 << "\t" << sum0
[i
] << "\t" << ((double)sum0
[i
]/(double)sum0
[0])
422 << "\t" << sum1
[i
] << "\t" << ((double)sum1
[i
]/(double)sum1
[0])
425 double ratio
= ((double)different
/ (double)max
);
426 cout
<< different
<< " of " << max
<< " = "
428 << " different" << std::endl
;
429 ASSERT_LT(ratio
, .001);
432 double calc_straw2_stddev(int *weights
, int n
, bool verbose
)
434 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
435 const int ROOT_TYPE
= 2;
436 c
->set_type_name(ROOT_TYPE
, "root");
437 const int HOST_TYPE
= 1;
438 c
->set_type_name(HOST_TYPE
, "host");
439 const int OSD_TYPE
= 0;
440 c
->set_type_name(OSD_TYPE
, "osd");
443 for (int i
=0; i
<n
; ++i
) {
447 c
->set_max_devices(n
);
449 string
root_name0("root0");
451 crush_bucket
*b0
= crush_make_bucket(c
->get_crush_map(),
452 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
453 ROOT_TYPE
, n
, items
, weights
);
454 crush_add_bucket(c
->get_crush_map(), 0, b0
, &root0
);
455 c
->set_item_name(root0
, root_name0
);
457 string
name0("rule0");
458 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
459 "firstn", pg_pool_t::TYPE_REPLICATED
);
462 double totalweight
= 0;
463 vector
<unsigned> reweight(n
);
464 for (int i
=0; i
<n
; ++i
) {
466 reweight
[i
] = 0x10000;
467 totalweight
+= weights
[i
];
469 totalweight
/= (double)0x10000;
470 double avgweight
= totalweight
/ n
;
475 for (int i
=0; i
<total
; ++i
) {
477 c
->do_rule(rule0
, i
, out
, 1, reweight
, 0);
481 double expected
= (double)total
/ (double)n
;
483 cout
<< "expect\t\t\t" << expected
<< std::endl
;
487 cout
<< "osd\tweight\tcount\tadjusted\n";
488 std::streamsize p
= cout
.precision();
489 cout
<< std::setprecision(4);
490 for (int i
=0; i
<n
; ++i
) {
491 double w
= (double)weights
[i
] / (double)0x10000;
492 double adj
= (double)sum
[i
] * avgweight
/ w
;
493 stddev
+= (adj
- expected
) * (adj
- expected
);
502 cout
<< std::setprecision(p
);
504 stddev
= sqrt(stddev
/ (double)n
);
506 cout
<< "std dev " << stddev
<< std::endl
;
508 double p
= 1.0 / (double)n
;
509 double estddev
= sqrt(exptotal
* p
* (1.0 - p
));
511 cout
<< " vs " << estddev
<< "\t(expected)" << std::endl
;
516 TEST_F(CRUSHTest
, straw2_stddev
)
520 cout
<< "maxskew\tstddev\n";
521 for (double step
= 1.0; step
< 2; step
+= .25) {
523 for (int i
= 0; i
< n
; ++i
) {
527 double stddev
= calc_straw2_stddev(weights
, n
, true);
528 cout
<< ((double)weights
[n
-1]/(double)weights
[0])
529 << "\t" << stddev
<< std::endl
;
533 TEST_F(CRUSHTest
, straw2_reweight
) {
534 // when we adjust the weight of an item in a straw2 bucket,
535 // we should *only* see movement from or to that item, never
536 // between other items.
558 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
559 const int ROOT_TYPE
= 2;
560 c
->set_type_name(ROOT_TYPE
, "root");
561 const int HOST_TYPE
= 1;
562 c
->set_type_name(HOST_TYPE
, "host");
563 const int OSD_TYPE
= 0;
564 c
->set_type_name(OSD_TYPE
, "osd");
567 for (int i
=0; i
<n
; ++i
) {
569 //weights[i] = 0x10000;
572 c
->set_max_devices(n
);
574 string
root_name0("root0");
576 crush_bucket
*b0
= crush_make_bucket(c
->get_crush_map(),
577 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
578 ROOT_TYPE
, n
, items
, weights
);
579 EXPECT_EQ(0, crush_add_bucket(c
->get_crush_map(), 0, b0
, &root0
));
580 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
582 string
name0("rule0");
583 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
584 "firstn", pg_pool_t::TYPE_REPLICATED
);
588 weights
[changed
] = weights
[changed
] / 10 * (rand() % 10);
590 string
root_name1("root1");
592 crush_bucket
*b1
= crush_make_bucket(c
->get_crush_map(),
593 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
594 ROOT_TYPE
, n
, items
, weights
);
595 EXPECT_EQ(0, crush_add_bucket(c
->get_crush_map(), 0, b1
, &root1
));
596 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
598 string
name1("rule1");
599 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
600 "firstn", pg_pool_t::TYPE_REPLICATED
);
604 double totalweight
= 0;
605 vector
<unsigned> reweight(n
);
606 for (int i
=0; i
<n
; ++i
) {
608 reweight
[i
] = 0x10000;
609 totalweight
+= weights
[i
];
611 totalweight
/= (double)0x10000;
612 double avgweight
= totalweight
/ n
;
617 for (int i
=0; i
<total
; ++i
) {
618 vector
<int> out0
, out1
;
619 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
620 ASSERT_EQ(1u, out0
.size());
622 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
623 ASSERT_EQ(1u, out1
.size());
628 if (out1
[0] == changed
) {
629 ASSERT_EQ(changed
, out0
[0]);
630 } else if (out0
[0] != changed
) {
631 ASSERT_EQ(out0
[0], out1
[0]);
635 double expected
= (double)total
/ (double)n
;
636 cout
<< "expect\t\t\t" << expected
<< std::endl
;
638 cout
<< "osd\tweight\tcount\tadjusted\n";
639 std::streamsize p
= cout
.precision();
640 cout
<< std::setprecision(4);
641 for (int i
=0; i
<n
; ++i
) {
642 double w
= (double)weights
[i
] / (double)0x10000;
643 double adj
= (double)sum
[i
] * avgweight
/ w
;
644 stddev
+= (adj
- expected
) * (adj
- expected
);
651 cout
<< std::setprecision(p
);
653 stddev
= sqrt(stddev
/ (double)n
);
654 cout
<< "std dev " << stddev
<< std::endl
;
656 double p
= 1.0 / (double)n
;
657 double estddev
= sqrt((double)total
* p
* (1.0 - p
));
658 cout
<< " vs " << estddev
<< std::endl
;