1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * LGPL-2.1 (see COPYING-LGPL2.1) or later
11 #include <gtest/gtest.h>
16 #include "common/ceph_argparse.h"
17 #include "common/common_init.h"
18 #include "include/stringify.h"
20 #include "crush/CrushWrapper.h"
21 #include "osd/osd_types.h"
23 std::unique_ptr
<CrushWrapper
> build_indep_map(CephContext
*cct
, int num_rack
,
24 int num_host
, int num_osd
)
26 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
29 c
->set_type_name(5, "root");
30 c
->set_type_name(4, "row");
31 c
->set_type_name(3, "rack");
32 c
->set_type_name(2, "chasis");
33 c
->set_type_name(1, "host");
34 c
->set_type_name(0, "osd");
37 c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
38 5, 0, NULL
, NULL
, &rootno
);
39 c
->set_item_name(rootno
, "default");
41 map
<string
,string
> loc
;
42 loc
["root"] = "default";
45 for (int r
=0; r
<num_rack
; ++r
) {
46 loc
["rack"] = string("rack-") + stringify(r
);
47 for (int h
=0; h
<num_host
; ++h
) {
48 loc
["host"] = string("host-") + stringify(r
) + string("-") + stringify(h
);
49 for (int o
=0; o
<num_osd
; ++o
, ++osd
) {
50 c
->insert_item(cct
, osd
, 1.0, string("osd.") + stringify(osd
), loc
);
56 ret
= c
->add_rule(ruleno
, 4, 123, 1, 20);
57 ceph_assert(ret
== ruleno
);
58 ret
= c
->set_rule_step(ruleno
, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 10, 0);
59 ceph_assert(ret
== 0);
60 ret
= c
->set_rule_step(ruleno
, 1, CRUSH_RULE_TAKE
, rootno
, 0);
61 ceph_assert(ret
== 0);
62 ret
= c
->set_rule_step(ruleno
, 2, CRUSH_RULE_CHOOSELEAF_INDEP
, CRUSH_CHOOSE_N
, 1);
63 ceph_assert(ret
== 0);
64 ret
= c
->set_rule_step(ruleno
, 3, CRUSH_RULE_EMIT
, 0, 0);
65 ceph_assert(ret
== 0);
66 c
->set_rule_name(ruleno
, "data");
71 Formatter
*f
= Formatter::create("json-pretty");
72 f
->open_object_section("crush_map");
82 int get_num_dups(const vector
<int>& v
)
89 else if (n
!= CRUSH_ITEM_NONE
)
95 class CRUSHTest
: public ::testing::Test
100 CephInitParameters
params(CEPH_ENTITY_TYPE_CLIENT
);
101 cct
= common_preinit(params
, CODE_ENVIRONMENT_UTILITY
,
102 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
104 void TearDown() final
110 CephContext
*cct
= nullptr;
113 TEST_F(CRUSHTest
, indep_toosmall
) {
114 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 1, 3, 1));
115 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
116 c
->dump_tree(&cout
, NULL
);
118 for (int x
= 0; x
< 100; ++x
) {
120 c
->do_rule(0, x
, out
, 5, weight
, 0);
121 cout
<< x
<< " -> " << out
<< std::endl
;
123 for (unsigned i
=0; i
<out
.size(); ++i
) {
124 if (out
[i
] == CRUSH_ITEM_NONE
)
127 ASSERT_EQ(2, num_none
);
128 ASSERT_EQ(0, get_num_dups(out
));
132 TEST_F(CRUSHTest
, indep_basic
) {
133 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
134 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
135 c
->dump_tree(&cout
, NULL
);
137 for (int x
= 0; x
< 100; ++x
) {
139 c
->do_rule(0, x
, out
, 5, weight
, 0);
140 cout
<< x
<< " -> " << out
<< std::endl
;
142 for (unsigned i
=0; i
<out
.size(); ++i
) {
143 if (out
[i
] == CRUSH_ITEM_NONE
)
146 ASSERT_EQ(0, num_none
);
147 ASSERT_EQ(0, get_num_dups(out
));
151 TEST_F(CRUSHTest
, indep_out_alt
) {
152 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
153 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
155 // mark a bunch of osds out
157 for (int i
=0; i
<num
/ 2; ++i
)
159 c
->dump_tree(&cout
, NULL
);
161 // need more retries to get 9/9 hosts for x in 0..99
162 c
->set_choose_total_tries(100);
163 for (int x
= 0; x
< 100; ++x
) {
165 c
->do_rule(0, x
, out
, 9, weight
, 0);
166 cout
<< x
<< " -> " << out
<< std::endl
;
168 for (unsigned i
=0; i
<out
.size(); ++i
) {
169 if (out
[i
] == CRUSH_ITEM_NONE
)
172 ASSERT_EQ(0, num_none
);
173 ASSERT_EQ(0, get_num_dups(out
));
177 TEST_F(CRUSHTest
, indep_out_contig
) {
178 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
179 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
181 // mark a bunch of osds out
183 for (int i
=0; i
<num
/ 3; ++i
)
185 c
->dump_tree(&cout
, NULL
);
187 c
->set_choose_total_tries(100);
188 for (int x
= 0; x
< 100; ++x
) {
190 c
->do_rule(0, x
, out
, 7, weight
, 0);
191 cout
<< x
<< " -> " << out
<< std::endl
;
193 for (unsigned i
=0; i
<out
.size(); ++i
) {
194 if (out
[i
] == CRUSH_ITEM_NONE
)
197 ASSERT_EQ(1, num_none
);
198 ASSERT_EQ(0, get_num_dups(out
));
203 TEST_F(CRUSHTest
, indep_out_progressive
) {
204 std::unique_ptr
<CrushWrapper
> c(build_indep_map(cct
, 3, 3, 3));
205 c
->set_choose_total_tries(100);
206 vector
<__u32
> tweight(c
->get_max_devices(), 0x10000);
207 c
->dump_tree(&cout
, NULL
);
210 for (int x
= 1; x
< 5; ++x
) {
211 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
213 std::map
<int,unsigned> pos
;
215 for (unsigned i
=0; i
<weight
.size(); ++i
) {
217 c
->do_rule(0, x
, out
, 7, weight
, 0);
218 cout
<< "(" << i
<< "/" << weight
.size() << " out) "
219 << x
<< " -> " << out
<< std::endl
;
221 for (unsigned k
=0; k
<out
.size(); ++k
) {
222 if (out
[k
] == CRUSH_ITEM_NONE
)
225 ASSERT_EQ(0, get_num_dups(out
));
227 // make sure nothing moved
230 for (unsigned j
=0; j
<out
.size(); ++j
) {
231 if (i
&& out
[j
] != prev
[j
]) {
235 if (out
[j
] == CRUSH_ITEM_NONE
) {
238 if (i
&& pos
.count(out
[j
])) {
239 // result shouldn't have moved position
240 if (j
!= pos
[out
[j
]]) {
241 cout
<< " " << out
[j
] << " moved from " << pos
[out
[j
]] << " to " << j
<< std::endl
;
244 //ASSERT_EQ(j, pos[out[j]]);
247 if (moved
|| changed
)
248 cout
<< " " << moved
<< " moved, " << changed
<< " changed" << std::endl
;
250 ASSERT_LE(changed
, 3);
252 // mark another osd out
256 for (unsigned j
=0; j
<out
.size(); ++j
) {
257 if (out
[j
] != CRUSH_ITEM_NONE
)
262 cout
<< tchanged
<< " total changed" << std::endl
;
266 TEST_F(CRUSHTest
, straw_zero
) {
267 // zero weight items should have no effect on placement.
269 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
270 const int ROOT_TYPE
= 1;
271 c
->set_type_name(ROOT_TYPE
, "root");
272 const int OSD_TYPE
= 0;
273 c
->set_type_name(OSD_TYPE
, "osd");
276 int items
[n
], weights
[n
];
277 for (int i
=0; i
<n
; ++i
) {
279 weights
[i
] = 0x10000 * (n
-i
-1);
282 c
->set_max_devices(n
);
284 string
root_name0("root0");
286 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
287 ROOT_TYPE
, n
, items
, weights
, &root0
));
288 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
290 string
name0("rule0");
291 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
292 "firstn", pg_pool_t::TYPE_REPLICATED
);
295 string
root_name1("root1");
297 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
298 ROOT_TYPE
, n
-1, items
, weights
, &root1
));
299 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
301 string
name1("rule1");
302 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
303 "firstn", pg_pool_t::TYPE_REPLICATED
);
308 vector
<unsigned> reweight(n
, 0x10000);
309 for (int i
=0; i
<10000; ++i
) {
310 vector
<int> out0
, out1
;
311 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
312 ASSERT_EQ(1u, out0
.size());
313 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
314 ASSERT_EQ(1u, out1
.size());
315 ASSERT_EQ(out0
[0], out1
[0]);
316 //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
320 TEST_F(CRUSHTest
, straw_same
) {
321 // items with the same weight should map about the same as items
322 // with very similar weights.
324 // give the 0 vector a paired stair pattern, with dup weights. note
325 // that the original straw flaw does not appear when there are 2 of
326 // the initial weight, but it does when there is just 1.
328 // give the 1 vector a similar stair pattern, but make the same
329 // steps weights slightly different (no dups). this works.
331 // compare the result and verify that the resulting mapping is
334 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
335 const int ROOT_TYPE
= 1;
336 c
->set_type_name(ROOT_TYPE
, "root");
337 const int OSD_TYPE
= 0;
338 c
->set_type_name(OSD_TYPE
, "osd");
341 int items
[n
], weights
[n
];
342 for (int i
=0; i
<n
; ++i
) {
344 weights
[i
] = 0x10000 * ((i
+1)/2 + 1);
347 c
->set_max_devices(n
);
349 string
root_name0("root0");
351 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
352 ROOT_TYPE
, n
, items
, weights
, &root0
));
353 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
355 string
name0("rule0");
356 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
357 "firstn", pg_pool_t::TYPE_REPLICATED
);
360 for (int i
=0; i
<n
; ++i
) {
362 weights
[i
] = 0x10000 * ((i
+1)/2 + 1) + (i
%2)*100;
365 string
root_name1("root1");
367 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
368 ROOT_TYPE
, n
, items
, weights
, &root1
));
369 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
371 string
name1("rule1");
372 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
373 "firstn", pg_pool_t::TYPE_REPLICATED
);
377 crush_bucket_straw
*sb0
= reinterpret_cast<crush_bucket_straw
*>(c
->get_crush_map()->buckets
[-1-root0
]);
378 crush_bucket_straw
*sb1
= reinterpret_cast<crush_bucket_straw
*>(c
->get_crush_map()->buckets
[-1-root1
]);
380 for (int i
=0; i
<n
; ++i
) {
382 << "\t" << sb0
->item_weights
[i
]
383 << "\t" << sb1
->item_weights
[i
]
385 << "\t" << sb0
->straws
[i
]
386 << "\t" << sb1
->straws
[i
]
392 JSONFormatter
jf(true);
393 jf
.open_object_section("crush");
401 vector
<int> sum0(n
, 0), sum1(n
, 0);
402 vector
<unsigned> reweight(n
, 0x10000);
405 for (int i
=0; i
<max
; ++i
) {
406 vector
<int> out0
, out1
;
407 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
408 ASSERT_EQ(1u, out0
.size());
409 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
410 ASSERT_EQ(1u, out1
.size());
413 if (out0
[0] != out1
[0])
416 for (int i
=0; i
<n
; ++i
) {
418 << "\t" << ((double)weights
[i
] / (double)weights
[0])
419 << "\t" << sum0
[i
] << "\t" << ((double)sum0
[i
]/(double)sum0
[0])
420 << "\t" << sum1
[i
] << "\t" << ((double)sum1
[i
]/(double)sum1
[0])
423 double ratio
= ((double)different
/ (double)max
);
424 cout
<< different
<< " of " << max
<< " = "
426 << " different" << std::endl
;
427 ASSERT_LT(ratio
, .001);
430 double calc_straw2_stddev(int *weights
, int n
, bool verbose
)
432 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
433 const int ROOT_TYPE
= 2;
434 c
->set_type_name(ROOT_TYPE
, "root");
435 const int HOST_TYPE
= 1;
436 c
->set_type_name(HOST_TYPE
, "host");
437 const int OSD_TYPE
= 0;
438 c
->set_type_name(OSD_TYPE
, "osd");
441 for (int i
=0; i
<n
; ++i
) {
445 c
->set_max_devices(n
);
447 string
root_name0("root0");
449 crush_bucket
*b0
= crush_make_bucket(c
->get_crush_map(),
450 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
451 ROOT_TYPE
, n
, items
, weights
);
452 crush_add_bucket(c
->get_crush_map(), 0, b0
, &root0
);
453 c
->set_item_name(root0
, root_name0
);
455 string
name0("rule0");
456 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
457 "firstn", pg_pool_t::TYPE_REPLICATED
);
460 double totalweight
= 0;
461 vector
<unsigned> reweight(n
);
462 for (int i
=0; i
<n
; ++i
) {
464 reweight
[i
] = 0x10000;
465 totalweight
+= weights
[i
];
467 totalweight
/= (double)0x10000;
468 double avgweight
= totalweight
/ n
;
473 for (int i
=0; i
<total
; ++i
) {
475 c
->do_rule(rule0
, i
, out
, 1, reweight
, 0);
479 double expected
= (double)total
/ (double)n
;
481 cout
<< "expect\t\t\t" << expected
<< std::endl
;
485 cout
<< "osd\tweight\tcount\tadjusted\n";
486 std::streamsize p
= cout
.precision();
487 cout
<< std::setprecision(4);
488 for (int i
=0; i
<n
; ++i
) {
489 double w
= (double)weights
[i
] / (double)0x10000;
490 double adj
= (double)sum
[i
] * avgweight
/ w
;
491 stddev
+= (adj
- expected
) * (adj
- expected
);
500 cout
<< std::setprecision(p
);
502 stddev
= sqrt(stddev
/ (double)n
);
504 cout
<< "std dev " << stddev
<< std::endl
;
506 double p
= 1.0 / (double)n
;
507 double estddev
= sqrt(exptotal
* p
* (1.0 - p
));
509 cout
<< " vs " << estddev
<< "\t(expected)" << std::endl
;
514 TEST_F(CRUSHTest
, straw2_stddev
)
518 cout
<< "maxskew\tstddev\n";
519 for (double step
= 1.0; step
< 2; step
+= .25) {
521 for (int i
= 0; i
< n
; ++i
) {
525 double stddev
= calc_straw2_stddev(weights
, n
, true);
526 cout
<< ((double)weights
[n
-1]/(double)weights
[0])
527 << "\t" << stddev
<< std::endl
;
531 TEST_F(CRUSHTest
, straw2_reweight
) {
532 // when we adjust the weight of an item in a straw2 bucket,
533 // we should *only* see movement from or to that item, never
534 // between other items.
556 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
557 const int ROOT_TYPE
= 2;
558 c
->set_type_name(ROOT_TYPE
, "root");
559 const int HOST_TYPE
= 1;
560 c
->set_type_name(HOST_TYPE
, "host");
561 const int OSD_TYPE
= 0;
562 c
->set_type_name(OSD_TYPE
, "osd");
565 for (int i
=0; i
<n
; ++i
) {
567 //weights[i] = 0x10000;
570 c
->set_max_devices(n
);
572 string
root_name0("root0");
574 crush_bucket
*b0
= crush_make_bucket(c
->get_crush_map(),
575 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
576 ROOT_TYPE
, n
, items
, weights
);
577 EXPECT_EQ(0, crush_add_bucket(c
->get_crush_map(), 0, b0
, &root0
));
578 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
580 string
name0("rule0");
581 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
582 "firstn", pg_pool_t::TYPE_REPLICATED
);
586 weights
[changed
] = weights
[changed
] / 10 * (rand() % 10);
588 string
root_name1("root1");
590 crush_bucket
*b1
= crush_make_bucket(c
->get_crush_map(),
591 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
592 ROOT_TYPE
, n
, items
, weights
);
593 EXPECT_EQ(0, crush_add_bucket(c
->get_crush_map(), 0, b1
, &root1
));
594 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
596 string
name1("rule1");
597 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
598 "firstn", pg_pool_t::TYPE_REPLICATED
);
602 double totalweight
= 0;
603 vector
<unsigned> reweight(n
);
604 for (int i
=0; i
<n
; ++i
) {
606 reweight
[i
] = 0x10000;
607 totalweight
+= weights
[i
];
609 totalweight
/= (double)0x10000;
610 double avgweight
= totalweight
/ n
;
615 for (int i
=0; i
<total
; ++i
) {
616 vector
<int> out0
, out1
;
617 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
618 ASSERT_EQ(1u, out0
.size());
620 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
621 ASSERT_EQ(1u, out1
.size());
626 if (out1
[0] == changed
) {
627 ASSERT_EQ(changed
, out0
[0]);
628 } else if (out0
[0] != changed
) {
629 ASSERT_EQ(out0
[0], out1
[0]);
633 double expected
= (double)total
/ (double)n
;
634 cout
<< "expect\t\t\t" << expected
<< std::endl
;
636 cout
<< "osd\tweight\tcount\tadjusted\n";
637 std::streamsize p
= cout
.precision();
638 cout
<< std::setprecision(4);
639 for (int i
=0; i
<n
; ++i
) {
640 double w
= (double)weights
[i
] / (double)0x10000;
641 double adj
= (double)sum
[i
] * avgweight
/ w
;
642 stddev
+= (adj
- expected
) * (adj
- expected
);
649 cout
<< std::setprecision(p
);
651 stddev
= sqrt(stddev
/ (double)n
);
652 cout
<< "std dev " << stddev
<< std::endl
;
654 double p
= 1.0 / (double)n
;
655 double estddev
= sqrt((double)total
* p
* (1.0 - p
));
656 cout
<< " vs " << estddev
<< std::endl
;