1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * LGPL2.1 (see COPYING-LGPL2.1) or later
13 #include <gtest/gtest.h>
15 #include "include/stringify.h"
17 #include "crush/CrushWrapper.h"
18 #include "osd/osd_types.h"
22 std::unique_ptr
<CrushWrapper
> build_indep_map(CephContext
*cct
, int num_rack
,
23 int num_host
, int num_osd
)
25 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
28 c
->set_type_name(5, "root");
29 c
->set_type_name(4, "row");
30 c
->set_type_name(3, "rack");
31 c
->set_type_name(2, "chasis");
32 c
->set_type_name(1, "host");
33 c
->set_type_name(0, "osd");
36 c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
37 5, 0, NULL
, NULL
, &rootno
);
38 c
->set_item_name(rootno
, "default");
40 map
<string
,string
> loc
;
41 loc
["root"] = "default";
44 for (int r
=0; r
<num_rack
; ++r
) {
45 loc
["rack"] = string("rack-") + stringify(r
);
46 for (int h
=0; h
<num_host
; ++h
) {
47 loc
["host"] = string("host-") + stringify(r
) + string("-") + stringify(h
);
48 for (int o
=0; o
<num_osd
; ++o
, ++osd
) {
49 c
->insert_item(cct
, osd
, 1.0, string("osd.") + stringify(osd
), loc
);
57 ret
= c
->add_rule(4, rule
, 123, 1, 20, ruleno
);
58 assert(ret
== ruleno
);
59 ret
= c
->set_rule_step(ruleno
, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 10, 0);
61 ret
= c
->set_rule_step(ruleno
, 1, CRUSH_RULE_TAKE
, rootno
, 0);
63 ret
= c
->set_rule_step(ruleno
, 2, CRUSH_RULE_CHOOSELEAF_INDEP
, CRUSH_CHOOSE_N
, 1);
65 ret
= c
->set_rule_step(ruleno
, 3, CRUSH_RULE_EMIT
, 0, 0);
67 c
->set_rule_name(ruleno
, "data");
72 Formatter
*f
= Formatter::create("json-pretty");
73 f
->open_object_section("crush_map");
83 int get_num_dups(const vector
<int>& v
)
87 for (unsigned i
=0; i
<v
.size(); ++i
) {
90 else if (v
[i
] != CRUSH_ITEM_NONE
)
96 TEST(CRUSH
, indep_toosmall
) {
97 std::unique_ptr
<CrushWrapper
> c(build_indep_map(g_ceph_context
, 1, 3, 1));
98 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
99 c
->dump_tree(&cout
, NULL
);
101 for (int x
= 0; x
< 100; ++x
) {
103 c
->do_rule(0, x
, out
, 5, weight
, 0);
104 cout
<< x
<< " -> " << out
<< std::endl
;
106 for (unsigned i
=0; i
<out
.size(); ++i
) {
107 if (out
[i
] == CRUSH_ITEM_NONE
)
110 ASSERT_EQ(2, num_none
);
111 ASSERT_EQ(0, get_num_dups(out
));
115 TEST(CRUSH
, indep_basic
) {
116 std::unique_ptr
<CrushWrapper
> c(build_indep_map(g_ceph_context
, 3, 3, 3));
117 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
118 c
->dump_tree(&cout
, NULL
);
120 for (int x
= 0; x
< 100; ++x
) {
122 c
->do_rule(0, x
, out
, 5, weight
, 0);
123 cout
<< x
<< " -> " << out
<< std::endl
;
125 for (unsigned i
=0; i
<out
.size(); ++i
) {
126 if (out
[i
] == CRUSH_ITEM_NONE
)
129 ASSERT_EQ(0, num_none
);
130 ASSERT_EQ(0, get_num_dups(out
));
134 TEST(CRUSH
, indep_out_alt
) {
135 std::unique_ptr
<CrushWrapper
> c(build_indep_map(g_ceph_context
, 3, 3, 3));
136 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
138 // mark a bunch of osds out
140 for (int i
=0; i
<num
/ 2; ++i
)
142 c
->dump_tree(&cout
, NULL
);
144 // need more retries to get 9/9 hosts for x in 0..99
145 c
->set_choose_total_tries(100);
146 for (int x
= 0; x
< 100; ++x
) {
148 c
->do_rule(0, x
, out
, 9, weight
, 0);
149 cout
<< x
<< " -> " << out
<< std::endl
;
151 for (unsigned i
=0; i
<out
.size(); ++i
) {
152 if (out
[i
] == CRUSH_ITEM_NONE
)
155 ASSERT_EQ(0, num_none
);
156 ASSERT_EQ(0, get_num_dups(out
));
160 TEST(CRUSH
, indep_out_contig
) {
161 std::unique_ptr
<CrushWrapper
> c(build_indep_map(g_ceph_context
, 3, 3, 3));
162 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
164 // mark a bunch of osds out
166 for (int i
=0; i
<num
/ 3; ++i
)
168 c
->dump_tree(&cout
, NULL
);
170 c
->set_choose_total_tries(100);
171 for (int x
= 0; x
< 100; ++x
) {
173 c
->do_rule(0, x
, out
, 7, weight
, 0);
174 cout
<< x
<< " -> " << out
<< std::endl
;
176 for (unsigned i
=0; i
<out
.size(); ++i
) {
177 if (out
[i
] == CRUSH_ITEM_NONE
)
180 ASSERT_EQ(1, num_none
);
181 ASSERT_EQ(0, get_num_dups(out
));
186 TEST(CRUSH
, indep_out_progressive
) {
187 std::unique_ptr
<CrushWrapper
> c(build_indep_map(g_ceph_context
, 3, 3, 3));
188 c
->set_choose_total_tries(100);
189 vector
<__u32
> tweight(c
->get_max_devices(), 0x10000);
190 c
->dump_tree(&cout
, NULL
);
193 for (int x
= 1; x
< 5; ++x
) {
194 vector
<__u32
> weight(c
->get_max_devices(), 0x10000);
196 std::map
<int,unsigned> pos
;
198 for (unsigned i
=0; i
<weight
.size(); ++i
) {
200 c
->do_rule(0, x
, out
, 7, weight
, 0);
201 cout
<< "(" << i
<< "/" << weight
.size() << " out) "
202 << x
<< " -> " << out
<< std::endl
;
204 for (unsigned k
=0; k
<out
.size(); ++k
) {
205 if (out
[k
] == CRUSH_ITEM_NONE
)
208 ASSERT_EQ(0, get_num_dups(out
));
210 // make sure nothing moved
213 for (unsigned j
=0; j
<out
.size(); ++j
) {
214 if (i
&& out
[j
] != prev
[j
]) {
218 if (out
[j
] == CRUSH_ITEM_NONE
) {
221 if (i
&& pos
.count(out
[j
])) {
222 // result shouldn't have moved position
223 if (j
!= pos
[out
[j
]]) {
224 cout
<< " " << out
[j
] << " moved from " << pos
[out
[j
]] << " to " << j
<< std::endl
;
227 //ASSERT_EQ(j, pos[out[j]]);
230 if (moved
|| changed
)
231 cout
<< " " << moved
<< " moved, " << changed
<< " changed" << std::endl
;
233 ASSERT_LE(changed
, 3);
235 // mark another osd out
239 for (unsigned j
=0; j
<out
.size(); ++j
) {
240 if (out
[j
] != CRUSH_ITEM_NONE
)
245 cout
<< tchanged
<< " total changed" << std::endl
;
249 TEST(CRUSH
, straw_zero
) {
250 // zero weight items should have no effect on placement.
252 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
253 const int ROOT_TYPE
= 1;
254 c
->set_type_name(ROOT_TYPE
, "root");
255 const int OSD_TYPE
= 0;
256 c
->set_type_name(OSD_TYPE
, "osd");
259 int items
[n
], weights
[n
];
260 for (int i
=0; i
<n
; ++i
) {
262 weights
[i
] = 0x10000 * (n
-i
-1);
265 c
->set_max_devices(n
);
267 string
root_name0("root0");
269 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
270 ROOT_TYPE
, n
, items
, weights
, &root0
));
271 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
273 string
name0("rule0");
274 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
275 "firstn", pg_pool_t::TYPE_REPLICATED
);
278 string
root_name1("root1");
280 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
281 ROOT_TYPE
, n
-1, items
, weights
, &root1
));
282 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
284 string
name1("rule1");
285 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
286 "firstn", pg_pool_t::TYPE_REPLICATED
);
291 vector
<unsigned> reweight(n
, 0x10000);
292 for (int i
=0; i
<10000; ++i
) {
293 vector
<int> out0
, out1
;
294 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
295 ASSERT_EQ(1u, out0
.size());
296 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
297 ASSERT_EQ(1u, out1
.size());
298 ASSERT_EQ(out0
[0], out1
[0]);
299 //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
303 TEST(CRUSH
, straw_same
) {
304 // items with the same weight should map about the same as items
305 // with very similar weights.
307 // give the 0 vector a paired stair pattern, with dup weights. note
308 // that the original straw flaw does not appear when there are 2 of
309 // the initial weight, but it does when there is just 1.
311 // give the 1 vector a similar stair pattern, but make the same
312 // steps weights slightly different (no dups). this works.
314 // compare the result and verify that the resulting mapping is
317 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
318 const int ROOT_TYPE
= 1;
319 c
->set_type_name(ROOT_TYPE
, "root");
320 const int OSD_TYPE
= 0;
321 c
->set_type_name(OSD_TYPE
, "osd");
324 int items
[n
], weights
[n
];
325 for (int i
=0; i
<n
; ++i
) {
327 weights
[i
] = 0x10000 * ((i
+1)/2 + 1);
330 c
->set_max_devices(n
);
332 string
root_name0("root0");
334 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
335 ROOT_TYPE
, n
, items
, weights
, &root0
));
336 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
338 string
name0("rule0");
339 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
340 "firstn", pg_pool_t::TYPE_REPLICATED
);
343 for (int i
=0; i
<n
; ++i
) {
345 weights
[i
] = 0x10000 * ((i
+1)/2 + 1) + (i
%2)*100;
348 string
root_name1("root1");
350 EXPECT_EQ(0, c
->add_bucket(0, CRUSH_BUCKET_STRAW
, CRUSH_HASH_RJENKINS1
,
351 ROOT_TYPE
, n
, items
, weights
, &root1
));
352 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
354 string
name1("rule1");
355 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
356 "firstn", pg_pool_t::TYPE_REPLICATED
);
360 crush_bucket_straw
*sb0
= reinterpret_cast<crush_bucket_straw
*>(c
->get_crush_map()->buckets
[-1-root0
]);
361 crush_bucket_straw
*sb1
= reinterpret_cast<crush_bucket_straw
*>(c
->get_crush_map()->buckets
[-1-root1
]);
363 for (int i
=0; i
<n
; ++i
) {
365 << "\t" << sb0
->item_weights
[i
]
366 << "\t" << sb1
->item_weights
[i
]
368 << "\t" << sb0
->straws
[i
]
369 << "\t" << sb1
->straws
[i
]
375 JSONFormatter
jf(true);
376 jf
.open_object_section("crush");
384 vector
<int> sum0(n
, 0), sum1(n
, 0);
385 vector
<unsigned> reweight(n
, 0x10000);
388 for (int i
=0; i
<max
; ++i
) {
389 vector
<int> out0
, out1
;
390 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
391 ASSERT_EQ(1u, out0
.size());
392 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
393 ASSERT_EQ(1u, out1
.size());
396 if (out0
[0] != out1
[0])
399 for (int i
=0; i
<n
; ++i
) {
401 << "\t" << ((double)weights
[i
] / (double)weights
[0])
402 << "\t" << sum0
[i
] << "\t" << ((double)sum0
[i
]/(double)sum0
[0])
403 << "\t" << sum1
[i
] << "\t" << ((double)sum1
[i
]/(double)sum1
[0])
406 double ratio
= ((double)different
/ (double)max
);
407 cout
<< different
<< " of " << max
<< " = "
409 << " different" << std::endl
;
410 ASSERT_LT(ratio
, .001);
413 double calc_straw2_stddev(int *weights
, int n
, bool verbose
)
415 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
416 const int ROOT_TYPE
= 2;
417 c
->set_type_name(ROOT_TYPE
, "root");
418 const int HOST_TYPE
= 1;
419 c
->set_type_name(HOST_TYPE
, "host");
420 const int OSD_TYPE
= 0;
421 c
->set_type_name(OSD_TYPE
, "osd");
424 for (int i
=0; i
<n
; ++i
) {
428 c
->set_max_devices(n
);
430 string
root_name0("root0");
432 crush_bucket
*b0
= crush_make_bucket(c
->get_crush_map(),
433 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
434 ROOT_TYPE
, n
, items
, weights
);
435 crush_add_bucket(c
->get_crush_map(), 0, b0
, &root0
);
436 c
->set_item_name(root0
, root_name0
);
438 string
name0("rule0");
439 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
440 "firstn", pg_pool_t::TYPE_REPLICATED
);
443 double totalweight
= 0;
444 vector
<unsigned> reweight(n
);
445 for (int i
=0; i
<n
; ++i
) {
447 reweight
[i
] = 0x10000;
448 totalweight
+= weights
[i
];
450 totalweight
/= (double)0x10000;
451 double avgweight
= totalweight
/ n
;
456 for (int i
=0; i
<total
; ++i
) {
458 c
->do_rule(rule0
, i
, out
, 1, reweight
, 0);
462 double expected
= (double)total
/ (double)n
;
464 cout
<< "expect\t\t\t" << expected
<< std::endl
;
468 cout
<< "osd\tweight\tcount\tadjusted\n";
469 std::streamsize p
= cout
.precision();
470 cout
<< std::setprecision(4);
471 for (int i
=0; i
<n
; ++i
) {
472 double w
= (double)weights
[i
] / (double)0x10000;
473 double adj
= (double)sum
[i
] * avgweight
/ w
;
474 stddev
+= (adj
- expected
) * (adj
- expected
);
483 cout
<< std::setprecision(p
);
485 stddev
= sqrt(stddev
/ (double)n
);
487 cout
<< "std dev " << stddev
<< std::endl
;
489 double p
= 1.0 / (double)n
;
490 double estddev
= sqrt(exptotal
* p
* (1.0 - p
));
492 cout
<< " vs " << estddev
<< "\t(expected)" << std::endl
;
497 TEST(CRUSH
, straw2_stddev
)
501 cout
<< "maxskew\tstddev\n";
502 for (double step
= 1.0; step
< 2; step
+= .25) {
504 for (int i
= 0; i
< n
; ++i
) {
508 double stddev
= calc_straw2_stddev(weights
, n
, true);
509 cout
<< ((double)weights
[n
-1]/(double)weights
[0])
510 << "\t" << stddev
<< std::endl
;
514 TEST(CRUSH
, straw2_reweight
) {
515 // when we adjust the weight of an item in a straw2 bucket,
516 // we should *only* see movement from or to that item, never
517 // between other items.
539 std::unique_ptr
<CrushWrapper
> c(new CrushWrapper
);
540 const int ROOT_TYPE
= 2;
541 c
->set_type_name(ROOT_TYPE
, "root");
542 const int HOST_TYPE
= 1;
543 c
->set_type_name(HOST_TYPE
, "host");
544 const int OSD_TYPE
= 0;
545 c
->set_type_name(OSD_TYPE
, "osd");
548 for (int i
=0; i
<n
; ++i
) {
550 //weights[i] = 0x10000;
553 c
->set_max_devices(n
);
555 string
root_name0("root0");
557 crush_bucket
*b0
= crush_make_bucket(c
->get_crush_map(),
558 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
559 ROOT_TYPE
, n
, items
, weights
);
560 EXPECT_EQ(0, crush_add_bucket(c
->get_crush_map(), 0, b0
, &root0
));
561 EXPECT_EQ(0, c
->set_item_name(root0
, root_name0
));
563 string
name0("rule0");
564 int rule0
= c
->add_simple_rule(name0
, root_name0
, "osd", "",
565 "firstn", pg_pool_t::TYPE_REPLICATED
);
569 weights
[changed
] = weights
[changed
] / 10 * (rand() % 10);
571 string
root_name1("root1");
573 crush_bucket
*b1
= crush_make_bucket(c
->get_crush_map(),
574 CRUSH_BUCKET_STRAW2
, CRUSH_HASH_RJENKINS1
,
575 ROOT_TYPE
, n
, items
, weights
);
576 EXPECT_EQ(0, crush_add_bucket(c
->get_crush_map(), 0, b1
, &root1
));
577 EXPECT_EQ(0, c
->set_item_name(root1
, root_name1
));
579 string
name1("rule1");
580 int rule1
= c
->add_simple_rule(name1
, root_name1
, "osd", "",
581 "firstn", pg_pool_t::TYPE_REPLICATED
);
585 double totalweight
= 0;
586 vector
<unsigned> reweight(n
);
587 for (int i
=0; i
<n
; ++i
) {
589 reweight
[i
] = 0x10000;
590 totalweight
+= weights
[i
];
592 totalweight
/= (double)0x10000;
593 double avgweight
= totalweight
/ n
;
598 for (int i
=0; i
<total
; ++i
) {
599 vector
<int> out0
, out1
;
600 c
->do_rule(rule0
, i
, out0
, 1, reweight
, 0);
601 ASSERT_EQ(1u, out0
.size());
603 c
->do_rule(rule1
, i
, out1
, 1, reweight
, 0);
604 ASSERT_EQ(1u, out1
.size());
609 if (out1
[0] == changed
) {
610 ASSERT_EQ(changed
, out0
[0]);
611 } else if (out0
[0] != changed
) {
612 ASSERT_EQ(out0
[0], out1
[0]);
616 double expected
= (double)total
/ (double)n
;
617 cout
<< "expect\t\t\t" << expected
<< std::endl
;
619 cout
<< "osd\tweight\tcount\tadjusted\n";
620 std::streamsize p
= cout
.precision();
621 cout
<< std::setprecision(4);
622 for (int i
=0; i
<n
; ++i
) {
623 double w
= (double)weights
[i
] / (double)0x10000;
624 double adj
= (double)sum
[i
] * avgweight
/ w
;
625 stddev
+= (adj
- expected
) * (adj
- expected
);
632 cout
<< std::setprecision(p
);
634 stddev
= sqrt(stddev
/ (double)n
);
635 cout
<< "std dev " << stddev
<< std::endl
;
637 double p
= 1.0 / (double)n
;
638 double estddev
= sqrt((double)total
* p
* (1.0 - p
));
639 cout
<< " vs " << estddev
<< std::endl
;