]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Osd backfill test | |
3 | """ | |
4 | import logging | |
7c673cae | 5 | import time |
e306af50 | 6 | from tasks import ceph_manager |
7c673cae FG |
7 | from teuthology import misc as teuthology |
8 | ||
9 | ||
10 | log = logging.getLogger(__name__) | |
11 | ||
12 | ||
def rados_start(ctx, remote, cmd):
    """
    Run a remote rados command asynchronously (currently used only to
    write data).

    :param ctx: teuthology run context (used to locate the test dir)
    :param remote: remote host object to run the command on
    :param cmd: list of rados arguments, appended to the wrapper prefix
    :returns: the (still running) process handle; the caller is
              responsible for wait()-ing on it and checking the status
    """
    # Lazy %-args: the join is only evaluated if INFO is enabled.
    log.info("rados %s", ' '.join(cmd))
    testdir = teuthology.get_testdir(ctx)
    # Standard teuthology command prefix: ulimit adjustment plus
    # coverage collection wrapping the actual 'rados' binary.
    pre = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'rados',
    ]
    pre.extend(cmd)
    # wait=False so the caller can overlap other work with the write
    # and collect the exit status later via proc.wait().
    proc = remote.run(
        args=pre,
        wait=False,
    )
    return proc
31 | ||
def task(ctx, config):
    """
    Test OSD backfill on a 3-OSD cluster:

    - write some data
    - mark osd.0 out and down to force a rebalance/backfill
    - write more data while blackholing and reviving osd.1, creating a
      divergent backfill target
    - re-add osd.0 and wait for the cluster to become clean again

    :param ctx: teuthology run context
    :param config: task configuration; must be a dict (or None)
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'backfill task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s', num_osds)
    # The scenario below hard-codes osd ids 0-2; require exactly 3 osds.
    assert num_osds == 3

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    # wait for all three osds to come up, then settle to a clean state
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    # write some data
    p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096',
                               '--no-cleanup'])
    err = p.wait()
    log.info('err is %d', err)

    # mark osd.0 out to trigger a rebalance/backfill
    manager.mark_out_osd(0)

    # also mark it down so it won't be included in pg_temps
    manager.kill_osd(0)
    manager.mark_down_osd(0)

    # wait for everything to peer and be happy...
    manager.flush_pg_stats([1, 2])
    manager.wait_for_recovery()

    # write some new data
    p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096',
                               '--no-cleanup'])

    time.sleep(15)

    # blackhole + restart osd.1
    # this triggers a divergent backfill target
    manager.blackhole_kill_osd(1)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.wait()
    log.info('err is %d', err)

    # wait for osd.1 and osd.2 to be up
    manager.wait_till_osd_is_up(1)
    manager.wait_till_osd_is_up(2)

    # cluster must recover
    manager.flush_pg_stats([1, 2])
    manager.wait_for_recovery()

    # re-add osd.0 and wait for a fully clean cluster
    manager.revive_osd(0)
    manager.flush_pg_stats([1, 2])
    manager.wait_for_clean()
103 | ||
104 |