# ceph.git: ceph/qa/tasks/recovery_bench.py (gitweb view, git.proxmox.com)
"""
Recovery system benchmarking
"""
# contextlib, json, logging, random, gevent and ceph_manager are referenced by
# the code below but their import lines were missing from this extract; time
# is needed by the polling/settle delays.
import contextlib
import json
import logging
import random
import time
from cStringIO import StringIO

import gevent
from teuthology import misc as teuthology

import ceph_manager
# Module-level logger used by task() and RecoveryBencher.
log = logging.getLogger(__name__)
@contextlib.contextmanager
def task(ctx, config):
    """
    Benchmark the recovery system.

    Generates objects with smalliobench, runs it normally to get a
    baseline performance measurement, then marks an OSD out and reruns
    to measure performance during recovery.

    The config should be as follows:

    recovery_bench:
        duration: <seconds for each measurement run>
        num_objects: <number of objects>
        io_size: <io size in bytes>

    example (these values are also the defaults):

    tasks:
    - recovery_bench:
        duration: 60
        num_objects: 500
        io_size: 4096

    :param ctx: run context; ``ctx.cluster`` is used to find the first
                monitor's remote and to count the OSDs.
    :param config: dict with the options above, or None for the defaults.
    """
    # NOTE(review): rebuilt from a gitweb extract that dropped many lines.
    # The None default, the positional/ctx arguments to CephManager, the
    # sleep interval, and the try/yield/finally skeleton were not present in
    # the extract -- confirm them against upstream before merging.
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'recovery_bench task only accepts a dict for configuration'

    log.info('Beginning recovery bench...')

    first_mon = teuthology.get_first_mon(ctx, config)
    # .keys() instead of the Python-2-only .iterkeys(); the one-element
    # unpacking still fails loudly if the role maps to several remotes.
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # Do not start benchmarking until every configured OSD reports "up".
    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    # The constructor spawns the benchmark greenlet immediately.
    bench_proc = RecoveryBencher(manager, config)
    try:
        yield
    finally:
        log.info('joining recovery bencher')
        bench_proc.do_join()
class RecoveryBencher(object):
    """
    Measure smalliobench latency before and during recovery.

    Constructing an instance waits for the cluster to be clean and then
    spawns a greenlet running do_bench(); do_join() waits for it.

    NOTE(review): rebuilt from a gitweb extract that dropped many lines.
    Statements that were not present in the extract -- the run() call
    wrappers and their 'adjust-ulimits' / 'ceph-coverage' / '--init-only'
    arguments, the stdout/stderr capture, the settle delay after marking
    the OSD out, the body of do_join(), and the unconditional assignment of
    self.log -- are reconstructions; confirm against upstream before merging.
    """

    def __init__(self, manager, config):
        """
        :param manager: cluster manager; must provide wait_for_clean(),
                        get_osd_status(), raw_cluster_cmd() and ``ctx``
        :param config: dict with optional "duration", "num_objects" and
                       "io_size" keys, or None to use the defaults
        """
        self.ceph_manager = manager
        self.ceph_manager.wait_for_clean()

        osd_status = self.ceph_manager.get_osd_status()
        self.osds = osd_status['up']

        self.config = config
        if self.config is None:
            self.config = dict()

        def tmp(x):
            """
            Local wrapper to print value.
            """
            print(x)
        self.log = tmp

        log.info("spawning thread")

        self.thread = gevent.spawn(self.do_bench)

    def do_join(self):
        """
        Join the recovery bencher.  This is called after the main
        task body has finished.
        """
        # get() rather than join(): it re-raises an exception that escaped
        # do_bench() instead of silently discarding it.
        self.thread.get()

    @staticmethod
    def _smalliobench_args(testdir, options):
        """
        Build a smalliobench command line.

        :param testdir: test directory holding archive/coverage
        :param options: iterable of extra command-line words to append
        :returns: list of argv words
        """
        return [
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'smalliobench',
            '--use-prefix', 'recovery_bench',
        ] + list(options)

    def _measure(self, remote, testdir, duration, io_size):
        """
        Run one timed smalliobench pass against the existing objects and
        log its latency summary.
        """
        proc = remote.run(
            args=self._smalliobench_args(testdir, [
                '--do-not-init', '1',
                '--duration', str(duration),
                '--io-size', str(io_size),
            ]),
            stdout=StringIO(),
            stderr=StringIO(),
            wait=True,
        )
        # The samples are taken from the process's stderr.
        self.process_samples(proc.stderr.getvalue())

    def do_bench(self):
        """
        Do the benchmarking.

        Picks one "up" OSD at random, creates the objects from that OSD's
        remote, measures a baseline, marks the OSD out, measures again, and
        marks the OSD back in.
        """
        duration = self.config.get("duration", 60)
        num_objects = self.config.get("num_objects", 500)
        io_size = self.config.get("io_size", 4096)

        osd = str(random.choice(self.osds))
        (osd_remote,) = self.ceph_manager.ctx.cluster.only(
            'osd.%s' % osd).remotes.keys()

        testdir = teuthology.get_testdir(self.ceph_manager.ctx)

        # create the objects
        osd_remote.run(
            args=self._smalliobench_args(testdir, [
                '--init-only', '1',
                '--num-objects', str(num_objects),
                '--io-size', str(io_size),
            ]),
            wait=True,
        )

        # baseline bench
        log.info('non-recovery (baseline)')
        self._measure(osd_remote, testdir, duration, io_size)

        self.ceph_manager.raw_cluster_cmd('osd', 'out', osd)
        try:
            time.sleep(5)

            # recovery bench
            log.info('recovery active')
            self._measure(osd_remote, testdir, duration, io_size)
        finally:
            # Always bring the OSD back, even if the measurement failed, so
            # a broken run does not leave the cluster degraded.
            self.ceph_manager.raw_cluster_cmd('osd', 'in', osd)

    @staticmethod
    def _parse_latencies(text):
        """
        Group latency samples by their type.

        :param text: newline-separated JSON objects with "type" and
                     "latency" members
        :returns: dict mapping type -> list of float latencies, in input
                  order; lines that are not usable samples are skipped
        """
        lat = {}
        for line in text.split('\n'):
            try:
                sample = json.loads(line)
                kind = sample['type']
                value = float(sample['latency'])
                # Only register the type once the latency parsed, so no
                # type ever ends up with an empty sample list.
                lat.setdefault(kind, []).append(value)
            except (ValueError, KeyError, TypeError, OverflowError):
                continue
        return lat

    @staticmethod
    def _summarize(samples):
        """
        :param samples: non-empty list of numbers
        :returns: (median, 99th-percentile) tuple
        """
        ordered = sorted(samples)
        num = len(ordered)
        mid = num // 2  # floor division: a list index must be an int

        if num % 2 == 1:  # odd number of samples
            median = ordered[mid]
        else:
            median = (ordered[mid] + ordered[mid - 1]) / 2.0

        ninety_nine = ordered[int(num * 0.99)]
        return median, ninety_nine

    def process_samples(self, input):
        """
        Extract samples from the input and process the results

        :param input: input lines in JSON format
        """
        lat = self._parse_latencies(input)
        for kind in lat:
            median, ninety_nine = self._summarize(lat[kind])
            log.info("%s: median %f, 99%% %f", kind, median, ninety_nine)