"""
Rados benchmarking sweep
"""
import contextlib
import logging
import re
from io import StringIO
from itertools import product

from teuthology import misc as teuthology
from teuthology.orchestra import run
# Module-level logger shared by the sweep task and its helpers.
log = logging.getLogger(__name__)
@contextlib.contextmanager
def task(ctx, config):
    """
    Execute a radosbench parameter sweep.

    Puts radosbench in a loop, taking values from the given config at each
    iteration. If given, the min and max values below create a range, e.g.
    min_replicas=1 and max_replicas=3 implies executing with 1-3 replicas.

    Parameters:

        clients: [client list]
        time: seconds to run (default=120)
        sizes: [list of object sizes] (default=[4M])
        mode: <write|read|seq> (default=write)
        repetitions: execute the same configuration multiple times (default=1)
        min_num_replicas: minimum number of replicas to use (default=3)
        max_num_replicas: maximum number of replicas to use (default=3)
        min_num_osds: the minimum number of OSDs in a pool (default=all)
        max_num_osds: the maximum number of OSDs in a pool (default=all)
        file: name of CSV-formatted output file (default='radosbench.csv')
        columns: columns to include (default=all)
          - rep: execution number (takes values from 'repetitions')
          - num_osd: number of osds for pool
          - num_replica: number of replicas
          - avg_throughput: throughput
          - avg_latency: latency
          - stdev_throughput
          - stdev_latency

    Example:
        - radosbenchsweep:
            columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput]
    """
    log.info('Beginning radosbenchsweep...')
    assert isinstance(config, dict), 'expecting dictionary for configuration'

    # get and validate config values

    # only one client supported for now
    if len(config.get('clients', [])) != 1:
        raise Exception("Only one client can be specified")

    # only write mode is supported for now
    if config.get('mode', 'write') != 'write':
        raise Exception("Only 'write' mode supported for now.")

    # osd bounds for the sweep
    total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    min_num_osds = config.get('min_num_osds', total_osds_in_cluster)
    max_num_osds = config.get('max_num_osds', total_osds_in_cluster)

    if max_num_osds > total_osds_in_cluster:
        raise Exception('max_num_osds cannot be greater than total in cluster')
    if min_num_osds < 1:
        raise Exception('min_num_osds cannot be less than 1')
    if min_num_osds > max_num_osds:
        raise Exception('min_num_osds cannot be greater than max_num_osd')
    # number of osds to take OUT of the cluster at each sweep step
    osds = range(0, (total_osds_in_cluster + 1))

    # replica bounds
    min_num_replicas = config.get('min_num_replicas', 3)
    max_num_replicas = config.get('max_num_replicas', 3)

    if min_num_replicas < 1:
        raise Exception('min_num_replicas cannot be less than 1')
    if min_num_replicas > max_num_replicas:
        raise Exception('min_num_replicas cannot be greater than max_replicas')
    if max_num_replicas > max_num_osds:
        raise Exception('max_num_replicas cannot be greater than max_num_osds')
    replicas = range(min_num_replicas, (max_num_replicas + 1))

    # object sizes: the documented key is 'sizes'; fall back to the legacy
    # 'size' key so older configs keep working (default: one 4M object size)
    sizes = config.get('sizes', config.get('size', [4 << 20]))

    # repetitions
    reps = range(config.get('repetitions', 1))

    # output file (CSV); header also normalizes config['columns'] for the
    # row writer in run_radosbench()
    fname = config.get('file', 'radosbench.csv')
    f = open('{}/{}'.format(ctx.archive, fname), 'w')
    f.write(get_csv_header(config) + '\n')

    # set default pools size=1 to avoid 'unhealthy' issues
    ctx.manager.set_pool_property('data', 'size', 1)
    ctx.manager.set_pool_property('metadata', 'size', 1)
    ctx.manager.set_pool_property('rbd', 'size', 1)

    # osds currently reweighted out of the cluster
    current_osds_out = 0

    # sweep through all parameters
    try:
        for osds_out, size, replica, rep in product(osds, sizes, replicas, reps):

            osds_in = total_osds_in_cluster - osds_out

            if osds_in == 0:
                # everything is out of the cluster; nothing left to benchmark
                break

            if current_osds_out != osds_out:
                # take another osd out of the cluster and wait for rebalance
                ctx.manager.raw_cluster_cmd(
                    'osd', 'reweight', str(osds_out - 1), '0.0')
                wait_until_healthy(ctx, config)
                current_osds_out = osds_out

            if osds_in not in range(min_num_osds, (max_num_osds + 1)):
                # no need to execute with a number of osds that wasn't requested
                continue

            if osds_in < replica:
                # cannot execute with more replicas than available osds
                continue

            run_radosbench(ctx, config, f, osds_in, size, replica, rep)
    finally:
        # guarantee the CSV file is closed even if a sweep step fails
        f.close()

    yield
def get_csv_header(conf):
    """
    Return the CSV header line for the sweep's output file.

    Validates conf['columns'] against the known column names, raising on an
    unknown name. When 'columns' is absent or empty, all known columns are
    used and written back into conf so run_radosbench() emits row values in
    the same order as this header.

    :param conf: task configuration dict (mutated when 'columns' is unset)
    :returns: comma-joined header string
    :raises Exception: if a requested column name is unknown
    """
    all_columns = [
        'rep', 'num_osd', 'num_replica', 'avg_throughput',
        'avg_latency', 'stdev_throughput', 'stdev_latency'
    ]
    given_columns = conf.get('columns')
    if given_columns:
        # reject the first unknown column, same as checking each in order
        for column in given_columns:
            if column not in all_columns:
                raise Exception('Unknown column ' + column)
        return ','.join(given_columns)
    # default: use every column, and record the choice for the row writer
    conf['columns'] = all_columns
    return ','.join(all_columns)
def run_radosbench(ctx, config, f, num_osds, size, replica, rep):
    """
    Execute one radosbench write run and append one CSV row per client to *f*.

    :param ctx: teuthology run context (provides cluster and manager)
    :param config: task configuration dict; reads 'clients', 'time', 'columns'
    :param f: open, writable file object for the CSV output
    :param num_osds: number of osds currently in the cluster (reported only)
    :param size: object size in bytes, passed to rados bench via -b
    :param replica: replica count ('size') for the throwaway benchmark pool
    :param rep: repetition number (reported only)
    """
    pool = ctx.manager.create_pool_with_unique_name()
    try:
        ctx.manager.set_pool_property(pool, 'size', replica)
        wait_until_healthy(ctx, config)

        log.info('Executing with parameters: ')
        log.info('  num_osd =' + str(num_osds))
        log.info('  size =' + str(size))
        log.info('  num_replicas =' + str(replica))
        log.info('  repetition =' + str(rep))

        for role in config.get('clients', ['client.0']):
            assert isinstance(role, str)
            PREFIX = 'client.'
            assert role.startswith(PREFIX)
            id_ = role[len(PREFIX):]
            (remote,) = ctx.cluster.only(role).remotes.keys()

            proc = remote.run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{}/archive/coverage'.format(teuthology.get_testdir(ctx)),
                    'rados',
                    '--no-log-to-stderr',
                    '--name', role,
                    '-b', str(size),
                    '-p', pool,
                    'bench', str(config.get('time', 120)), 'write',
                ],
                logger=log.getChild('radosbench.{id}'.format(id=id_)),
                stdin=run.PIPE,
                stdout=StringIO(),
                wait=False
            )
            proc.wait()

            # parse output to get summary and format it as CSV
            out = proc.stdout.getvalue()

            def summary_value(label):
                # Pull '<label>: <value>' out of the bench summary, failing
                # with a clear message instead of an AttributeError when the
                # expected summary line is missing from the output.
                match = re.search(re.escape(label) + r':.*', out)
                if match is None:
                    raise Exception(
                        'radosbench output missing "{}"'.format(label))
                return re.sub(re.escape(label) + r': ', '', match.group(0))

            all_values = {
                'stdev_throughput': summary_value('Stddev Bandwidth'),
                'stdev_latency': summary_value('Stddev Latency'),
                'avg_throughput': summary_value('Bandwidth (MB/sec)'),
                'avg_latency': summary_value('Average Latency'),
                'rep': str(rep),
                'num_osd': str(num_osds),
                'num_replica': str(replica),
            }

            # config['columns'] was normalized by get_csv_header(), so the
            # row values line up with the header already written to f
            row = [all_values[column] for column in config['columns']]
            f.write(','.join(row) + '\n')
    finally:
        # always drop the benchmark pool, even if the run or parsing failed
        ctx.manager.remove_pool(pool)
def wait_until_healthy(ctx, config):
    """Block until the cluster reports healthy, polling via the first mon."""
    mon_role = teuthology.get_first_mon(ctx, config)
    remotes = ctx.cluster.only(mon_role).remotes
    (mon_remote,) = remotes.keys()
    teuthology.wait_until_healthy(ctx, mon_remote)