"""
Rados benchmarking sweep
"""
import contextlib
import logging
import re

from io import StringIO
from itertools import product

import six

from teuthology import misc as teuthology
from teuthology.orchestra import run
16 log
= logging
.getLogger(__name__
)
@contextlib.contextmanager
def task(ctx, config):
    """
    Execute a radosbench parameter sweep.

    Puts radosbench in a loop, taking values from the given config at each
    iteration. If given, the min and max values below create a range, e.g.
    min_replicas=1 and max_replicas=3 implies executing with 1-3 replicas.

    Parameters:

        clients: [client list]
        time: seconds to run (default=120)
        sizes: [list of object sizes] (default=[4M])
        mode: <write|read|seq> (default=write)
        repetitions: execute the same configuration multiple times (default=1)
        min_num_replicas: minimum number of replicas to use (default = 3)
        max_num_replicas: maximum number of replicas to use (default = 3)
        min_num_osds: the minimum number of OSDs in a pool (default=all)
        max_num_osds: the maximum number of OSDs in a pool (default=all)
        file: name of CSV-formatted output file (default='radosbench.csv')
        columns: columns to include (default=all)
          - rep: execution number (takes values from 'repetitions')
          - num_osd: number of osds for pool
          - num_replica: number of replicas
          - avg_throughput: throughput
          - avg_latency: latency

    Example:
        columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput]
    """
    log.info('Beginning radosbenchsweep...')
    assert isinstance(config, dict), 'expecting dictionary for configuration'

    # get and validate config values

    # only one client supported for now
    if len(config.get('clients', [])) != 1:
        raise Exception("Only one client can be specified")

    # only write mode supported for now
    if config.get('mode', 'write') != 'write':
        raise Exception("Only 'write' mode supported for now.")

    # OSD count range to sweep over (defaults to every OSD in the cluster)
    total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    min_num_osds = config.get('min_num_osds', total_osds_in_cluster)
    max_num_osds = config.get('max_num_osds', total_osds_in_cluster)

    if max_num_osds > total_osds_in_cluster:
        raise Exception('max_num_osds cannot be greater than total in cluster')
    if min_num_osds < 1:
        raise Exception('min_num_osds cannot be less than 1')
    if min_num_osds > max_num_osds:
        raise Exception('min_num_osds cannot be greater than max_num_osd')
    # osds_out values swept: 0 .. total (the loop below skips the all-out case)
    osds = range(0, (total_osds_in_cluster + 1))

    # replica count range to sweep over
    min_num_replicas = config.get('min_num_replicas', 3)
    max_num_replicas = config.get('max_num_replicas', 3)

    if min_num_replicas < 1:
        raise Exception('min_num_replicas cannot be less than 1')
    if min_num_replicas > max_num_replicas:
        raise Exception('min_num_replicas cannot be greater than max_replicas')
    if max_num_replicas > max_num_osds:
        raise Exception('max_num_replicas cannot be greater than max_num_osds')
    replicas = range(min_num_replicas, (max_num_replicas + 1))

    # object sizes in bytes; the docstring advertises 'sizes', the historic
    # key was 'size' -- accept both for backward compatibility
    sizes = config.get('sizes', config.get('size', [4 << 20]))

    # number of repetitions of each parameter combination
    reps = range(config.get('repetitions', 1))

    # CSV output file; 'with' guarantees it is closed even if a run fails
    fname = config.get('file', 'radosbench.csv')
    with open('{}/{}'.format(ctx.archive, fname), 'w') as f:
        f.write(get_csv_header(config) + '\n')

        # set default pools size=1 to avoid 'unhealthy' issues
        ctx.manager.set_pool_property('data', 'size', 1)
        ctx.manager.set_pool_property('metadata', 'size', 1)
        ctx.manager.set_pool_property('rbd', 'size', 1)

        current_osds_out = 0

        # sweep through all parameter combinations
        for osds_out, size, replica, rep in product(osds, sizes, replicas, reps):
            osds_in = total_osds_in_cluster - osds_out

            if osds_in == 0:
                # we cannot run with 0 osds
                continue

            if current_osds_out != osds_out:
                # take another osd out of the cluster by zeroing its weight
                ctx.manager.raw_cluster_cmd(
                    'osd', 'reweight', str(osds_out - 1), '0.0')
                wait_until_healthy(ctx, config)
                current_osds_out = osds_out

            if osds_in not in range(min_num_osds, (max_num_osds + 1)):
                # no need to execute with a number of osds that wasn't requested
                continue

            if osds_in < replica:
                # cannot execute with more replicas than available osds
                continue

            run_radosbench(ctx, config, f, osds_in, size, replica, rep)

    yield
def get_csv_header(conf):
    """
    Return the CSV header line for the sweep's output file.

    Validates any user-requested conf['columns'] against the known column
    names. When no columns were requested, selects all of them and writes
    the list back into conf so run_radosbench() emits matching rows.

    :param conf: task configuration dict (mutated when 'columns' is unset)
    :returns: comma-joined column names
    :raises Exception: if an unknown column name is requested
    """
    all_columns = [
        'rep', 'num_osd', 'num_replica', 'avg_throughput',
        'avg_latency', 'stdev_throughput', 'stdev_latency'
    ]
    given_columns = conf.get('columns', None)
    # truthiness covers both None and an empty list
    if given_columns:
        for column in given_columns:
            if column not in all_columns:
                raise Exception('Unknown column ' + column)
        return ','.join(conf['columns'])
    else:
        conf['columns'] = all_columns
        return ','.join(all_columns)
def _summary_stat(label_pattern, out):
    """
    Extract the value following 'label_pattern:' from radosbench output.

    :param label_pattern: regex for the stat label (metacharacters escaped
                          by the caller, e.g. r'Bandwidth \(MB/sec\)')
    :param out: full stdout text of a 'rados bench' run
    :returns: the text after the label on its line
    :raises Exception: if the label is absent (instead of an opaque
                       AttributeError on a None match)
    """
    match = re.search(label_pattern + r':.*', out)
    if match is None:
        raise Exception(
            'radosbench output missing expected field: ' + label_pattern)
    return re.sub(label_pattern + ': ', '', match.group(0))


def run_radosbench(ctx, config, f, num_osds, size, replica, rep):
    """
    Run one radosbench write benchmark and append one CSV row to f.

    Creates a uniquely-named pool with the requested replica count, waits
    for the cluster to become healthy, runs 'rados bench ... write' on each
    configured client, parses the summary stats from its output, writes the
    columns selected in config['columns'], and removes the pool.

    :param ctx: teuthology run context
    :param config: task configuration dict
    :param f: open file object the CSV row is appended to
    :param num_osds: number of OSDs currently in (reported in the row)
    :param size: object size in bytes passed to rados bench
    :param replica: pool replica count to benchmark with
    :param rep: repetition number (reported in the row)
    """
    pool = ctx.manager.create_pool_with_unique_name()

    ctx.manager.set_pool_property(pool, 'size', replica)

    wait_until_healthy(ctx, config)

    log.info('Executing with parameters: ')
    log.info('  num_osd =' + str(num_osds))
    log.info('  size =' + str(size))
    log.info('  num_replicas =' + str(replica))
    log.info('  repetition =' + str(rep))

    for role in config.get('clients', ['client.0']):
        assert isinstance(role, six.string_types)
        PREFIX = 'client.'
        assert role.startswith(PREFIX)
        id_ = role[len(PREFIX):]
        (remote,) = ctx.cluster.only(role).remotes.keys()

        proc = remote.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                '{}/archive/coverage'.format(teuthology.get_testdir(ctx)),
                'rados',
                '--no-log-to-stderr',
                '--name', role,
                '-b', str(size),
                '-p', pool,
                'bench', str(config.get('time', 120)), 'write',
            ],
            logger=log.getChild('radosbench.{id}'.format(id=id_)),
            stdin=run.PIPE,
            stdout=StringIO(),
            wait=False
        )
        # block until the benchmark finishes before reading its output
        proc.wait()

        # parse output to get summary and format it as CSV
        out = proc.stdout.getvalue()
        all_values = {
            'stdev_throughput': _summary_stat(r'Stddev Bandwidth', out),
            'stdev_latency': _summary_stat(r'Stddev Latency', out),
            'avg_throughput': _summary_stat(r'Bandwidth \(MB/sec\)', out),
            'avg_latency': _summary_stat(r'Average Latency', out),
            'rep': str(rep),
            'num_osd': str(num_osds),
            'num_replica': str(replica),
        }
        # emit only the requested columns, in the requested order
        values_to_write = [all_values[column] for column in config['columns']]
        f.write(','.join(values_to_write) + '\n')

    ctx.manager.remove_pool(pool)
def wait_until_healthy(ctx, config):
    """
    Block until the cluster reports a healthy state.

    Resolves the remote hosting the first monitor and delegates the actual
    polling to teuthology.wait_until_healthy.
    """
    mon_role = teuthology.get_first_mon(ctx, config)
    mon_remotes = ctx.cluster.only(mon_role).remotes
    (remote,) = mon_remotes.keys()
    teuthology.wait_until_healthy(ctx, remote)