]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/radosbenchsweep.py
import quincy beta 17.1.0
[ceph.git] / ceph / qa / tasks / radosbenchsweep.py
CommitLineData
7c673cae
FG
1"""
2Rados benchmarking sweep
3"""
4import contextlib
5import logging
6import re
7
9f95a23c 8from io import BytesIO
7c673cae
FG
9from itertools import product
10
11from teuthology.orchestra import run
12from teuthology import misc as teuthology
13
9f95a23c 14
7c673cae
FG
15log = logging.getLogger(__name__)
16
17
@contextlib.contextmanager
def task(ctx, config):
    """
    Execute a radosbench parameter sweep

    Puts radosbench in a loop, taking values from the given config at each
    iteration. If given, the min and max values below create a range, e.g.
    min_num_replicas=1 and max_num_replicas=3 implies executing with 1-3
    replicas.

    The whole sweep runs before this context manager yields; nothing is
    done after the yield. OSDs that are reweighted to 0.0 during the sweep
    are not restored by this task.

    Parameters:

        clients: [client list] (exactly one client must be given)
        time: seconds to run (default=120)
        sizes: [list of object sizes, in bytes] (default=[4M]); the legacy
               key 'size' is still honoured when 'sizes' is absent
        mode: <write|read|seq> (default=write); only 'write' is accepted
        repetitions: execute the same configuration multiple times (default=1)
        min_num_replicas: minimum number of replicas to use (default = 3)
        max_num_replicas: maximum number of replicas to use (default = 3)
        min_num_osds: the minimum number of OSDs in a pool (default=all)
        max_num_osds: the maximum number of OSDs in a pool (default=all)
        file: name of CSV-formatted output file (default='radosbench.csv')
        columns: columns to include (default=all)
           - rep: execution number (takes values from 'repetitions')
           - num_osd: number of osds for pool
           - num_replica: number of replicas
           - avg_throughput: throughput
           - avg_latency: latency
           - stdev_throughput:
           - stdev_latency:

    Example:
    - radosbenchsweep:
        columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput]

    Raises:
        Exception: if the configuration is invalid (wrong number of clients,
            unsupported mode, inconsistent OSD/replica bounds, unknown
            column name).
    """
    log.info('Beginning radosbenchsweep...')
    assert isinstance(config, dict), 'expecting dictionary for configuration'

    # get and validate config values
    # {

    # only one client supported for now
    if len(config.get('clients', [])) != 1:
        raise Exception("Only one client can be specified")

    # only write mode
    if config.get('mode', 'write') != 'write':
        raise Exception("Only 'write' mode supported for now.")

    # OSDs
    total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    min_num_osds = config.get('min_num_osds', total_osds_in_cluster)
    max_num_osds = config.get('max_num_osds', total_osds_in_cluster)

    if max_num_osds > total_osds_in_cluster:
        raise Exception('max_num_osds cannot be greater than total in cluster')
    if min_num_osds < 1:
        raise Exception('min_num_osds cannot be less than 1')
    if min_num_osds > max_num_osds:
        raise Exception('min_num_osds cannot be greater than max_num_osd')
    # number of OSDs taken out so far: 0 .. total (the last value ends the sweep)
    osds = range(0, (total_osds_in_cluster + 1))

    # replicas
    min_num_replicas = config.get('min_num_replicas', 3)
    max_num_replicas = config.get('max_num_replicas', 3)

    if min_num_replicas < 1:
        raise Exception('min_num_replicas cannot be less than 1')
    if min_num_replicas > max_num_replicas:
        raise Exception('min_num_replicas cannot be greater than max_replicas')
    if max_num_replicas > max_num_osds:
        raise Exception('max_num_replicas cannot be greater than max_num_osds')
    replicas = range(min_num_replicas, (max_num_replicas + 1))

    # object size: 'sizes' is the documented key; 'size' is what older
    # versions of this task actually read, so keep it as a fallback
    sizes = config.get('sizes', config.get('size', [4 << 20]))

    # repetitions
    reps = range(config.get('repetitions', 1))

    # Build (and validate) the header before touching the filesystem so an
    # invalid 'columns' setting does not leave an empty file behind.  This
    # call also fills in config['columns'], which run_radosbench reads.
    header = get_csv_header(config)

    # file
    fname = config.get('file', 'radosbench.csv')
    # }

    # The context manager guarantees the CSV is flushed and closed even when
    # a benchmark run or a cluster command raises part-way through the sweep.
    with open('{}/{}'.format(ctx.archive, fname), 'w') as f:
        f.write(header + '\n')

        # set default pools size=1 to avoid 'unhealthy' issues
        ctx.manager.set_pool_property('data', 'size', 1)
        ctx.manager.set_pool_property('metadata', 'size', 1)
        ctx.manager.set_pool_property('rbd', 'size', 1)

        current_osds_out = 0

        # sweep through all parameters
        for osds_out, size, replica, rep in product(osds, sizes, replicas, reps):

            osds_in = total_osds_in_cluster - osds_out

            if osds_in == 0:
                # we're done
                break

            if current_osds_out != osds_out:
                # take an osd out; osds_out advances by one each time, so the
                # id of the OSD to drop is the new count minus one
                ctx.manager.raw_cluster_cmd(
                    'osd', 'reweight', str(osds_out - 1), '0.0')
                wait_until_healthy(ctx, config)
                current_osds_out = osds_out

            if not min_num_osds <= osds_in <= max_num_osds:
                # no need to execute with a number of osds that wasn't requested
                continue

            if osds_in < replica:
                # cannot execute with more replicas than available osds
                continue

            run_radosbench(ctx, config, f, osds_in, size, replica, rep)

    yield
139
140
def get_csv_header(conf):
    """
    Build the CSV header line for the configured output columns.

    If ``conf['columns']`` is missing or empty, it is set to the full list
    of known columns (callers rely on ``conf['columns']`` being populated
    afterwards).

    :param conf: task configuration dictionary; may be modified in place
    :returns: the selected column names joined with commas
    :raises Exception: if a requested column is not a known column name
    """
    all_columns = [
        'rep', 'num_osd', 'num_replica', 'avg_throughput',
        'avg_latency', 'stdev_throughput', 'stdev_latency'
    ]
    given_columns = conf.get('columns', None)
    if not given_columns:
        # nothing requested: fall back to every column and record the choice
        conf['columns'] = all_columns
        return ','.join(all_columns)

    for column in given_columns:
        if column not in all_columns:
            # format() rather than '+' so a non-string entry (e.g. a YAML
            # integer) is reported as an unknown column, not a TypeError
            raise Exception('Unknown column {}'.format(column))
    return ','.join(given_columns)
155
156
def _bench_summary_value(label_pattern, output):
    """Return the stripped text after the first '<label>:' line in output."""
    match = re.search(label_pattern + r':[ \t]*(.*)', output)
    if match is None:
        raise Exception(
            'radosbench output has no line matching {!r}'.format(label_pattern))
    return match.group(1).strip()


def run_radosbench(ctx, config, f, num_osds, size, replica, rep):
    """
    Run one 'rados bench ... write' pass and append its summary to the CSV.

    A uniquely named pool is created with the requested replica count, the
    benchmark is run on each configured client against that pool, one CSV
    row per client is written to ``f`` using the columns listed in
    ``config['columns']``, and the pool is removed afterwards.

    :param ctx: teuthology context (uses ctx.manager and ctx.cluster)
    :param config: task configuration; reads 'clients', 'time', 'columns'
    :param f: writable text file object receiving the CSV rows
    :param num_osds: number of OSDs currently in (recorded in the row)
    :param size: object size passed to rados via '-b'
    :param replica: pool 'size' property to set
    :param rep: repetition index (recorded in the row)
    :raises Exception: if an expected summary line is absent from the
        benchmark output
    """
    pool = ctx.manager.create_pool_with_unique_name()

    ctx.manager.set_pool_property(pool, 'size', replica)

    wait_until_healthy(ctx, config)

    log.info('Executing with parameters: ')
    log.info(' num_osd =' + str(num_osds))
    log.info(' size =' + str(size))
    log.info(' num_replicas =' + str(replica))
    log.info(' repetition =' + str(rep))

    for role in config.get('clients', ['client.0']):
        assert isinstance(role, str)
        PREFIX = 'client.'
        assert role.startswith(PREFIX)
        id_ = role[len(PREFIX):]
        (remote,) = ctx.cluster.only(role).remotes.keys()

        proc = remote.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                '{}/archive/coverage'.format(teuthology.get_testdir(ctx)),
                'rados',
                '--no-log-to-stderr',
                '--name', role,
                '-b', str(size),
                '-p', pool,
                'bench', str(config.get('time', 120)), 'write',
            ],
            logger=log.getChild('radosbench.{id}'.format(id=id_)),
            stdin=run.PIPE,
            stdout=BytesIO(),
            wait=False
        )

        # parse output to get summary and format it as CSV
        proc.wait()
        out = proc.stdout.getvalue()
        if isinstance(out, bytes):
            # stdout is captured in a BytesIO; the str regexes below cannot
            # be applied to bytes, so decode first
            out = out.decode('utf-8', errors='replace')

        # NOTE(review): the optional '(s)' suffix on the latency labels is
        # believed to match newer rados bench output -- confirm against the
        # rados version under test.
        all_values = {
            'stdev_throughput': _bench_summary_value(
                r'Stddev Bandwidth', out),
            'stdev_latency': _bench_summary_value(
                r'Stddev Latency(?:\(s\))?', out),
            'avg_throughput': _bench_summary_value(
                r'Bandwidth \(MB/sec\)', out),
            'avg_latency': _bench_summary_value(
                r'Average Latency(?:\(s\))?', out),
            'rep': str(rep),
            'num_osd': str(num_osds),
            'num_replica': str(replica)
        }
        values_to_write = [all_values[column] for column in config['columns']]
        f.write(','.join(values_to_write) + '\n')

    ctx.manager.remove_pool(pool)
217
218
def wait_until_healthy(ctx, config):
    """
    Delegate to teuthology's wait_until_healthy using the remote that hosts
    the first monitor role.

    :param ctx: teuthology context (uses ctx.cluster)
    :param config: task configuration, passed to teuthology.get_first_mon
    """
    mon_role = teuthology.get_first_mon(ctx, config)
    mon_remotes = ctx.cluster.only(mon_role).remotes
    # single-element unpacking: fails unless exactly one remote has the role
    (remote,) = mon_remotes.keys()
    teuthology.wait_until_healthy(ctx, remote)