# git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/radosbenchsweep.py
# import ceph 15.2.10
# [ceph.git] / ceph / qa / tasks / radosbenchsweep.py
1 """
2 Rados benchmarking sweep
3 """
4 import contextlib
5 import logging
6 import re
7
8 from io import BytesIO
9 from itertools import product
10
11 from teuthology.orchestra import run
12 from teuthology import misc as teuthology
13
14 import six
15
16 log = logging.getLogger(__name__)
17
18
@contextlib.contextmanager
def task(ctx, config):
    """
    Execute a radosbench parameter sweep

    Puts radosbench in a loop, taking values from the given config at each
    iteration. If given, the min and max values below create a range, e.g.
    min_replicas=1 and max_replicas=3 implies executing with 1-3 replicas.

    Parameters:

        clients: [client list]
        time: seconds to run (default=120)
        sizes: [list of object sizes] (default=[4M])
               (the legacy key 'size' is still accepted when 'sizes' is
               not given)
        mode: <write|read|seq> (default=write)
        repetitions: execute the same configuration multiple times (default=1)
        min_num_replicas: minimum number of replicas to use (default = 3)
        max_num_replicas: maximum number of replicas to use (default = 3)
        min_num_osds: the minimum number of OSDs in a pool (default=all)
        max_num_osds: the maximum number of OSDs in a pool (default=all)
        file: name of CSV-formatted output file (default='radosbench.csv')
        columns: columns to include (default=all)
          - rep: execution number (takes values from 'repetitions')
          - num_osd: number of osds for pool
          - num_replica: number of replicas
          - avg_throughput: throughput
          - avg_latency: latency
          - stdev_throughput:
          - stdev_latency:

    Example:
    - radosbenchsweep:
        columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput]

    Raises AssertionError when config is not a dict, and Exception when a
    configuration value is out of range or unsupported.
    """
    log.info('Beginning radosbenchsweep...')
    assert isinstance(config, dict), 'expecting dictionary for configuration'

    # get and validate config values
    # {

    # only one client supported for now
    if len(config.get('clients', [])) != 1:
        raise Exception("Only one client can be specified")

    # only write mode
    if config.get('mode', 'write') != 'write':
        raise Exception("Only 'write' mode supported for now.")

    # OSDs
    total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    min_num_osds = config.get('min_num_osds', total_osds_in_cluster)
    max_num_osds = config.get('max_num_osds', total_osds_in_cluster)

    if max_num_osds > total_osds_in_cluster:
        raise Exception('max_num_osds cannot be greater than total in cluster')
    if min_num_osds < 1:
        raise Exception('min_num_osds cannot be less than 1')
    if min_num_osds > max_num_osds:
        raise Exception('min_num_osds cannot be greater than max_num_osd')
    # number of OSDs taken out of the cluster at each step of the sweep
    osds = range(0, (total_osds_in_cluster + 1))

    # replicas
    min_num_replicas = config.get('min_num_replicas', 3)
    max_num_replicas = config.get('max_num_replicas', 3)

    if min_num_replicas < 1:
        raise Exception('min_num_replicas cannot be less than 1')
    if min_num_replicas > max_num_replicas:
        raise Exception('min_num_replicas cannot be greater than max_replicas')
    if max_num_replicas > max_num_osds:
        raise Exception('max_num_replicas cannot be greater than max_num_osds')
    replicas = range(min_num_replicas, (max_num_replicas + 1))

    # object size: the documented key is 'sizes'; 'size' is honoured as a
    # fallback so configurations written against the old key keep working
    sizes = config.get('sizes', config.get('size', [4 << 20]))

    # repetitions
    reps = range(config.get('repetitions', 1))

    # file
    fname = config.get('file', 'radosbench.csv')
    # }

    # the context manager guarantees the CSV file is closed even when a
    # cluster command or a benchmark run raises part-way through the sweep
    with open('{}/{}'.format(ctx.archive, fname), 'w') as f:
        f.write(get_csv_header(config) + '\n')

        # set default pools size=1 to avoid 'unhealthy' issues
        ctx.manager.set_pool_property('data', 'size', 1)
        ctx.manager.set_pool_property('metadata', 'size', 1)
        ctx.manager.set_pool_property('rbd', 'size', 1)

        current_osds_out = 0

        # sweep through all parameters; osds_out is the outermost factor of
        # the product, so it only ever grows by one between iterations
        for osds_out, size, replica, rep in product(osds, sizes, replicas, reps):

            osds_in = total_osds_in_cluster - osds_out

            if osds_in == 0:
                # we're done
                break

            if current_osds_out != osds_out:
                # take an osd out
                ctx.manager.raw_cluster_cmd(
                    'osd', 'reweight', str(osds_out - 1), '0.0')
                wait_until_healthy(ctx, config)
                current_osds_out = osds_out

            if not min_num_osds <= osds_in <= max_num_osds:
                # no need to execute with a number of osds that wasn't requested
                continue

            if osds_in < replica:
                # cannot execute with more replicas than available osds
                continue

            run_radosbench(ctx, config, f, osds_in, size, replica, rep)

    yield
140
141
def get_csv_header(conf):
    """
    Build the header line of the CSV output.

    When conf['columns'] is present and non-empty, every entry must be one
    of the known column names (otherwise an Exception is raised) and the
    header is made of exactly those columns, in the given order. When it is
    missing or empty, conf['columns'] is filled in with every known column
    and the header lists all of them.

    Returns the column names joined by commas.
    """
    known = [
        'rep', 'num_osd', 'num_replica', 'avg_throughput',
        'avg_latency', 'stdev_throughput', 'stdev_latency'
    ]
    requested = conf.get('columns', None)

    # nothing requested: fall back to the full set and record it in conf
    if not requested or len(requested) == 0:
        conf['columns'] = known
        return ','.join(known)

    for name in requested:
        if name not in known:
            raise Exception('Unknown column ' + name)
    return ','.join(conf['columns'])
156
157
def _summary_value(out, line_pattern, label_pattern):
    """Return the text of one radosbench summary line with its label removed."""
    match = re.search(line_pattern, out)
    if match is None:
        raise Exception(
            'radosbench output has no line matching {!r}'.format(line_pattern))
    return re.sub(label_pattern, '', match.group(0))


def run_radosbench(ctx, config, f, num_osds, size, replica, rep):
    """
    Run one radosbench write benchmark and append its summary to the CSV.

    A uniquely named pool is created with its 'size' property set to
    `replica`, the benchmark is run against it from every configured client
    role, one CSV row per client is written to `f`, and the pool is removed.

    Parameters:
        ctx: test context providing `manager` and `cluster`
        config: task configuration; 'clients', 'time' and 'columns' are read
        f: writable text file receiving the CSV rows
        num_osds: number of OSDs currently in (reported in the row)
        size: object size passed to `rados -b`
        replica: pool size (number of replicas)
        rep: repetition index (reported in the row)

    Raises Exception when an expected summary line is missing from the
    benchmark output.
    """
    pool = ctx.manager.create_pool_with_unique_name()

    ctx.manager.set_pool_property(pool, 'size', replica)

    wait_until_healthy(ctx, config)

    log.info('Executing with parameters: ')
    log.info(' num_osd =%s', num_osds)
    log.info(' size =%s', size)
    log.info(' num_replicas =%s', replica)
    log.info(' repetition =%s', rep)

    for role in config.get('clients', ['client.0']):
        assert isinstance(role, six.string_types)
        PREFIX = 'client.'
        assert role.startswith(PREFIX)
        id_ = role[len(PREFIX):]
        (remote,) = ctx.cluster.only(role).remotes.keys()

        proc = remote.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                '{}/archive/coverage'.format(teuthology.get_testdir(ctx)),
                'rados',
                '--no-log-to-stderr',
                '--name', role,
                '-b', str(size),
                '-p', pool,
                'bench', str(config.get('time', 120)), 'write',
            ],
            logger=log.getChild('radosbench.{id}'.format(id=id_)),
            stdin=run.PIPE,
            stdout=BytesIO(),
            wait=False
        )

        # parse output to get summary and format it as CSV
        proc.wait()
        out = proc.stdout.getvalue()
        # stdout is captured as bytes; the patterns below are text patterns,
        # and Python 3 refuses to apply a str pattern to a bytes object
        if not isinstance(out, str):
            out = out.decode('utf-8')
        all_values = {
            'stdev_throughput': _summary_value(
                out, r'Stddev Bandwidth:.*', r'Stddev Bandwidth: '),
            'stdev_latency': _summary_value(
                out, r'Stddev Latency:.*', r'Stddev Latency: '),
            'avg_throughput': _summary_value(
                out, r'Bandwidth \(MB/sec\):.*', r'Bandwidth \(MB/sec\): '),
            'avg_latency': _summary_value(
                out, r'Average Latency:.*', r'Average Latency: '),
            'rep': str(rep),
            'num_osd': str(num_osds),
            'num_replica': str(replica)
        }
        values_to_write = [all_values[column] for column in config['columns']]
        f.write(','.join(values_to_write) + '\n')

    ctx.manager.remove_pool(pool)
218
219
def wait_until_healthy(ctx, config):
    """
    Call teuthology.wait_until_healthy with the remote of the first monitor.

    The first monitor role is looked up with teuthology.get_first_mon and
    must map to exactly one remote.
    """
    mon_role = teuthology.get_first_mon(ctx, config)
    mon_remotes = ctx.cluster.only(mon_role).remotes.keys()
    # single-element unpacking: fails unless there is exactly one remote
    [first_mon_remote] = mon_remotes
    teuthology.wait_until_healthy(ctx, first_mon_remote)