]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Rados benchmarking sweep | |
3 | """ | |
4 | import contextlib | |
5 | import logging | |
6 | import re | |
7 | ||
9f95a23c | 8 | from io import BytesIO |
7c673cae FG |
9 | from itertools import product |
10 | ||
11 | from teuthology.orchestra import run | |
12 | from teuthology import misc as teuthology | |
13 | ||
9f95a23c | 14 | |
7c673cae FG |
15 | log = logging.getLogger(__name__) |
16 | ||
17 | ||
@contextlib.contextmanager
def task(ctx, config):
    """
    Execute a radosbench parameter sweep

    Puts radosbench in a loop, taking values from the given config at each
    iteration. If given, the min and max values below create a range, e.g.
    min_num_replicas=1 and max_num_replicas=3 implies executing with 1-3
    replicas.

    The whole sweep runs before this context manager yields; nothing is done
    on exit.

    Parameters:

        clients: [client list] (exactly one client must be given)
        time: seconds to run (default=120)
        sizes: [list of object sizes] (default=[4M]); the legacy key 'size'
               is still accepted when 'sizes' is absent
        mode: <write|read|seq> (default=write); only 'write' is accepted
        repetitions: execute the same configuration multiple times (default=1)
        min_num_replicas: minimum number of replicas to use (default = 3)
        max_num_replicas: maximum number of replicas to use (default = 3)
        min_num_osds: the minimum number of OSDs in a pool (default=all)
        max_num_osds: the maximum number of OSDs in a pool (default=all)
        file: name of CSV-formatted output file (default='radosbench.csv')
        columns: columns to include (default=all)
           - rep: execution number (takes values from 'repetitions')
           - num_osd: number of osds for pool
           - num_replica: number of replicas
           - avg_throughput: throughput
           - avg_latency: latency
           - stdev_throughput:
           - stdev_latency:

    Example:
    - radosbenchsweep:
        columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput]

    Raises:
        Exception: if the configuration fails any of the checks below.
    """
    log.info('Beginning radosbenchsweep...')
    assert isinstance(config, dict), 'expecting dictionary for configuration'

    # get and validate config values
    # {

    # only one client supported for now
    if len(config.get('clients', [])) != 1:
        raise Exception("Only one client can be specified")

    # only write mode
    if config.get('mode', 'write') != 'write':
        raise Exception("Only 'write' mode supported for now.")

    # OSDs
    total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    min_num_osds = config.get('min_num_osds', total_osds_in_cluster)
    max_num_osds = config.get('max_num_osds', total_osds_in_cluster)

    if max_num_osds > total_osds_in_cluster:
        raise Exception('max_num_osds cannot be greater than total in cluster')
    if min_num_osds < 1:
        raise Exception('min_num_osds cannot be less than 1')
    if min_num_osds > max_num_osds:
        raise Exception('min_num_osds cannot be greater than max_num_osds')
    # number of OSDs marked out at each step: 0, 1, ..., total
    osds = range(0, total_osds_in_cluster + 1)

    # replicas
    min_num_replicas = config.get('min_num_replicas', 3)
    max_num_replicas = config.get('max_num_replicas', 3)

    if min_num_replicas < 1:
        raise Exception('min_num_replicas cannot be less than 1')
    if min_num_replicas > max_num_replicas:
        raise Exception(
            'min_num_replicas cannot be greater than max_num_replicas')
    if max_num_replicas > max_num_osds:
        raise Exception('max_num_replicas cannot be greater than max_num_osds')
    replicas = range(min_num_replicas, max_num_replicas + 1)

    # object sizes: 'sizes' is the documented key; fall back to the legacy
    # 'size' key so existing job descriptions keep working
    sizes = config.get('sizes', config.get('size', [4 << 20]))

    # repetitions
    reps = range(config.get('repetitions', 1))

    # file
    fname = config.get('file', 'radosbench.csv')
    # }

    # the context manager guarantees the CSV is flushed and closed even when
    # a benchmark run or a health check raises part-way through the sweep
    with open('{}/{}'.format(ctx.archive, fname), 'w') as f:
        # get_csv_header also validates 'columns' and fills in the default
        # list, which run_radosbench reads back from config
        f.write(get_csv_header(config) + '\n')

        # set default pools size=1 to avoid 'unhealthy' issues
        ctx.manager.set_pool_property('data', 'size', 1)
        ctx.manager.set_pool_property('metadata', 'size', 1)
        ctx.manager.set_pool_property('rbd', 'size', 1)

        current_osds_out = 0

        # sweep through all parameters
        for osds_out, size, replica, rep in product(osds, sizes, replicas, reps):

            osds_in = total_osds_in_cluster - osds_out

            if osds_in == 0:
                # we're done
                break

            if current_osds_out != osds_out:
                # take an osd out; osds_out grows by one per step, so the
                # osd to reweight is the one with id osds_out - 1
                ctx.manager.raw_cluster_cmd(
                    'osd', 'reweight', str(osds_out - 1), '0.0')
                wait_until_healthy(ctx, config)
                current_osds_out = osds_out

            if not min_num_osds <= osds_in <= max_num_osds:
                # no need to execute with a number of osds that wasn't
                # requested
                continue

            if osds_in < replica:
                # cannot execute with more replicas than available osds
                continue

            run_radosbench(ctx, config, f, osds_in, size, replica, rep)

    yield
139 | ||
140 | ||
def get_csv_header(conf):
    """
    Build the CSV header line (without trailing newline) for the sweep.

    When conf['columns'] is missing or empty, every known column is used and
    conf['columns'] is set to that full list as a side effect. Otherwise each
    requested column is checked against the known names.

    Raises:
        Exception: if a requested column is not one of the known names.
    """
    known = [
        'rep', 'num_osd', 'num_replica', 'avg_throughput',
        'avg_latency', 'stdev_throughput', 'stdev_latency'
    ]
    requested = conf.get('columns', None)

    # nothing requested: fall back to every column and record that in conf
    if not requested:
        conf['columns'] = known
        return ','.join(known)

    for name in requested:
        if name not in known:
            raise Exception('Unknown column ' + name)
    return ','.join(conf['columns'])
155 | ||
156 | ||
def _extract_metric(label_pattern, output):
    """
    Return what follows '<label>: ' on the first line of output that matches
    label_pattern (a regex fragment for the label, without the colon).

    Raises:
        Exception: if no line of output carries the label.
    """
    match = re.search(label_pattern + r':.*', output)
    if match is None:
        raise Exception(
            'radosbench output has no line matching {!r}'.format(label_pattern))
    return re.sub(label_pattern + r': ', '', match.group(0))


def run_radosbench(ctx, config, f, num_osds, size, replica, rep):
    """
    Run one 'rados bench ... write' per configured client against a freshly
    created pool, append one CSV row per client to f, then remove the pool.

    Parameters:
        ctx: teuthology context; ctx.manager and ctx.cluster are used
        config: task configuration; 'clients', 'time' and 'columns' are read.
                'columns' must already be present (get_csv_header fills in
                the default list)
        f: writable text file object receiving the CSV rows
        num_osds: number of OSDs currently in, recorded in the row
        size: object size passed to rados via -b
        replica: pool 'size' property to set, recorded in the row
        rep: repetition index, recorded in the row

    Raises:
        Exception: if an expected summary line is missing from the output.
    """
    pool = ctx.manager.create_pool_with_unique_name()

    ctx.manager.set_pool_property(pool, 'size', replica)

    wait_until_healthy(ctx, config)

    log.info('Executing with parameters: ')
    log.info(' num_osd =' + str(num_osds))
    log.info(' size =' + str(size))
    log.info(' num_replicas =' + str(replica))
    log.info(' repetition =' + str(rep))

    for role in config.get('clients', ['client.0']):
        assert isinstance(role, str)
        PREFIX = 'client.'
        assert role.startswith(PREFIX)
        id_ = role[len(PREFIX):]
        (remote,) = ctx.cluster.only(role).remotes.keys()

        proc = remote.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                '{}/archive/coverage'.format(teuthology.get_testdir(ctx)),
                'rados',
                '--no-log-to-stderr',
                '--name', role,
                '-b', str(size),
                '-p', pool,
                'bench', str(config.get('time', 120)), 'write',
            ],
            logger=log.getChild('radosbench.{id}'.format(id=id_)),
            stdin=run.PIPE,
            stdout=BytesIO(),
            wait=False
        )

        # parse output to get summary and format it as CSV
        proc.wait()
        # stdout was captured into a BytesIO, so getvalue() yields bytes;
        # decode before applying the str regex patterns below
        out = proc.stdout.getvalue().decode('utf-8', errors='replace')
        all_values = {
            'stdev_throughput': _extract_metric(r'Stddev Bandwidth', out),
            'stdev_latency': _extract_metric(r'Stddev Latency', out),
            'avg_throughput': _extract_metric(r'Bandwidth \(MB/sec\)', out),
            'avg_latency': _extract_metric(r'Average Latency', out),
            'rep': str(rep),
            'num_osd': str(num_osds),
            'num_replica': str(replica)
        }
        values_to_write = [all_values[column] for column in config['columns']]
        f.write(','.join(values_to_write) + '\n')

    ctx.manager.remove_pool(pool)
217 | ||
218 | ||
def wait_until_healthy(ctx, config):
    """
    Delegate to teuthology.wait_until_healthy, using the remote that hosts
    the first monitor of the cluster.
    """
    mon_role = teuthology.get_first_mon(ctx, config)
    # single-element unpacking fails unless exactly one remote has that role
    [remote_for_mon] = ctx.cluster.only(mon_role).remotes.keys()
    teuthology.wait_until_healthy(ctx, remote_for_mon)