"""
Workunit task -- Run ceph on sets of specific clients
"""
import logging
import pipes
import os
import re
import shlex

from tasks.util import get_remote_for_role
from tasks.util.workunit import get_refspec_after_overrides

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.exceptions import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)


def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
                client.0: [direct_io, xattrs.sh]
                client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
                all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h)::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
                all: [snaps]
            env:
                FOO: bar
                BAZ: quux
            timeout: 3h

    You can also pass optional arguments to the found workunits::

        tasks:
        - workunit:
            clients:
                all:
                    - test-ceph-helpers.sh test_get_config

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
                backup.client.0: [foo]
                client.1: [bar] # cluster is implicitly 'ceph'

    You can also specify an alternative top-level dir to 'qa/workunits', like
    'qa/standalone', with::

        tasks:
        - install:
        - workunit:
            basedir: qa/standalone
            clients:
                client.0:
                    - test-ceph-helpers.sh

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    overrides = ctx.config.get('overrides', {})
    refspec = get_refspec_after_overrides(config, overrides)
    timeout = config.get('timeout', '3h')
    cleanup = config.get('cleanup', True)

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.keys():
        assert isinstance(role, str)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
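    # (one _run_tests call per non-"all" role, all spawned in parallel)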
    log.info("timeout={}".format(timeout))
    log.info("cleanup={}".format(cleanup))
    with parallel() as p:
        for role, tests in clients.items():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'),
                        basedir=config.get('basedir', 'qa/workunits'),
                        subdir=config.get('subdir'),
                        timeout=timeout,
                        cleanup=cleanup,
                        coverage_and_limits=not config.get('no_coverage_and_limits', None))

    if cleanup:
        # Clean up dirs from any non-all workunits
        for role, created in created_mountpoint.items():
            _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('basedir', 'qa/workunits'),
                              config.get('subdir'), timeout=timeout,
                              cleanup=cleanup)


def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
    """
    # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
    # only include the cluster name in the dir if the cluster is not 'ceph'
    if cluster == 'ceph':
        dir_ = 'mnt.{0}'.format(id_)
    else:
        dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
    return os.path.join(misc.get_testdir(ctx), dir_)


def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete the directory this role used inside the mount, and remove the
    mount point itself if it was artificially created.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param created_mountpoint: True if the mount point was created by
                               _make_scratch_dir and should be removed too.
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))


def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role.  This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
    """
    created_mountpoint = False
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)
    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(
            args=[
                'stat',
                '--',
                mnt,
            ],
        )
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
    except CommandFailedError:
        remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    if created_mountpoint:
        remote.run(
            args=[
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'mkdir',
                '--',
                subdir,
            ],
        )
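    # The mount point already existed (e.g. a live ceph-fuse or kernel
    # mount), so it is likely owned by root: create the subdir with
    # sudo install and hand ownership to the test user.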
    else:
        remote.run(
            args=[
                # cd first so this will fail if the mount point does
                # not exist; pure install -d will silently do the
                # wrong thing
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'sudo',
                'install',
                '-d',
                '-m', '0755',
                '--owner={user}'.format(user=dir_owner),
                '--',
                subdir,
            ],
        )

    return created_mountpoint


def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env,
                        basedir,
                        subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    if cleanup:
        for role, _ in client_remotes.items():
            _delete_dir(ctx, role, created_mountpoint[role])


def _run_tests(ctx, refspec, role, tests, env, basedir,
               subdir=None, timeout=None, cleanup=True,
               coverage_and_limits=True):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx: Context
    :param refspec: branch, sha1, or version tag used to identify this
        build
    :param tests: specific tests specified.
    :param env: environment set in yaml file. Could be None.
    :param subdir: subdirectory set in yaml file. Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
        to limit execution time. Must be specified by a number
        followed by 's' for seconds, 'm' for minutes, 'h' for
        hours, or 'd' for days. If '0' or anything that evaluates
        to False is passed, the 'timeout' command is not used.
    """
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, str)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
                                       basedir=basedir)
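    # e.g. clonedir = <testdir>/clone.client.0 and, with the default basedir,
    # srcdir = <testdir>/clone.client.0/qa/workunits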

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    # If we are running an upgrade test, ceph-ci may not have old branches
    # like `jewel`, so fall back to ceph.git as an alternative.
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))
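    # Build the list of candidate workunits: run make if the basedir ships a
    # Makefile, then record every executable file as a NUL-separated relative
    # path for the spec matching below.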
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )

    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(remote.read_file(workunits_file).decode().split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            dir_or_fname, *optional_args = shlex.split(spec)
            log.info('Running workunits matching %s on %s...', dir_or_fname, role)
            # match executables named "foo" or "foo/*" with workunit named
            # "foo"
            to_run = [w for w in workunits
                      if os.path.commonpath([w, dir_or_fname]) == dir_or_fname]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
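                # Assemble a single shell command line: create and enter the
                # scratch dir, then set the CEPH_* environment the workunit
                # scripts expect before invoking the script itself.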
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_MNT={dir}'.format(dir=mnt)),
                ]
                if env is not None:
                    for var, val in env.items():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                if coverage_and_limits:
                    args.extend([
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
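                # Finally, append the workunit script itself; when a timeout
                # is set, timeout(1) kills the script if it runs past it.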
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args + optional_args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                if cleanup:
                    args = ['sudo', 'rm', '-rf', '--', scratch_tmp]
                    remote.run(logger=log.getChild(role), args=args, timeout=(60*60))
    finally:
        log.info('Stopping %s on %s...', tests, role)
        args = ['sudo', 'rm', '-rf', '--', workunits_file, clonedir]
        # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang.
        remote.run(
            logger=log.getChild(role),
            args=args,
        )