1"""
2Workunit task -- Run ceph on sets of specific clients
3"""
4import logging
5import pipes
6import os
224ce89b 7import re
f67539c2 8import shlex

from tasks.util import get_remote_for_role
from tasks.util.workunit import get_refspec_after_overrides

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.exceptions import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)

def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
                client.0: [direct_io, xattrs.sh]
                client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
                all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h)::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
                all: [snaps]
            env:
                FOO: bar
                BAZ: quux
            timeout: 3h

    You can also pass optional arguments to the found workunits::

        tasks:
        - workunit:
            clients:
                all:
                    - test-ceph-helpers.sh test_get_config

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
                backup.client.0: [foo]
                client.1: [bar] # cluster is implicitly 'ceph'

    You can also specify an alternative top-level dir to 'qa/workunits', like
    'qa/standalone', with::

        tasks:
        - install:
        - workunit:
            basedir: qa/standalone
            clients:
                client.0:
                    - test-ceph-helpers.sh

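    By default each workunit runs wrapped in adjust-ulimits and ceph-coverage,
    and its scratch directory is removed when it finishes. A sketch of how to
    disable both behaviours (illustrative values only)::

        tasks:
        - workunit:
            cleanup: false
            no_coverage_and_limits: true
            clients:
                client.0: [snaps]
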
    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    overrides = ctx.config.get('overrides', {})
    refspec = get_refspec_after_overrides(config, overrides)
    timeout = config.get('timeout', '3h')
    cleanup = config.get('cleanup', True)

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.keys():
        assert isinstance(role, str)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
    log.info("timeout={}".format(timeout))
    log.info("cleanup={}".format(cleanup))
    with parallel() as p:
        for role, tests in clients.items():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'),
                        basedir=config.get('basedir', 'qa/workunits'),
                        subdir=config.get('subdir'),
                        timeout=timeout,
                        cleanup=cleanup,
                        coverage_and_limits=not config.get('no_coverage_and_limits', None))

    if cleanup:
        # Clean up dirs from any non-all workunits
        for role, created in created_mountpoint.items():
            _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('basedir', 'qa/workunits'),
                              config.get('subdir'), timeout=timeout,
                              cleanup=cleanup)


def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
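
    Example (illustrative), assuming the usual teuthology testdir of
    /home/ubuntu/cephtest::

        _client_mountpoint(ctx, 'ceph', '0')   -> /home/ubuntu/cephtest/mnt.0
        _client_mountpoint(ctx, 'backup', '0') -> /home/ubuntu/cephtest/mnt.backup.0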
155 """
156 # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
157 # only include the cluster name in the dir if the cluster is not 'ceph'
158 if cluster == 'ceph':
159 dir_ = 'mnt.{0}'.format(id_)
160 else:
161 dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
162 return os.path.join(misc.get_testdir(ctx), dir_)
163
164
def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete the directory used by this role inside the mount and, if the
    mount point itself was created artificially, remove the mount point too.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))


def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role. This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
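
    Returns True if the mount point directory had to be created here (so the
    caller knows to remove it again later), False otherwise. For example
    (illustrative): for role 'client.0' with no subdir and no pre-existing
    mount, this creates <testdir>/mnt.0/client.0 and returns True.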
210 """
211 created_mountpoint = False
212 cluster, _, id_ = misc.split_role(role)
213 remote = get_remote_for_role(ctx, role)
214 dir_owner = remote.user
215 mnt = _client_mountpoint(ctx, cluster, id_)
216 # if neither kclient nor ceph-fuse are required for a workunit,
217 # mnt may not exist. Stat and create the directory if it doesn't.
218 try:
219 remote.run(
220 args=[
221 'stat',
222 '--',
223 mnt,
224 ],
225 )
226 log.info('Did not need to create dir {dir}'.format(dir=mnt))
227 except CommandFailedError:
228 remote.run(
229 args=[
230 'mkdir',
231 '--',
232 mnt,
233 ],
234 )
235 log.info('Created dir {dir}'.format(dir=mnt))
236 created_mountpoint = True
237
238 if not subdir:
239 subdir = 'client.{id}'.format(id=id_)
240
241 if created_mountpoint:
242 remote.run(
243 args=[
244 'cd',
245 '--',
246 mnt,
247 run.Raw('&&'),
248 'mkdir',
249 '--',
250 subdir,
251 ],
252 )
253 else:
254 remote.run(
255 args=[
256 # cd first so this will fail if the mount point does
257 # not exist; pure install -d will silently do the
258 # wrong thing
259 'cd',
260 '--',
261 mnt,
262 run.Raw('&&'),
263 'sudo',
264 'install',
265 '-d',
266 '-m', '0755',
267 '--owner={user}'.format(user=dir_owner),
268 '--',
269 subdir,
270 ],
271 )
272
273 return created_mountpoint
274
275
def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env,
                        basedir,
                        subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    if cleanup:
        for role, _ in client_remotes.items():
            _delete_dir(ctx, role, created_mountpoint[role])


def _run_tests(ctx, refspec, role, tests, env, basedir,
               subdir=None, timeout=None, cleanup=True,
               coverage_and_limits=True):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx: Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param tests: specific tests specified.
    :param env: environment set in yaml file. Could be None.
    :param subdir: subdirectory set in yaml file. Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
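    :param cleanup: if True (the default), remove the scratch tmp directory
                    after the tests have finished.
    :param coverage_and_limits: if True (the default), wrap each workunit in
                    adjust-ulimits and ceph-coverage.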
325 """
326 testdir = misc.get_testdir(ctx)
f67539c2 327 assert isinstance(role, str)
7c673cae
FG
328 cluster, type_, id_ = misc.split_role(role)
329 assert type_ == 'client'
330 remote = get_remote_for_role(ctx, role)
331 mnt = _client_mountpoint(ctx, cluster, id_)
332 # subdir so we can remove and recreate this a lot without sudo
333 if subdir is None:
334 scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
335 else:
336 scratch_tmp = os.path.join(mnt, subdir)
337 clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
c07f9fc5
FG
338 srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
339 basedir=basedir)
7c673cae
FG
340
341 git_url = teuth_config.get_ceph_qa_suite_git_url()
342 # if we are running an upgrade test, and ceph-ci does not have branches like
343 # `jewel`, so should use ceph.git as an alternative.
344 try:
345 remote.run(logger=log.getChild(role),
346 args=refspec.clone(git_url, clonedir))
347 except CommandFailedError:
224ce89b
WB
348 if git_url.endswith('/ceph-ci.git'):
349 alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
350 elif git_url.endswith('/ceph-ci'):
351 alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
352 else:
7c673cae 353 raise
7c673cae
FG
354 log.info(
355 "failed to check out '%s' from %s; will also try in %s",
356 refspec,
357 git_url,
358 alt_git_url,
359 )
360 remote.run(logger=log.getChild(role),
361 args=refspec.clone(alt_git_url, clonedir))
362 remote.run(
363 logger=log.getChild(role),
364 args=[
365 'cd', '--', srcdir,
366 run.Raw('&&'),
367 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
368 run.Raw('&&'),
9f95a23c 369 'find', '-executable', '-type', 'f', '-printf', r'%P\0',
7c673cae
FG
370 run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
371 ],
372 )
373
374 workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
f67539c2 375 workunits = sorted(remote.read_file(workunits_file).decode().split('\0'))
7c673cae
FG
376 assert workunits
377
378 try:
379 assert isinstance(tests, list)
380 for spec in tests:
f67539c2
TL
381 dir_or_fname, *optional_args = shlex.split(spec)
382 log.info('Running workunits matching %s on %s...', dir_or_fname, role)
383 # match executables named "foo" or "foo/*" with workunit named
384 # "foo"
385 to_run = [w for w in workunits
386 if os.path.commonpath([w, dir_or_fname]) == dir_or_fname]
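            # e.g. (illustrative) a spec of 'mon' would select every workunit
            # under the mon/ directory, while a spec of 'direct_io' selects
            # only the executable named 'direct_io'.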
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_MNT={dir}'.format(dir=mnt)),
                ]
                if env is not None:
                    for var, val in env.items():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                if coverage_and_limits:
                    args.extend([
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args + optional_args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                if cleanup:
                    args = ['sudo', 'rm', '-rf', '--', scratch_tmp]
                    remote.run(logger=log.getChild(role), args=args, timeout=(60*60))
    finally:
        log.info('Stopping %s on %s...', tests, role)
        args = ['sudo', 'rm', '-rf', '--', workunits_file, clonedir]
        # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang.
        remote.run(
            logger=log.getChild(role),
            args=args,
        )
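
# For reference, the command each workunit run boils down to has roughly this
# shape (an illustrative sketch; the actual paths come from the testdir, clone
# dir and mount point computed above):
#
#   mkdir -p -- <mnt>/client.<id>/tmp && cd <mnt>/client.<id>/tmp && \
#       CEPH_CLI_TEST_DUP_COMMAND=1 CEPH_REF=<refspec> TESTDIR="<testdir>" \
#       CEPH_ARGS="--cluster <cluster>" CEPH_ID="<id>" PATH=$PATH:/usr/sbin \
#       CEPH_BASE=<clonedir> CEPH_ROOT=<clonedir> CEPH_MNT=<mnt> \
#       adjust-ulimits ceph-coverage <testdir>/archive/coverage timeout 3h \
#       <clonedir>/qa/workunits/<workunit> [optional args]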