1"""
2Workunit task -- Run ceph on sets of specific clients
3"""
4import logging
5import pipes
6import os
224ce89b 7import re
f67539c2 8import shlex

from tasks.util import get_remote_for_role
from tasks.util.workunit import get_refspec_after_overrides

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.orchestra.run import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)

def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
              client.0: [direct_io, xattrs.sh]
              client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
              all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h)::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
              all: [snaps]
            env:
              FOO: bar
              BAZ: quux
            timeout: 3h

    You can also pass optional arguments to the found workunits::

        tasks:
        - workunit:
            clients:
              all:
                - test-ceph-helpers.sh test_get_config

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
              backup.client.0: [foo]
              client.1: [bar] # cluster is implicitly 'ceph'

    You can also specify an alternative top-level dir to 'qa/workunits', like
    'qa/standalone', with::

        tasks:
        - install:
        - workunit:
            basedir: qa/standalone
            clients:
              client.0:
                - test-ceph-helpers.sh

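    The task also honors a 'cleanup' option (default: true) that controls
    whether the per-client scratch directories are removed after the
    workunits finish, and a 'no_coverage_and_limits' option that skips the
    adjust-ulimits/ceph-coverage wrappers around each workunit command::

        tasks:
        - workunit:
            cleanup: false
            no_coverage_and_limits: true
            clients:
              client.0: [snaps]
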
    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    overrides = ctx.config.get('overrides', {})
    refspec = get_refspec_after_overrides(config, overrides)
    timeout = config.get('timeout', '3h')
    cleanup = config.get('cleanup', True)

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.keys():
        assert isinstance(role, str)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
    log.info("timeout={}".format(timeout))
    log.info("cleanup={}".format(cleanup))
    with parallel() as p:
        for role, tests in clients.items():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'),
                        basedir=config.get('basedir', 'qa/workunits'),
                        timeout=timeout,
                        cleanup=cleanup,
                        coverage_and_limits=not config.get('no_coverage_and_limits', None))

    if cleanup:
        # Clean up dirs from any non-all workunits
        for role, created in created_mountpoint.items():
            _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('basedir', 'qa/workunits'),
                              config.get('subdir'), timeout=timeout,
                              cleanup=cleanup)


def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
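    For example, <testdir>/mnt.0 for client.0 in the default 'ceph'
    cluster, or <testdir>/mnt.backup.0 for backup.client.0.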
154 """
155 # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
156 # only include the cluster name in the dir if the cluster is not 'ceph'
157 if cluster == 'ceph':
158 dir_ = 'mnt.{0}'.format(id_)
159 else:
160 dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
161 return os.path.join(misc.get_testdir(ctx), dir_)
162
163
def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete the scratch directory used by this role, and remove the mount
    point itself if it was artificially created.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
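    :param created_mountpoint: True if the mount point was created by
                               _make_scratch_dir and should be removed too.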
171 """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))


def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role. This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
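    :returns: True if the mount point directory had to be created here
              (and should later be passed to _delete_dir for removal).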
209 """
210 created_mountpoint = False
211 cluster, _, id_ = misc.split_role(role)
212 remote = get_remote_for_role(ctx, role)
213 dir_owner = remote.user
214 mnt = _client_mountpoint(ctx, cluster, id_)
215 # if neither kclient nor ceph-fuse are required for a workunit,
216 # mnt may not exist. Stat and create the directory if it doesn't.
217 try:
218 remote.run(
219 args=[
220 'stat',
221 '--',
222 mnt,
223 ],
224 )
225 log.info('Did not need to create dir {dir}'.format(dir=mnt))
226 except CommandFailedError:
227 remote.run(
228 args=[
229 'mkdir',
230 '--',
231 mnt,
232 ],
233 )
234 log.info('Created dir {dir}'.format(dir=mnt))
235 created_mountpoint = True
236
237 if not subdir:
238 subdir = 'client.{id}'.format(id=id_)
239
240 if created_mountpoint:
241 remote.run(
242 args=[
243 'cd',
244 '--',
245 mnt,
246 run.Raw('&&'),
247 'mkdir',
248 '--',
249 subdir,
250 ],
251 )
252 else:
253 remote.run(
254 args=[
255 # cd first so this will fail if the mount point does
256 # not exist; pure install -d will silently do the
257 # wrong thing
258 'cd',
259 '--',
260 mnt,
261 run.Raw('&&'),
262 'sudo',
263 'install',
264 '-d',
265 '-m', '0755',
266 '--owner={user}'.format(user=dir_owner),
267 '--',
268 subdir,
269 ],
270 )
271
272 return created_mountpoint
273
274
def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

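    # Run one workunit at a time, spawning it in parallel across all
    # clients, so every client finishes a unit before the next one starts.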
    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env,
                        basedir,
                        subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    if cleanup:
        for role, _ in client_remotes.items():
            _delete_dir(ctx, role, created_mountpoint[role])


def _run_tests(ctx, refspec, role, tests, env, basedir,
               subdir=None, timeout=None, cleanup=True,
               coverage_and_limits=True):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx: Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param tests: specific tests specified.
    :param env: environment set in yaml file. Could be None.
    :param subdir: subdirectory set in yaml file. Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
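    :param basedir: top-level directory inside the clone that holds the
                    workunits, e.g. 'qa/workunits'.
    :param cleanup: if True, remove the scratch directory once the tests
                    have finished.
    :param coverage_and_limits: if True, wrap each workunit with
                    adjust-ulimits and ceph-coverage.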
324 """
325 testdir = misc.get_testdir(ctx)
f67539c2 326 assert isinstance(role, str)
7c673cae
FG
327 cluster, type_, id_ = misc.split_role(role)
328 assert type_ == 'client'
329 remote = get_remote_for_role(ctx, role)
330 mnt = _client_mountpoint(ctx, cluster, id_)
331 # subdir so we can remove and recreate this a lot without sudo
332 if subdir is None:
333 scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
334 else:
335 scratch_tmp = os.path.join(mnt, subdir)
336 clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
c07f9fc5
FG
337 srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
338 basedir=basedir)
7c673cae
FG
339
340 git_url = teuth_config.get_ceph_qa_suite_git_url()
341 # if we are running an upgrade test, and ceph-ci does not have branches like
342 # `jewel`, so should use ceph.git as an alternative.
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))
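    # Build the list of executable workunits under srcdir (running 'make'
    # first if a Makefile is present) and stash it on the remote host.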
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )

    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(remote.read_file(workunits_file).decode().split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            dir_or_fname, *optional_args = shlex.split(spec)
            log.info('Running workunits matching %s on %s...', dir_or_fname, role)
            # a spec "foo" matches the executable named "foo" as well as
            # anything under the "foo/" directory
            to_run = [w for w in workunits
                      if os.path.commonpath([w, dir_or_fname]) == dir_or_fname]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
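                # Build the command: create and enter the scratch dir, export
                # the environment the workunit scripts expect (CEPH_REF,
                # TESTDIR, CEPH_ID, ...), then run the workunit itself.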
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_MNT={dir}'.format(dir=mnt)),
                ]
                if env is not None:
                    for var, val in env.items():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                if coverage_and_limits:
                    args.extend([
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args + optional_args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                if cleanup:
                    args = ['sudo', 'rm', '-rf', '--', scratch_tmp]
                    remote.run(logger=log.getChild(role), args=args, timeout=(60*60))
    finally:
        log.info('Stopping %s on %s...', tests, role)
        args = ['sudo', 'rm', '-rf', '--', workunits_file, clonedir]
        # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang.
        remote.run(
            logger=log.getChild(role),
            args=args,
        )