"""
Workunit task -- Run ceph on sets of specific clients
"""
import logging
import pipes
import os
import re

from copy import deepcopy
from util import get_remote_for_role

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.orchestra.run import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)

class Refspec:
    def __init__(self, refspec):
        self.refspec = refspec

    def __str__(self):
        return self.refspec

    def _clone(self, git_url, clonedir, opts=None):
        if opts is None:
            opts = []
        return (['rm', '-rf', clonedir] +
                [run.Raw('&&')] +
                ['git', 'clone'] + opts +
                [git_url, clonedir])

    def _cd(self, clonedir):
        return ['cd', clonedir]

    def _checkout(self):
        return ['git', 'checkout', self.refspec]

    def clone(self, git_url, clonedir):
        return (self._clone(git_url, clonedir) +
                [run.Raw('&&')] +
                self._cd(clonedir) +
                [run.Raw('&&')] +
                self._checkout())


class Branch(Refspec):
    def __init__(self, tag):
        Refspec.__init__(self, tag)

    def clone(self, git_url, clonedir):
        opts = ['--depth', '1',
                '--branch', self.refspec]
        return (self._clone(git_url, clonedir, opts) +
                [run.Raw('&&')] +
                self._cd(clonedir))


class Head(Refspec):
    def __init__(self):
        Refspec.__init__(self, 'HEAD')

    def clone(self, git_url, clonedir):
        opts = ['--depth', '1']
        return (self._clone(git_url, clonedir, opts) +
                [run.Raw('&&')] +
                self._cd(clonedir))
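
# For example, with a hypothetical URL and clone dir,
#   Branch('luminous').clone('https://example.com/ceph.git', '/tmp/clone')
# builds an argument list that teuthology renders as the shell command
#   rm -rf /tmp/clone && \
#   git clone --depth 1 --branch luminous https://example.com/ceph.git /tmp/clone && \
#   cd /tmp/clone
# (the run.Raw('&&') tokens stay unquoted, so the remote shell treats
# them as command separators rather than literal arguments).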

def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
              client.0: [direct_io, xattrs.sh]
              client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients:

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
              all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h):

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
              all: [snaps]
            env:
              FOO: bar
              BAZ: quux
            timeout: 3h

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
              backup.client.0: [foo]
              client.1: [bar] # cluster is implicitly 'ceph'

    You can also specify an alternative top-level dir to 'qa/workunits', like
    'qa/standalone', with::

        tasks:
        - install:
        - workunit:
            basedir: qa/standalone
            clients:
              client.0:
                - test-ceph-helpers.sh

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    # mimic the behavior of the "install" task, where the "overrides" are
    # actually the defaults of that task. in other words, if none of "sha1",
    # "tag", or "branch" is specified by a "workunit" task, we update it
    # with the information in the "workunit" sub-task nested in "overrides".
    overrides = deepcopy(ctx.config.get('overrides', {}).get('workunit', {}))
    refspecs = {'branch': Branch, 'tag': Refspec, 'sha1': Refspec}
    if any(map(lambda i: i in config, refspecs.iterkeys())):
        for i in refspecs.iterkeys():
            overrides.pop(i, None)
    misc.deep_merge(config, overrides)
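    # For instance (hypothetical values): given
    #     overrides:
    #       workunit:
    #         branch: jewel
    #         env: {FOO: bar}
    # a workunit task that sets its own sha1 keeps that sha1, because the
    # conflicting 'branch' default was popped above, yet it still inherits
    # env: {FOO: bar} through deep_merge.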
    for spec, cls in refspecs.iteritems():
        refspec = config.get(spec)
        if refspec:
            refspec = cls(refspec)
            break
    else:
        refspec = Head()

    timeout = config.get('timeout', '3h')

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.iterkeys():
        assert isinstance(role, basestring)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
    with parallel() as p:
        for role, tests in clients.iteritems():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'),
                        basedir=config.get('basedir', 'qa/workunits'),
                        timeout=timeout)

    # Clean up dirs from any non-all workunits
    for role, created in created_mountpoint.items():
        _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('basedir', 'qa/workunits'),
                              config.get('subdir'), timeout=timeout)

def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
    """
    # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
    # only include the cluster name in the dir if the cluster is not 'ceph'
    if cluster == 'ceph':
        dir_ = 'mnt.{0}'.format(id_)
    else:
        dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
    return os.path.join(misc.get_testdir(ctx), dir_)
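
# For example, with a hypothetical testdir of /home/ubuntu/cephtest:
#   _client_mountpoint(ctx, 'ceph', '0')   -> /home/ubuntu/cephtest/mnt.0
#   _client_mountpoint(ctx, 'backup', '0') -> /home/ubuntu/cephtest/mnt.backup.0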

def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete file used by this role, and delete the directory that this
    role appeared in.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))

def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role.  This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
    """
    created_mountpoint = False
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)
    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(
            args=[
                'stat',
                '--',
                mnt,
            ],
        )
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
    except CommandFailedError:
        remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    if created_mountpoint:
        remote.run(
            args=[
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'mkdir',
                '--',
                subdir,
            ],
        )
    else:
        remote.run(
            args=[
                # cd first so this will fail if the mount point does
                # not exist; pure install -d will silently do the
                # wrong thing
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'sudo',
                'install',
                '-d',
                '-m', '0755',
                '--owner={user}'.format(user=dir_owner),
                '--',
                subdir,
            ],
        )

    return created_mountpoint
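
# Note the return value: it is True only when this call had to create the
# mount point itself. task() records it per role and later hands it back to
# _delete_dir(), which then knows to rmdir the artificial mount point.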

def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir,
                          timeout=None):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env,
                        basedir,
                        subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    for role, _ in client_remotes.items():
        _delete_dir(ctx, role, created_mountpoint[role])
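
# In other words, for tests=['a', 'b'] every client runs workunit 'a' in
# parallel, then every client runs 'b', rather than each client racing
# through the whole list independently.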

def _run_tests(ctx, refspec, role, tests, env, basedir,
               subdir=None, timeout=None):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx:     Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param tests:   specific tests specified.
    :param env:     environment set in yaml file.  Could be None.
    :param subdir:  subdirectory set in yaml file.  Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days.  If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
    """
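    # For example (hypothetical values): timeout='3h' later prepends
    # ['timeout', '3h'] to the workunit command, while timeout='0' or
    # timeout=None leaves the command unwrapped.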
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, basestring)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
                                       basedir=basedir)

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    # if we are running an upgrade test, ceph-ci may not have branches like
    # `jewel`, so we should use ceph.git as an alternative.
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )
    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(misc.get_file(remote, workunits_file).split('\0'))
    assert workunits
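    # For instance, if the list built above were (hypothetically)
    #   ['direct_io/big.sh', 'direct_io/test_sync_io.sh', 'snaps'],
    # a spec of 'direct_io' matches the first two entries through the
    # 'direct_io/' prefix below, while a spec of 'snaps' matches exactly.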
    try:
        assert isinstance(tests, list)
        for spec in tests:
            log.info('Running workunits matching %s on %s...', spec, role)
            prefix = '{spec}/'.format(spec=spec)
            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
                ]
                if env is not None:
                    for var, val in env.iteritems():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                args.extend([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                remote.run(
                    logger=log.getChild(role),
                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
                )
    finally:
        log.info('Stopping %s on %s...', tests, role)
        remote.run(
            logger=log.getChild(role),
            args=[
                'rm', '-rf', '--', workunits_file, clonedir,
            ],
        )