"""
Workunit task -- Run ceph on sets of specific clients
"""
import logging
import pipes
import os
import re

from copy import deepcopy
from util import get_remote_for_role

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.orchestra.run import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)
class Refspec:
    def __init__(self, refspec):
        self.refspec = refspec

    def __str__(self):
        return self.refspec

    def _clone(self, git_url, clonedir, opts=None):
        # wipe the clone target first so retries always start from scratch
        if opts is None:
            opts = []
        return (['rm', '-rf', clonedir] +
                [run.Raw('&&')] +
                ['git', 'clone'] + opts +
                [git_url, clonedir])

    def _cd(self, clonedir):
        return ['cd', clonedir]

    def _checkout(self):
        return ['git', 'checkout', self.refspec]

    def clone(self, git_url, clonedir):
        return (self._clone(git_url, clonedir) +
                [run.Raw('&&')] +
                self._cd(clonedir) +
                [run.Raw('&&')] +
                self._checkout())
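# As a rough illustration (not executed here), given the definitions above,
# Refspec('v0.47').clone(url, '/tmp/clone') builds the argument list for a
# single chained remote command, equivalent to:
#
#   rm -rf /tmp/clone && git clone <url> /tmp/clone && \
#       cd /tmp/clone && git checkout v0.47
#
# (The run.Raw('&&') markers are what separate the chained commands.)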
class Branch(Refspec):
    def __init__(self, tag):
        Refspec.__init__(self, tag)

    def clone(self, git_url, clonedir):
        # a named branch can be fetched with a shallow clone, so only the
        # branch tip is transferred and no separate checkout is needed
        opts = ['--depth', '1',
                '--branch', self.refspec]
        return (self._clone(git_url, clonedir, opts) +
                [run.Raw('&&')] +
                self._cd(clonedir))
class Head(Refspec):
    def __init__(self):
        Refspec.__init__(self, 'HEAD')

    def clone(self, git_url, clonedir):
        # HEAD of the default branch; a shallow clone is enough here too
        opts = ['--depth', '1']
        return (self._clone(git_url, clonedir, opts) +
                [run.Raw('&&')] +
                self._cd(clonedir))
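# Unlike the base class, Branch and Head rely on `git clone --depth 1` to
# land on the right commit, so they skip the explicit `git checkout`; e.g.
# Branch('luminous').clone(url, '/tmp/clone') is roughly equivalent to:
#
#   rm -rf /tmp/clone && \
#       git clone --depth 1 --branch luminous <url> /tmp/clone && \
#       cd /tmp/clone
#
# ('luminous' is only an example branch name, not something this module pins.)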
def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
              client.0: [direct_io, xattrs.sh]
              client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients:
        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
              all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h):

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
              all: [snaps]
            env:
              FOO: bar
              BAZ: quux
            timeout: 3h

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
              backup.client.0: [foo]
              client.1: [bar] # cluster is implicitly 'ceph'

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    # mimic the behavior of the "install" task, where the "overrides" are
    # actually the defaults of that task. in other words, if none of "sha1",
    # "tag", or "branch" is specified by a "workunit" task, we will update
    # it with the information in the "workunit" sub-task nested in "overrides".
    overrides = deepcopy(ctx.config.get('overrides', {}).get('workunit', {}))
    refspecs = {'branch': Branch, 'tag': Refspec, 'sha1': Refspec}
    if any(map(lambda i: i in config, refspecs.iterkeys())):
        for i in refspecs.iterkeys():
            overrides.pop(i, None)
    misc.deep_merge(config, overrides)

    for spec, cls in refspecs.iteritems():
        refspec = config.get(spec)
        if refspec:
            refspec = cls(refspec)
            break
    if refspec is None:
        refspec = Head()

    timeout = config.get('timeout', '3h')

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.iterkeys():
        assert isinstance(role, basestring)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
    with parallel() as p:
        for role, tests in clients.iteritems():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'), timeout=timeout)

    # Clean up dirs from any non-all workunits
    for role, created in created_mountpoint.items():
        _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('subdir'), timeout=timeout)
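# To make the refspec resolution above concrete: a config containing
# {'branch': 'foo'} yields Branch('foo'), while {'tag': ...} and {'sha1': ...}
# yield plain Refspec objects (both need an explicit checkout); if none of the
# three keys is present, the loop falls through and Head() is used, i.e. the
# tip of the default branch.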
def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
    """
    # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
    # only include the cluster name in the dir if the cluster is not 'ceph'
    if cluster == 'ceph':
        dir_ = 'mnt.{0}'.format(id_)
    else:
        dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
    return os.path.join(misc.get_testdir(ctx), dir_)
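# For instance, with a test dir of /home/ubuntu/cephtest (a typical teuthology
# layout, shown here only as an example), client.0 in the default 'ceph'
# cluster maps to /home/ubuntu/cephtest/mnt.0, while backup.client.0 maps to
# /home/ubuntu/cephtest/mnt.backup.0.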
def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete the directory this role ran workunits in, and, if the mount
    point was artificially created, the mount point itself.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))
def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role.  This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
    """
    created_mountpoint = False
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)
    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(
            args=[
                'stat',
                '--',
                mnt,
            ],
        )
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
    except CommandFailedError:
        remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    if created_mountpoint:
        # freshly created mount point, already owned by us: plain mkdir
        remote.run(
            args=[
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'mkdir',
                '--',
                subdir,
            ],
        )
    else:
        remote.run(
            args=[
                # cd first so this will fail if the mount point does
                # not exist; pure install -d will silently do the
                # wrong thing
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'sudo',
                'install',
                '-d',
                '-m', '0755',
                '--owner={user}'.format(user=dir_owner),
                '--',
                subdir,
            ],
        )

    return created_mountpoint
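# The boolean returned above is what task() and _spawn_on_all_clients() stash
# in their created_mountpoint dicts, so that _delete_dir() later knows whether
# it also needs to rmdir the artificial mount point created here.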
def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    for role, _ in client_remotes.items():
        _delete_dir(ctx, role, created_mountpoint[role])
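# Note the nesting above: for each single workunit, one _run_tests() call is
# spawned per client in parallel, but the workunits themselves run one after
# another, so an "all" list never runs two different workunits concurrently.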
def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx:     Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param tests:   specific tests specified.
    :param env:     environment set in yaml file.  Could be None.
    :param subdir:  subdirectory set in yaml file.  Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
    """
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, basestring)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/qa/workunits'.format(cdir=clonedir)

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    # if we are running an upgrade test, ceph-ci may not have branches like
    # `jewel`, so fall back to ceph.git as an alternative.
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))
    # build the workunits if a Makefile is present, then record the list of
    # executable workunits in a per-role file on the remote
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )
    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(misc.get_file(remote, workunits_file).split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            log.info('Running workunits matching %s on %s...', spec, role)
            # a spec matches either one workunit exactly or a whole directory
            prefix = '{spec}/'.format(spec=spec)
            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                ]
                if env is not None:
                    for var, val in env.iteritems():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                args.extend([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                remote.run(
                    logger=log.getChild(role),
                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
                )
    finally:
        log.info('Stopping %s on %s...', tests, role)
        remote.run(
            logger=log.getChild(role),
            args=[
                'rm', '-rf', '--', workunits_file, clonedir,
            ],
        )
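# End-to-end, each workunit executes on the remote as one shell pipeline of
# roughly this shape (paths abbreviated; 'direct_io/test_sync_io' is only a
# hypothetical workunit name):
#
#   mkdir -p -- <scratch_tmp> && cd -- <scratch_tmp> && \
#       CEPH_CLI_TEST_DUP_COMMAND=1 CEPH_REF=<refspec> TESTDIR="<testdir>" \
#       CEPH_ARGS="--cluster ceph" CEPH_ID="0" PATH=$PATH:/usr/sbin \
#       CEPH_BASE=<clonedir> adjust-ulimits ceph-coverage \
#       <testdir>/archive/coverage timeout 3h \
#       <clonedir>/qa/workunits/direct_io/test_sync_io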