1"""
2Workunit task -- Run ceph on sets of specific clients
3"""
4import logging
5import pipes
6import os
224ce89b 7import re

from copy import deepcopy
from util import get_remote_for_role

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.orchestra.run import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)


class Refspec:
    def __init__(self, refspec):
        self.refspec = refspec

    def __str__(self):
        return self.refspec

    def _clone(self, git_url, clonedir, opts=None):
        if opts is None:
            opts = []
        return (['rm', '-rf', clonedir] +
                [run.Raw('&&')] +
                ['git', 'clone'] + opts +
                [git_url, clonedir])

    def _cd(self, clonedir):
        return ['cd', clonedir]

    def _checkout(self):
        return ['git', 'checkout', self.refspec]

    def clone(self, git_url, clonedir):
        return (self._clone(git_url, clonedir) +
                [run.Raw('&&')] +
                self._cd(clonedir) +
                [run.Raw('&&')] +
                self._checkout())


class Branch(Refspec):
    def __init__(self, tag):
        Refspec.__init__(self, tag)

    def clone(self, git_url, clonedir):
        opts = ['--depth', '1',
                '--branch', self.refspec]
        return (self._clone(git_url, clonedir, opts) +
                [run.Raw('&&')] +
                self._cd(clonedir))


class Head(Refspec):
    def __init__(self):
        Refspec.__init__(self, 'HEAD')

    def clone(self, git_url, clonedir):
        opts = ['--depth', '1']
        return (self._clone(git_url, clonedir, opts) +
                [run.Raw('&&')] +
                self._cd(clonedir))


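# Illustrative note (not executed by the task): the Refspec helpers above only
# build argv lists for teuthology's remote runner. Assuming a hypothetical
# branch name, Branch('luminous').clone(git_url, '/tmp/clone') expands to
# roughly:
#   rm -rf /tmp/clone && \
#   git clone --depth 1 --branch luminous <git_url> /tmp/clone && \
#   cd /tmp/clone
# while the plain Refspec used for tags and sha1s does a full clone followed by
# an explicit "git checkout <refspec>".

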
def task(ctx, config):
    """
    Run all workunits found under the specified path on the given clients.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
              client.0: [direct_io, xattrs.sh]
              client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
              all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h):

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
              all: [snaps]
            env:
              FOO: bar
              BAZ: quux
            timeout: 3h

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
              backup.client.0: [foo]
              client.1: [bar] # cluster is implicitly 'ceph'

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    # mimic the behavior of the "install" task, where the "overrides" are
    # actually the defaults of that task. in other words, if none of "sha1",
    # "tag", or "branch" is specified by the "workunit" task, we will update
    # it with the information in the "workunit" sub-task nested in "overrides".
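    # For example (illustrative): with
    #   overrides: {workunit: {branch: master}}
    # a workunit task that already sets "sha1" keeps its sha1 and the "branch"
    # override is discarded, while a workunit task that sets none of the three
    # refspec keys inherits "branch: master" from the overrides.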
    overrides = deepcopy(ctx.config.get('overrides', {}).get('workunit', {}))
    refspecs = {'branch': Branch, 'tag': Refspec, 'sha1': Refspec}
    if any(map(lambda i: i in config, refspecs.iterkeys())):
        for i in refspecs.iterkeys():
            overrides.pop(i, None)
    misc.deep_merge(config, overrides)

    for spec, cls in refspecs.iteritems():
        refspec = config.get(spec)
        if refspec:
            refspec = cls(refspec)
            break
    if refspec is None:
        refspec = Head()
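    # e.g. "tag: v0.47" becomes Refspec('v0.47') and "branch: foo" becomes
    # Branch('foo'); if no sha1/tag/branch is given, Head() leaves the shallow
    # clone at the default branch's HEAD.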

    timeout = config.get('timeout', '3h')

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.iterkeys():
        assert isinstance(role, basestring)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
    with parallel() as p:
        for role, tests in clients.iteritems():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'), timeout=timeout)

    # Clean up dirs from any non-all workunits
    for role, created in created_mountpoint.items():
        _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('subdir'), timeout=timeout)


def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
    """
    # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
    # only include the cluster name in the dir if the cluster is not 'ceph'
    if cluster == 'ceph':
        dir_ = 'mnt.{0}'.format(id_)
    else:
        dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
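    # For illustration (the test dir itself is chosen by teuthology):
    #   _client_mountpoint(ctx, 'ceph', '0')   -> <testdir>/mnt.0
    #   _client_mountpoint(ctx, 'backup', '0') -> <testdir>/mnt.backup.0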
    return os.path.join(misc.get_testdir(ctx), dir_)


def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete the scratch directory used by this role and, if the mount point
    was created by this task, remove the mount point as well.

    :param ctx: Context
    :param role: "<cluster>.client.<id>" or "client.<id>" role string
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))


def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role. This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "<cluster>.client.<id>" or "client.<id>" role string
    :param subdir: use this subdir (None/False if not used)
    """
    created_mountpoint = False
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)
    # if neither kclient nor ceph-fuse is required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(
            args=[
                'stat',
                '--',
                mnt,
            ],
        )
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
    except CommandFailedError:
        remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    if created_mountpoint:
        remote.run(
            args=[
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'mkdir',
                '--',
                subdir,
            ],
        )
    else:
        remote.run(
            args=[
                # cd first so this will fail if the mount point does
                # not exist; pure install -d will silently do the
                # wrong thing
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'sudo',
                'install',
                '-d',
                '-m', '0755',
                '--owner={user}'.format(user=dir_owner),
                '--',
                subdir,
            ],
        )

    return created_mountpoint


def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    for role, _ in client_remotes.items():
        _delete_dir(ctx, role, created_mountpoint[role])


def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
    """
    Run the individual tests. Create a scratch directory, check out the
    workunits from git, make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx: Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param tests: list of workunit specs to run.
    :param env: environment set in yaml file. Could be None.
    :param subdir: subdirectory set in yaml file. Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
    """
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, basestring)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/qa/workunits'.format(cdir=clonedir)

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    # if we are running an upgrade test, ceph-ci may not have branches like
    # `jewel`, so fall back to ceph.git as an alternative.
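    # For example, https://github.com/ceph/ceph-ci.git would be retried as
    # https://github.com/ceph/ceph.git (URLs shown for illustration only; the
    # actual URL comes from teuth_config).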
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )
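    # workunits.list.<role> now holds a NUL-separated list of every executable
    # file under qa/workunits, as paths relative to that directory
    # (e.g. "<dir>/<script>.sh"); the entries are matched against the test
    # specs below.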

    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(misc.get_file(remote, workunits_file).split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            log.info('Running workunits matching %s on %s...', spec, role)
            prefix = '{spec}/'.format(spec=spec)
            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                ]
                if env is not None:
                    for var, val in env.iteritems():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                args.extend([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
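                # The assembled command line is roughly (illustrative values):
                #   mkdir -p -- <scratch_tmp> && cd -- <scratch_tmp> &&
                #   CEPH_CLI_TEST_DUP_COMMAND=1 CEPH_REF=<refspec> TESTDIR="<testdir>"
                #   CEPH_ARGS="--cluster <cluster>" CEPH_ID="<id>" PATH=$PATH:/usr/sbin
                #   CEPH_BASE=<clonedir> [user env vars]
                #   adjust-ulimits ceph-coverage <testdir>/archive/coverage
                #   [timeout <timeout>] <srcdir>/<workunit>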
                remote.run(
                    logger=log.getChild(role),
                    args=args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                remote.run(
                    logger=log.getChild(role),
                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
                )
    finally:
        log.info('Stopping %s on %s...', tests, role)
        remote.run(
            logger=log.getChild(role),
            args=[
                'rm', '-rf', '--', workunits_file, clonedir,
            ],
        )