"""
Workunit task -- Run ceph on sets of specific clients
"""
import logging
import os
import re
import shlex

from tasks.util import get_remote_for_role
from tasks.util.workunit import get_refspec_after_overrides

from teuthology import misc
from teuthology.config import config as teuth_config
from teuthology.orchestra.run import CommandFailedError
from teuthology.parallel import parallel
from teuthology.orchestra import run

log = logging.getLogger(__name__)

def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
                client.0: [direct_io, xattrs.sh]
                client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
                all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h)::

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
                all: [snaps]
            env:
                FOO: bar
                BAZ: quux
            timeout: 3h

    You can also pass optional arguments to the found workunits::

        tasks:
        - workunit:
            clients:
                all:
                    - test-ceph-helpers.sh test_get_config

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
                backup.client.0: [foo]
                client.1: [bar] # cluster is implicitly 'ceph'

    You can also specify an alternative top-level dir to 'qa/workunits', like
    'qa/standalone', with::

        tasks:
        - install:
        - workunit:
            basedir: qa/standalone
            clients:
                client.0:
                    - test-ceph-helpers.sh

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    overrides = ctx.config.get('overrides', {})
    refspec = get_refspec_after_overrides(config, overrides)
    timeout = config.get('timeout', '3h')
    cleanup = config.get('cleanup', True)

    log.info('Pulling workunits from ref %s', refspec)

    created_mountpoint = {}

    if config.get('env') is not None:
        assert isinstance(config['env'], dict), 'env must be a dictionary'
    clients = config['clients']

    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
    for role in clients.keys():
        assert isinstance(role, str)
        if role == "all":
            continue

        assert 'client' in role
        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
        created_mountpoint[role] = created_mnt_dir

    # Execute any non-all workunits
    log.info("timeout={}".format(timeout))
    log.info("cleanup={}".format(cleanup))
    with parallel() as p:
        for role, tests in clients.items():
            if role != "all":
                p.spawn(_run_tests, ctx, refspec, role, tests,
                        config.get('env'),
                        basedir=config.get('basedir', 'qa/workunits'),
                        timeout=timeout,
                        cleanup=cleanup,
                        coverage_and_limits=not config.get('no_coverage_and_limits', None))

    if cleanup:
        # Clean up dirs from any non-all workunits
        for role, created in created_mountpoint.items():
            _delete_dir(ctx, role, created)

    # Execute any 'all' workunits
    if 'all' in clients:
        all_tasks = clients["all"]
        _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                              config.get('basedir', 'qa/workunits'),
                              config.get('subdir'), timeout=timeout,
                              cleanup=cleanup)


def _client_mountpoint(ctx, cluster, id_):
    """
    Returns the path to the expected mountpoint for workunits running
    on some kind of filesystem.
    """
    # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
    # only include the cluster name in the dir if the cluster is not 'ceph'
    if cluster == 'ceph':
        dir_ = 'mnt.{0}'.format(id_)
    else:
        dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
    return os.path.join(misc.get_testdir(ctx), dir_)


def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete the directory this role used inside the mount, and, if the
    mount point itself was artificially created, delete that as well.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))


def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role. This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (None if not used)
    """
    created_mountpoint = False
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)
    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(
            args=[
                'stat',
                '--',
                mnt,
            ],
        )
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
    except CommandFailedError:
        remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)
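    # e.g. a role of 'client.0' with no explicit subdir yields a scratch
    # directory of <mnt>/client.0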

    if created_mountpoint:
        remote.run(
            args=[
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'mkdir',
                '--',
                subdir,
            ],
        )
    else:
        remote.run(
            args=[
                # cd first so this will fail if the mount point does
                # not exist; pure install -d will silently do the
                # wrong thing
                'cd',
                '--',
                mnt,
                run.Raw('&&'),
                'sudo',
                'install',
                '-d',
                '-m', '0755',
                '--owner={user}'.format(user=dir_owner),
                '--',
                subdir,
            ],
        )

    return created_mountpoint


def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See _run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

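    # Each test unit runs on all clients in parallel; successive units run
    # one after another.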
    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env,
                        basedir,
                        subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    if cleanup:
        for role, _ in client_remotes.items():
            _delete_dir(ctx, role, created_mountpoint[role])


def _run_tests(ctx, refspec, role, tests, env, basedir,
               subdir=None, timeout=None, cleanup=True,
               coverage_and_limits=True):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx: Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param tests: the list of tests to run
    :param env: environment set in yaml file. Could be None.
    :param basedir: top-level directory inside the clone that holds the
                    workunits, e.g. 'qa/workunits'
    :param subdir: subdirectory set in yaml file. Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
    """
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, str)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
                                       basedir=basedir)

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    # If we are running an upgrade test, ceph-ci may not have branches like
    # `jewel`, so fall back to ceph.git as an alternative.
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))
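    # For instance, a git_url of https://github.com/ceph/ceph-ci.git would be
    # retried as https://github.com/ceph/ceph.git (URLs illustrative; the
    # actual value comes from teuth_config.get_ceph_qa_suite_git_url()).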
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )

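    # workunits.list.<role> now holds the NUL-separated paths (relative to
    # srcdir, thanks to find's %P format) of every executable file in the
    # cloned basedir.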
    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(remote.read_file(workunits_file).decode().split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            dir_or_fname, *optional_args = shlex.split(spec)
            log.info('Running workunits matching %s on %s...', dir_or_fname, role)
            # match executables named "foo" or "foo/*" with workunit named
            # "foo"
            to_run = [w for w in workunits
                      if os.path.commonpath([w, dir_or_fname]) == dir_or_fname]
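            # e.g. given workunits 'suites/fsstress.sh' and 'suites/pjd.sh'
            # (names illustrative), the spec 'suites' matches both, while
            # 'suites/pjd.sh' matches only that one script.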
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_MNT={dir}'.format(dir=mnt)),
                ]
                if env is not None:
                    for var, val in env.items():
                        quoted_val = shlex.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
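                # e.g. env={'FOO': 'bar baz'} becomes the shell prefix
                # FOO='bar baz' ahead of the workunit command.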
                if coverage_and_limits:
                    args.extend([
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
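                # e.g. timeout='3h' invokes the script as:
                #   timeout 3h <srcdir>/<workunit> [optional args]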
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args + optional_args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
        if cleanup:
            args = ['sudo', 'rm', '-rf', '--', scratch_tmp]
            remote.run(logger=log.getChild(role), args=args, timeout=(60*60))
    finally:
        log.info('Stopping %s on %s...', tests, role)
        args = ['sudo', 'rm', '-rf', '--', workunits_file, clonedir]
        # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang.
        remote.run(
            logger=log.getChild(role),
            args=args,
        )