"""
vstart_runner: override Filesystem and Mount interfaces to run a CephFSTestCase against a vstart
ceph instance instead of a packaged/installed cluster. Use this to turn around test cases
quickly during development.

Simple usage (assuming teuthology and ceph checked out in ~/git):

    # Activate the teuthology virtualenv
    source ~/git/teuthology/virtualenv/bin/activate
    # Go into your ceph build directory
    cd ~/git/ceph/build
    # Invoke a test using this script
    python ~/git/ceph/qa/tasks/vstart_runner.py --create tasks.cephfs.test_data_scan

Alternative usage:

    # Alternatively, if you use different paths, specify them as follows:
    LD_LIBRARY_PATH=`pwd`/lib PYTHONPATH=~/git/teuthology:~/git/ceph/qa:`pwd`/../src/pybind:`pwd`/lib/cython_modules/lib.3 python ~/git/ceph/qa/tasks/vstart_runner.py

    # If you wish to drop to a python shell on failures, use --interactive:
    python ~/git/ceph/qa/tasks/vstart_runner.py --interactive

    # If you wish to run a named test case, pass it as an argument:
    python ~/git/ceph/qa/tasks/vstart_runner.py tasks.cephfs.test_data_scan

    # Also, you can create the cluster once and then run named test cases against it:
    python ~/git/ceph/qa/tasks/vstart_runner.py --create-cluster-only
    python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_health
    python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_rgw

"""
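# Note: exec_test() below also understands a few flags not shown in the
# examples above: --teardown, --kclient, --log-ps-output, --clear-old-log and
# --ignore-missing-binaries. For example (illustrative invocation only):
#
#   python ~/git/ceph/qa/tasks/vstart_runner.py --create --teardown tasks.cephfs.test_data_scan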

from six import StringIO
from io import BytesIO
from collections import defaultdict
import getpass
import signal
import tempfile
import threading
import datetime
import shutil
import re
import os
import time
import sys
import errno
from unittest import suite, loader
import unittest
import platform
from teuthology import misc
from teuthology.orchestra.run import Raw, quote
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.config import config as teuth_config
import six
import logging

def init_log():
    global log
    if log is not None:
        del log
    log = logging.getLogger(__name__)

    global logpath
    logpath = './vstart_runner.log'

    handler = logging.FileHandler(logpath)
    formatter = logging.Formatter(
        fmt=u'%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')
    handler.setFormatter(formatter)
    log.addHandler(handler)
    log.setLevel(logging.INFO)

log = None
init_log()


def respawn_in_path(lib_path, python_paths):
    execv_cmd = ['python']
    if platform.system() == "Darwin":
        lib_path_var = "DYLD_LIBRARY_PATH"
    else:
        lib_path_var = "LD_LIBRARY_PATH"

    py_binary = os.environ.get("PYTHON", sys.executable)

    if lib_path_var in os.environ:
        if lib_path not in os.environ[lib_path_var]:
            os.environ[lib_path_var] += ':' + lib_path
            os.execvp(py_binary, execv_cmd + sys.argv)
    else:
        os.environ[lib_path_var] = lib_path
        os.execvp(py_binary, execv_cmd + sys.argv)

    for p in python_paths:
        sys.path.insert(0, p)


# Let's use some sensible defaults
if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"):

    # A list of candidate paths for each package we need
    guesses = [
        ["~/git/teuthology", "~/scm/teuthology", "~/teuthology"],
        ["lib/cython_modules/lib.3"],
        ["../src/pybind"],
    ]

    python_paths = []

    # Up one level so that "tasks.foo.bar" imports work
    python_paths.append(os.path.abspath(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
    ))

    for package_guesses in guesses:
        for g in package_guesses:
            g_exp = os.path.abspath(os.path.expanduser(g))
            if os.path.exists(g_exp):
                python_paths.append(g_exp)

    ld_path = os.path.join(os.getcwd(), "lib/")
    print("Using guessed paths {0} {1}".format(ld_path, python_paths))
    respawn_in_path(ld_path, python_paths)


try:
    from teuthology.exceptions import CommandFailedError
    from tasks.ceph_manager import CephManager
    from tasks.cephfs.fuse_mount import FuseMount
    from tasks.cephfs.kernel_mount import KernelMount
    from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
    from tasks.mgr.mgr_test_case import MgrCluster
    from teuthology.contextutil import MaxWhileTries
    from teuthology.task import interactive
except ImportError:
    sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv "
                     "and set PYTHONPATH to point to teuthology and ceph-qa-suite?\n***\n\n")
    raise

# Must import after teuthology because of gevent monkey patching
import subprocess

if os.path.exists("./CMakeCache.txt"):
    # Running in build dir of a cmake build
    BIN_PREFIX = "./bin/"
    SRC_PREFIX = "../src"
else:
    # Running in src/ of an autotools build
    BIN_PREFIX = "./"
    SRC_PREFIX = "./"


def rm_nonascii_chars(var):
    var = var.replace('\xe2\x80\x98', '\'')
    var = var.replace('\xe2\x80\x99', '\'')
    return var

class LocalRemoteProcess(object):
    def __init__(self, args, subproc, check_status, stdout, stderr):
        self.args = args
        self.subproc = subproc
        self.stdout = stdout or BytesIO()
        self.stderr = stderr or BytesIO()

        self.check_status = check_status
        self.exitstatus = self.returncode = None

    def wait(self):
        if self.finished:
            # Avoid calling communicate() on a dead process because it'll
            # give you stick about std* already being closed
            if self.check_status and self.exitstatus != 0:
                raise CommandFailedError(self.args, self.exitstatus)
            else:
                return

        out, err = self.subproc.communicate()
        out, err = rm_nonascii_chars(out), rm_nonascii_chars(err)
        self.stdout.write(out)
        self.stderr.write(err)

        self.exitstatus = self.returncode = self.subproc.returncode

        if self.exitstatus != 0:
            sys.stderr.write(six.ensure_str(out))
            sys.stderr.write(six.ensure_str(err))

        if self.check_status and self.exitstatus != 0:
            raise CommandFailedError(self.args, self.exitstatus)

    @property
    def finished(self):
        if self.exitstatus is not None:
            return True

        if self.subproc.poll() is not None:
            out, err = self.subproc.communicate()
            self.stdout.write(out)
            self.stderr.write(err)
            self.exitstatus = self.returncode = self.subproc.returncode
            return True
        else:
            return False

    def kill(self):
        log.info("kill ")
        if self.subproc.pid and not self.finished:
            log.info("kill: killing pid {0} ({1})".format(
                self.subproc.pid, self.args))
            safe_kill(self.subproc.pid)
        else:
            log.info("kill: already terminated ({0})".format(self.args))

    @property
    def stdin(self):
        class FakeStdIn(object):
            def __init__(self, mount_daemon):
                self.mount_daemon = mount_daemon

            def close(self):
                self.mount_daemon.kill()

        return FakeStdIn(self)


class LocalRemote(object):
    """
    Amusingly named class to present the teuthology RemoteProcess interface when we are really
    running things locally for vstart

    Run this inside your src/ dir!
    """

    def __init__(self):
        self.name = "local"
        self.hostname = "localhost"
        self.user = getpass.getuser()

    def get_file(self, path, sudo, dest_dir):
        tmpfile = tempfile.NamedTemporaryFile(delete=False).name
        shutil.copy(path, tmpfile)
        return tmpfile

    # XXX: This method ignores the error raised when src and dst are
    # holding same path. For teuthology, same path still represents
    # different locations as they lie on different machines.
    def put_file(self, src, dst, sudo=False):
        if sys.version_info.major < 3:
            exception = shutil.Error
        elif sys.version_info.major >= 3:
            exception = shutil.SameFileError

        try:
            shutil.copy(src, dst)
        except exception as e:
            if sys.version_info.major < 3 and e.message.find('are the same '
                                                             'file') != -1:
                return
            raise e

    # XXX: accepts only two arguments to maintain compatibility with
    # teuthology's mkdtemp.
    def mkdtemp(self, suffix='', parentdir=None):
        from tempfile import mkdtemp

        # XXX: prefix must be set explicitly; without it this method failed
        # against Python 2.7 -
        # > /usr/lib64/python2.7/tempfile.py(337)mkdtemp()
        # -> file = _os.path.join(dir, prefix + name + suffix)
        # (Pdb) p prefix
        # None
        return mkdtemp(suffix=suffix, prefix='', dir=parentdir)

    def mktemp(self, suffix=None, parentdir=None):
        """
        Make a remote temporary file

        Returns: the path of the temp file created.
        """
        from tempfile import mktemp
        return mktemp(suffix=suffix, dir=parentdir)

    def _perform_checks_and_return_list_of_args(self, args, omit_sudo):
        # Since Python's shell simulation can only work when commands are
        # provided as a list of arguments...
        if isinstance(args, str) or isinstance(args, six.text_type):
            args = args.split()

        # We'll let sudo remain part of the command, even when the omit flag
        # says otherwise, for commands that can normally be run only by root.
        try:
            if args[args.index('sudo') + 1] in ['-u', 'passwd', 'chown']:
                omit_sudo = False
        except ValueError:
            pass

        # Quotes wrapping a command argument don't work in Python's shell
        # simulation if the argument also contains spaces. E.g. '"ls"' is OK
        # but '"ls /"' isn't.
        errmsg = "Don't surround arguments with quotes if they " + \
                 "contain spaces.\nargs - %s" % (args)
        for arg in args:
            if isinstance(arg, Raw):
                continue

            if arg and (arg[0] in ['"', "'"] or arg[-1] in ['"', "'"]) and \
               (arg.find(' ') != -1 and 0 < arg.find(' ') < len(arg) - 1):
                raise RuntimeError(errmsg)

        # ['sudo', '-u', 'user', '-s', 'path-to-shell', '-c', 'ls', 'a']
        # and ['sudo', '-u', user, '-s', path_to_shell, '-c', 'ls a'] are
        # treated differently by Python's shell simulation. Only the latter
        # has the desired effect.
        errmsg = 'The entire command to be executed as another user should ' +\
                 'be a single argument.\nargs - %s' % (args)
        if 'sudo' in args and '-u' in args and '-c' in args and \
           args.count('-c') == 1:
            if args.index('-c') != len(args) - 2 and \
               args[args.index('-c') + 2].find('-') == -1:
                raise RuntimeError(errmsg)

        if omit_sudo:
            args = [a for a in args if a != "sudo"]

        return args

    # Wrapper to keep the interface exactly same as that of
    # teuthology.remote.run.
    def run(self, **kwargs):
        return self._do_run(**kwargs)

    def _do_run(self, args, check_status=True, wait=True, stdout=None,
                stderr=None, cwd=None, stdin=None, logger=None, label=None,
                env=None, timeout=None, omit_sudo=False):
        args = self._perform_checks_and_return_list_of_args(args, omit_sudo)

        # We have to use shell=True if any run.Raw was present, e.g. &&
        shell = any([a for a in args if isinstance(a, Raw)])

        # Filter out helper tools that don't exist in a vstart environment
        args = [a for a in args if a not in ('adjust-ulimits',
                                             'ceph-coverage')]

        # Adjust binary path prefix if given a bare program name
        if "/" not in args[0]:
            # If they asked for a bare binary name, and it exists
            # in our built tree, use the one there.
            local_bin = os.path.join(BIN_PREFIX, args[0])
            if os.path.exists(local_bin):
                args = [local_bin] + args[1:]
            else:
                log.debug("'{0}' is not a binary in the Ceph build dir".format(
                    args[0]
                ))

        log.info("Running {0}".format(args))

        if shell:
            subproc = subprocess.Popen(quote(args),
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       stdin=subprocess.PIPE,
                                       cwd=cwd,
                                       shell=True)
        else:
            # Sanity check that we've got a list of strings
            for arg in args:
                if not isinstance(arg, six.string_types):
                    raise RuntimeError("Oops, can't handle arg {0} type {1}".format(
                        arg, arg.__class__
                    ))

            subproc = subprocess.Popen(args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       stdin=subprocess.PIPE,
                                       cwd=cwd,
                                       env=env)

        if stdin:
            if not isinstance(stdin, six.string_types):
                raise RuntimeError("Can't handle non-string stdins on a vstart cluster")

            # Hack: writing to stdin is not deadlock-safe, but it "always" works
            # as long as the input buffer is "small"
            subproc.stdin.write(stdin)

        proc = LocalRemoteProcess(
            args, subproc, check_status,
            stdout, stderr
        )

        if wait:
            proc.wait()

        return proc

    # XXX: for compatibility keep this method the same as teuthology.orchestra.remote.sh
    def sh(self, script, **kwargs):
        """
        Shortcut for run method.

        Usage:
            my_name = remote.sh('whoami')
            remote_date = remote.sh('date')
        """
        if 'stdout' not in kwargs:
            kwargs['stdout'] = StringIO()
        if 'args' not in kwargs:
            kwargs['args'] = script
        proc = self.run(**kwargs)
        return proc.stdout.getvalue()


class LocalDaemon(object):
    def __init__(self, daemon_type, daemon_id):
        self.daemon_type = daemon_type
        self.daemon_id = daemon_id
        self.controller = LocalRemote()
        self.proc = None

    @property
    def remote(self):
        return LocalRemote()

    def running(self):
        return self._get_pid() is not None

    def check_status(self):
        if self.proc:
            return self.proc.poll()

    def _get_pid(self):
        """
        Return PID as an integer or None if not found
        """
        ps_txt = six.ensure_str(self.controller.run(
            args=["ps", "ww", "-u"+str(os.getuid())]
        ).stdout.getvalue()).strip()
        lines = ps_txt.split("\n")[1:]

        for line in lines:
            if line.find("ceph-{0} -i {1}".format(self.daemon_type, self.daemon_id)) != -1:
                log.info("Found ps line for daemon: {0}".format(line))
                return int(line.split()[0])
        if opt_log_ps_output:
            log.info("No match for {0} {1}: {2}".format(
                self.daemon_type, self.daemon_id, ps_txt))
        else:
            log.info("No match for {0} {1}".format(self.daemon_type,
                     self.daemon_id))
        return None

    def wait(self, timeout):
        waited = 0
        while self._get_pid() is not None:
            if waited > timeout:
                raise MaxWhileTries("Timed out waiting for daemon {0}.{1}".format(self.daemon_type, self.daemon_id))
            time.sleep(1)
            waited += 1

    def stop(self, timeout=300):
        if not self.running():
            log.error('tried to stop a non-running daemon')
            return

        pid = self._get_pid()
        log.info("Killing PID {0} for {1}.{2}".format(pid, self.daemon_type, self.daemon_id))
        os.kill(pid, signal.SIGTERM)

        waited = 0
        while pid is not None:
            new_pid = self._get_pid()
            if new_pid is not None and new_pid != pid:
                log.info("Killing new PID {0}".format(new_pid))
                pid = new_pid
                os.kill(pid, signal.SIGTERM)

            if new_pid is None:
                break
            else:
                if waited > timeout:
                    raise MaxWhileTries(
                        "Timed out waiting for daemon {0}.{1}".format(
                            self.daemon_type, self.daemon_id))
                time.sleep(1)
                waited += 1

        self.wait(timeout=timeout)

    def restart(self):
        if self._get_pid() is not None:
            self.stop()

        self.proc = self.controller.run(args=[
            os.path.join(BIN_PREFIX, "./ceph-{0}".format(self.daemon_type)),
            "-i", self.daemon_id])

    def signal(self, sig, silent=False):
        if not self.running():
            raise RuntimeError("Can't send signal to non-running daemon")

        os.kill(self._get_pid(), sig)
        if not silent:
            log.info("Sent signal {0} to {1}.{2}".format(sig, self.daemon_type, self.daemon_id))


def safe_kill(pid):
    """
    os.kill annoyingly raises exception if process already dead. Ignore it.
    """
    try:
        return os.kill(pid, signal.SIGKILL)
    except OSError as e:
        if e.errno == errno.ESRCH:
            # Raced with process termination
            pass
        else:
            raise


class LocalKernelMount(KernelMount):
    def __init__(self, ctx, test_dir, client_id):
        super(LocalKernelMount, self).__init__(ctx, test_dir, client_id, LocalRemote(), None, None, None)

    @property
    def config_path(self):
        return "./ceph.conf"

    def get_keyring_path(self):
        # This is going to end up in a config file, so use an absolute path
        # to avoid assumptions about daemons' pwd
        keyring_path = "./client.{0}.keyring".format(self.client_id)
        try:
            os.stat(keyring_path)
        except OSError:
            return os.path.join(os.getcwd(), 'keyring')
        else:
            return keyring_path

    def run_shell(self, args, wait=True, stdin=None, check_status=True,
                  omit_sudo=False):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.client_remote.run(args=args, wait=wait, cwd=self.mountpoint,
                                      stdin=stdin, check_status=check_status,
                                      omit_sudo=omit_sudo)

    def run_as_user(self, args, user, wait=True, stdin=None, check_status=True):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        if isinstance(args, str):
            args = 'sudo -u %s -s /bin/bash -c %s' % (user, args)
        elif isinstance(args, list):
            cmdlist = args
            cmd = ''
            for i in cmdlist:
                cmd = cmd + i + ' '
            args = ['sudo', '-u', user, '-s', '/bin/bash', '-c']
            args.append(cmd)

        return self.client_remote.run(args=args, wait=wait, cwd=self.mountpoint,
                                      check_status=check_status, stdin=stdin,
                                      omit_sudo=False)

    def run_as_root(self, args, wait=True, stdin=None, check_status=True):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        if isinstance(args, str):
            args = 'sudo ' + args
        if isinstance(args, list):
            args.insert(0, 'sudo')

        return self.client_remote.run(args=args, wait=wait, cwd=self.mountpoint,
                                      check_status=check_status,
                                      omit_sudo=False)

    def testcmd(self, args, wait=True, stdin=None, omit_sudo=False):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.run_shell(args, wait=wait, stdin=stdin, check_status=False,
                              omit_sudo=omit_sudo)

    def testcmd_as_user(self, args, user, wait=True, stdin=None):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.run_as_user(args, user=user, wait=wait, stdin=stdin,
                                check_status=False)

    def testcmd_as_root(self, args, wait=True, stdin=None):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.run_as_root(args, wait=wait, stdin=stdin,
                                check_status=False)

    def setupfs(self, name=None):
        if name is None and self.fs is not None:
            # Previous mount existed, reuse the old name
            name = self.fs.name
        self.fs = LocalFilesystem(self.ctx, name=name)
        log.info('Wait for MDS to reach steady state...')
        self.fs.wait_for_daemons()
        log.info('Ready to start {}...'.format(type(self).__name__))

    @property
    def _prefix(self):
        return BIN_PREFIX

    def _asok_path(self):
        # In teuthology, the asok is named after the PID of the ceph-fuse process, because it's
        # run foreground. When running it daemonized however, the asok is named after
        # the PID of the launching process, not the long running ceph-fuse process. Therefore
        # we need to give an exact path here as the logic for checking /proc/ for which
        # asok is alive does not work.

        # Load the asok path from ceph.conf as vstart.sh now puts admin sockets
        # in a tmpdir. All of the paths are the same, so no need to select
        # based off of the service type.
        d = "./out"
        with open(self.config_path) as f:
            for line in f:
                asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line)
                if asok_conf:
                    d = asok_conf.groups(1)[0]
                    break
        path = "{0}/client.{1}.{2}.asok".format(d, self.client_id, self.fuse_daemon.subproc.pid)
        log.info("I think my launching pid was {0}".format(self.fuse_daemon.subproc.pid))
        return path

    def mount(self, mount_path=None, mount_fs_name=None, mount_options=[], **kwargs):
        self.setupfs(name=mount_fs_name)

        log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
            id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))

        self.client_remote.run(
            args=[
                'mkdir',
                '--',
                self.mountpoint,
            ],
            timeout=(5*60),
        )

        if mount_path is None:
            mount_path = "/"

        opts = 'name={id},norequire_active_mds,conf={conf}'.format(id=self.client_id,
                                                                   conf=self.config_path)

        if mount_fs_name is not None:
            opts += ",mds_namespace={0}".format(mount_fs_name)

        for mount_opt in mount_options:
            opts += ",{0}".format(mount_opt)

        self.client_remote.run(
            args=[
                'sudo',
                './bin/mount.ceph',
                ':{mount_path}'.format(mount_path=mount_path),
                self.mountpoint,
                '-v',
                '-o',
                opts
            ],
            timeout=(30*60),
            omit_sudo=False,
        )

        self.client_remote.run(
            args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(5*60))

        self.mounted = True

    def _run_python(self, pyscript, py_version='python'):
        """
        Override this to remove the daemon-helper prefix that is used otherwise
        to make the process killable.
        """
        return self.client_remote.run(args=[py_version, '-c', pyscript],
                                      wait=False)

class LocalFuseMount(FuseMount):
    def __init__(self, ctx, test_dir, client_id):
        super(LocalFuseMount, self).__init__(ctx, None, test_dir, client_id, LocalRemote())

    @property
    def config_path(self):
        return "./ceph.conf"

    def get_keyring_path(self):
        # This is going to end up in a config file, so use an absolute path
        # to avoid assumptions about daemons' pwd
        return os.path.abspath("./client.{0}.keyring".format(self.client_id))

    def run_shell(self, args, wait=True, stdin=None, check_status=True, omit_sudo=True):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.client_remote.run(args=args, wait=wait, cwd=self.mountpoint,
                                      stdin=stdin, check_status=check_status,
                                      omit_sudo=omit_sudo)

    def run_as_user(self, args, user, wait=True, stdin=None, check_status=True):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        if isinstance(args, str):
            args = 'sudo -u %s -s /bin/bash -c %s' % (user, args)
        elif isinstance(args, list):
            cmdlist = args
            cmd = ''
            for i in cmdlist:
                cmd = cmd + i + ' '
            args = ['sudo', '-u', user, '-s', '/bin/bash', '-c']
            args.append(cmd)

        return self.client_remote.run(args=args, wait=wait, cwd=self.mountpoint,
                                      check_status=check_status, stdin=stdin,
                                      omit_sudo=False)

    def run_as_root(self, args, wait=True, stdin=None, check_status=True):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        if isinstance(args, str):
            args = 'sudo ' + args
        if isinstance(args, list):
            args.insert(0, 'sudo')

        return self.client_remote.run(args=args, wait=wait, cwd=self.mountpoint,
                                      check_status=check_status,
                                      omit_sudo=False)

    def testcmd(self, args, wait=True, stdin=None, omit_sudo=True):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.run_shell(args, wait=wait, stdin=stdin, check_status=False,
                              omit_sudo=omit_sudo)

    def testcmd_as_user(self, args, user, wait=True, stdin=None):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.run_as_user(args, user=user, wait=wait, stdin=stdin,
                                check_status=False)

    def testcmd_as_root(self, args, wait=True, stdin=None):
        # FIXME maybe should add a pwd arg to teuthology.orchestra so that
        # the "cd foo && bar" shenanigans isn't needed to begin with and
        # then we wouldn't have to special case this
        return self.run_as_root(args, wait=wait, stdin=stdin,
                                check_status=False)

    def setupfs(self, name=None):
        if name is None and self.fs is not None:
            # Previous mount existed, reuse the old name
            name = self.fs.name
        self.fs = LocalFilesystem(self.ctx, name=name)
        log.info('Wait for MDS to reach steady state...')
        self.fs.wait_for_daemons()
        log.info('Ready to start {}...'.format(type(self).__name__))

    @property
    def _prefix(self):
        return BIN_PREFIX

    def _asok_path(self):
        # In teuthology, the asok is named after the PID of the ceph-fuse process, because it's
        # run foreground. When running it daemonized however, the asok is named after
        # the PID of the launching process, not the long running ceph-fuse process. Therefore
        # we need to give an exact path here as the logic for checking /proc/ for which
        # asok is alive does not work.

        # Load the asok path from ceph.conf as vstart.sh now puts admin sockets
        # in a tmpdir. All of the paths are the same, so no need to select
        # based off of the service type.
        d = "./out"
        with open(self.config_path) as f:
            for line in f:
                asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line)
                if asok_conf:
                    d = asok_conf.groups(1)[0]
                    break
        path = "{0}/client.{1}.{2}.asok".format(d, self.client_id, self.fuse_daemon.subproc.pid)
        log.info("I think my launching pid was {0}".format(self.fuse_daemon.subproc.pid))
        return path

    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=[]):
        if mountpoint is not None:
            self.mountpoint = mountpoint
        self.setupfs(name=mount_fs_name)

        self.client_remote.run(args=['mkdir', '-p', self.mountpoint])

        def list_connections():
            self.client_remote.run(
                args=["mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"],
                check_status=False
            )
            p = self.client_remote.run(
                args=["ls", "/sys/fs/fuse/connections"],
                check_status=False
            )
            if p.exitstatus != 0:
                log.warning("ls conns failed with {0}, assuming none".format(p.exitstatus))
                return []

            ls_str = six.ensure_str(p.stdout.getvalue().strip())
            if ls_str:
                return [int(n) for n in ls_str.split("\n")]
            else:
                return []

        # Before starting ceph-fuse process, note the contents of
        # /sys/fs/fuse/connections
        pre_mount_conns = list_connections()
        log.info("Pre-mount connections: {0}".format(pre_mount_conns))

        prefix = [os.path.join(BIN_PREFIX, "ceph-fuse")]
        if os.getuid() != 0:
            prefix += ["--client_die_on_failed_dentry_invalidate=false"]

        if mount_path is not None:
            prefix += ["--client_mountpoint={0}".format(mount_path)]

        if mount_fs_name is not None:
            prefix += ["--client_fs={0}".format(mount_fs_name)]

        prefix += mount_options

        self.fuse_daemon = self.client_remote.run(
            args=prefix + [
                "-f",
                "--name",
                "client.{0}".format(self.client_id),
                self.mountpoint
            ], wait=False)

        log.info("Mounting client.{0} with pid {1}".format(self.client_id, self.fuse_daemon.subproc.pid))

        # Wait for the connection reference to appear in /sys
        waited = 0
        post_mount_conns = list_connections()
        while len(post_mount_conns) <= len(pre_mount_conns):
            if self.fuse_daemon.finished:
                # Did mount fail? Raise the CommandFailedError instead of
                # hitting the "failed to populate /sys/" timeout
                self.fuse_daemon.wait()
            time.sleep(1)
            waited += 1
            if waited > 30:
                raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format(
                    waited
                ))
            post_mount_conns = list_connections()

        log.info("Post-mount connections: {0}".format(post_mount_conns))

        # Record our fuse connection number so that we can use it when
        # forcing an unmount
        new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
        if len(new_conns) == 0:
            raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
        elif len(new_conns) > 1:
            raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
        else:
            self._fuse_conn = new_conns[0]

        self.gather_mount_info()

        self.mounted = True

    def _run_python(self, pyscript, py_version='python'):
        """
        Override this to remove the daemon-helper prefix that is used otherwise
        to make the process killable.
        """
        return self.client_remote.run(args=[py_version, '-c', pyscript],
                                      wait=False)

# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of
# the same name.
class LocalCephManager(CephManager):
    def __init__(self):
        # Deliberately skip parent init, only inheriting from it to get
        # util methods like osd_dump that sit on top of raw_cluster_cmd
        self.controller = LocalRemote()

        # A minority of CephManager fns actually bother locking for when
        # certain teuthology tests want to run tasks in parallel
        self.lock = threading.RLock()

        self.log = lambda x: log.info(x)

        # Don't bother constructing a map of pools: it should be empty
        # at test cluster start, and in any case it would be out of date
        # in no time. The attribute needs to exist for some of the CephManager
        # methods to work though.
        self.pools = {}

    def find_remote(self, daemon_type, daemon_id):
        """
        daemon_type like 'mds', 'osd'
        daemon_id like 'a', '0'
        """
        return LocalRemote()

    def run_ceph_w(self, watch_channel=None):
        """
        :param watch_channel: Specifies the channel to be watched.
                              This can be 'cluster', 'audit', ...
        :type watch_channel: str
        """
        args = [os.path.join(BIN_PREFIX, "ceph"), "-w"]
        if watch_channel is not None:
            args.append("--watch-channel")
            args.append(watch_channel)
        proc = self.controller.run(args=args, wait=False, stdout=StringIO())
        return proc

    def raw_cluster_cmd(self, *args, **kwargs):
        """
        args like ["osd", "dump"]
        return stdout string
        """
        proc = self.controller.run(args=[os.path.join(BIN_PREFIX, "ceph")] +
                                        list(args), **kwargs)
        return six.ensure_str(proc.stdout.getvalue())

    def raw_cluster_cmd_result(self, *args, **kwargs):
        """
        like raw_cluster_cmd but don't check status, just return rc
        """
        kwargs['check_status'] = False
        proc = self.controller.run(args=[os.path.join(BIN_PREFIX, "ceph")] +
                                        list(args), **kwargs)
        return proc.exitstatus

    def admin_socket(self, daemon_type, daemon_id, command, check_status=True, timeout=None):
        return self.controller.run(
            args=[os.path.join(BIN_PREFIX, "ceph"), "daemon", "{0}.{1}".format(daemon_type, daemon_id)] + command,
            check_status=check_status,
            timeout=timeout
        )

    def get_mon_socks(self):
        """
        Get monitor sockets.

        :return socks: tuple of strings; strings are individual sockets.
        """
        from json import loads

        output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump'))
        socks = []
        for mon in output['mons']:
            for addrvec_mem in mon['public_addrs']['addrvec']:
                socks.append(addrvec_mem['addr'])
        return tuple(socks)

    def get_msgrv1_mon_socks(self):
        """
        Get monitor sockets that use msgrv1 to operate.

        :return socks: tuple of strings; strings are individual sockets.
        """
        from json import loads

        output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump'))
        socks = []
        for mon in output['mons']:
            for addrvec_mem in mon['public_addrs']['addrvec']:
                if addrvec_mem['type'] == 'v1':
                    socks.append(addrvec_mem['addr'])
        return tuple(socks)

    def get_msgrv2_mon_socks(self):
        """
        Get monitor sockets that use msgrv2 to operate.

        :return socks: tuple of strings; strings are individual sockets.
        """
        from json import loads

        output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump'))
        socks = []
        for mon in output['mons']:
            for addrvec_mem in mon['public_addrs']['addrvec']:
                if addrvec_mem['type'] == 'v2':
                    socks.append(addrvec_mem['addr'])
        return tuple(socks)


class LocalCephCluster(CephCluster):
    def __init__(self, ctx):
        # Deliberately skip calling parent constructor
        self._ctx = ctx
        self.mon_manager = LocalCephManager()
        self._conf = defaultdict(dict)

    @property
    def admin_remote(self):
        return LocalRemote()

    def get_config(self, key, service_type=None):
        if service_type is None:
            service_type = 'mon'

        # FIXME hardcoded vstart service IDs
        service_id = {
            'mon': 'a',
            'mds': 'a',
            'osd': '0'
        }[service_type]

        return self.json_asok(['config', 'get', key], service_type, service_id)[key]

    def _write_conf(self):
        # In teuthology, we have the honour of writing the entire ceph.conf, but
        # in vstart land it has mostly already been written and we need to carefully
        # append to it.
        conf_path = "./ceph.conf"
        banner = "\n#LOCAL_TEST\n"
        existing_str = open(conf_path).read()

        if banner in existing_str:
            existing_str = existing_str[0:existing_str.find(banner)]

        existing_str += banner

        for subsys, kvs in self._conf.items():
            existing_str += "\n[{0}]\n".format(subsys)
            for key, val in kvs.items():
                # Comment out existing instance if it exists
                log.info("Searching for existing instance {0}/{1}".format(
                    key, subsys
                ))
                existing_section = re.search("^\[{0}\]$([\n]|[^\[])+".format(
                    subsys
                ), existing_str, re.MULTILINE)

                if existing_section:
                    section_str = existing_str[existing_section.start():existing_section.end()]
                    existing_val = re.search("^\s*[^#]({0}) =".format(key), section_str, re.MULTILINE)
                    if existing_val:
                        start = existing_section.start() + existing_val.start(1)
                        log.info("Found string to replace at {0}".format(
                            start
                        ))
                        existing_str = existing_str[0:start] + "#" + existing_str[start:]

                existing_str += "{0} = {1}\n".format(key, val)

        open(conf_path, "w").write(existing_str)

    def set_ceph_conf(self, subsys, key, value):
        self._conf[subsys][key] = value
        self._write_conf()

    def clear_ceph_conf(self, subsys, key):
        del self._conf[subsys][key]
        self._write_conf()


class LocalMDSCluster(LocalCephCluster, MDSCluster):
    def __init__(self, ctx):
        super(LocalMDSCluster, self).__init__(ctx)

        self.mds_ids = ctx.daemons.daemons['ceph.mds'].keys()
        self.mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids])

    def clear_firewall(self):
        # FIXME: unimplemented
        pass

    def newfs(self, name='cephfs', create=True):
        return LocalFilesystem(self._ctx, name=name, create=create)


class LocalMgrCluster(LocalCephCluster, MgrCluster):
    def __init__(self, ctx):
        super(LocalMgrCluster, self).__init__(ctx)

        self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys()
        self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids])


class LocalFilesystem(Filesystem, LocalMDSCluster):
    def __init__(self, ctx, fscid=None, name='cephfs', create=False, ec_profile=None):
        # Deliberately skip calling parent constructor
        self._ctx = ctx

        self.id = None
        self.name = name
        self.ec_profile = ec_profile
        self.metadata_pool_name = None
        self.metadata_overlay = False
        self.data_pool_name = None
        self.data_pools = None

        # Hack: cheeky inspection of ceph.conf to see what MDSs exist
        self.mds_ids = set()
        for line in open("ceph.conf").readlines():
            match = re.match("^\[mds\.(.+)\]$", line)
            if match:
                self.mds_ids.add(match.group(1))

        if not self.mds_ids:
            raise RuntimeError("No MDSs found in ceph.conf!")

        self.mds_ids = list(self.mds_ids)

        log.info("Discovered MDS IDs: {0}".format(self.mds_ids))

        self.mon_manager = LocalCephManager()

        self.mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids])

        self.client_remote = LocalRemote()

        self._conf = defaultdict(dict)

        if name is not None:
            if fscid is not None:
                raise RuntimeError("cannot specify fscid when creating fs")
            if create and not self.legacy_configured():
                self.create()
        else:
            if fscid is not None:
                self.id = fscid
            self.getinfo(refresh=True)

        # Stash a reference to the first created filesystem on ctx, so
        # that if someone drops to the interactive shell they can easily
        # poke our methods.
        if not hasattr(self._ctx, "filesystem"):
            self._ctx.filesystem = self

    @property
    def _prefix(self):
        return BIN_PREFIX

    def set_clients_block(self, blocked, mds_id=None):
        raise NotImplementedError()


class InteractiveFailureResult(unittest.TextTestResult):
    """
    Specialization that implements interactive-on-error style
    behavior.
    """
    def addFailure(self, test, err):
        super(InteractiveFailureResult, self).addFailure(test, err)
        log.error(self._exc_info_to_string(err, test))
        log.error("Failure in test '{0}', going interactive".format(
            self.getDescription(test)
        ))
        interactive.task(ctx=None, config=None)

    def addError(self, test, err):
        super(InteractiveFailureResult, self).addError(test, err)
        log.error(self._exc_info_to_string(err, test))
        log.error("Error in test '{0}', going interactive".format(
            self.getDescription(test)
        ))
        interactive.task(ctx=None, config=None)


def enumerate_methods(s):
    log.info("e: {0}".format(s))
    for t in s._tests:
        if isinstance(t, suite.BaseTestSuite):
            for sub in enumerate_methods(t):
                yield sub
        else:
            yield s, t


def load_tests(modules, loader):
    if modules:
        log.info("Executing modules: {0}".format(modules))
        module_suites = []
        for mod_name in modules:
            # Test names like cephfs.test_auto_repair
            module_suites.append(loader.loadTestsFromName(mod_name))
        log.info("Loaded: {0}".format(list(module_suites)))
        return suite.TestSuite(module_suites)
    else:
        log.info("Executing all cephfs tests")
        return loader.discover(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "cephfs")
        )


def scan_tests(modules):
    overall_suite = load_tests(modules, loader.TestLoader())

    max_required_mds = 0
    max_required_clients = 0
    max_required_mgr = 0
    require_memstore = False

    for suite_, case in enumerate_methods(overall_suite):
        max_required_mds = max(max_required_mds,
                               getattr(case, "MDSS_REQUIRED", 0))
        max_required_clients = max(max_required_clients,
                                   getattr(case, "CLIENTS_REQUIRED", 0))
        max_required_mgr = max(max_required_mgr,
                               getattr(case, "MGRS_REQUIRED", 0))
        require_memstore = getattr(case, "REQUIRE_MEMSTORE", False) \
            or require_memstore

    return max_required_mds, max_required_clients, \
        max_required_mgr, require_memstore


class LocalCluster(object):
    def __init__(self, rolename="placeholder"):
        self.remotes = {
            LocalRemote(): [rolename]
        }

    def only(self, requested):
        return self.__class__(rolename=requested)


class LocalContext(object):
    def __init__(self):
        self.config = {}
        self.teuthology_config = teuth_config
        self.cluster = LocalCluster()
        self.daemons = DaemonGroup()

        # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any
        # tests that want to look these up via ctx can do so.
        # Inspect ceph.conf to see what roles exist
        for conf_line in open("ceph.conf").readlines():
            for svc_type in ["mon", "osd", "mds", "mgr"]:
                prefixed_type = "ceph." + svc_type
                if prefixed_type not in self.daemons.daemons:
                    self.daemons.daemons[prefixed_type] = {}
                match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line)
                if match:
                    svc_id = match.group(1)
                    self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id)

    def __del__(self):
        shutil.rmtree(self.teuthology_config['test_path'])

def teardown_cluster():
    log.info('\ntearing down the cluster...')
    remote.run(args=[os.path.join(SRC_PREFIX, "stop.sh")], timeout=60)
    remote.run(args=['rm', '-rf', './dev', './out'])

def clear_old_log():
    from os import stat

    try:
        stat(logpath)
        # would need an update when making this py3 compatible. Use FileNotFound
        # instead.
    except OSError:
        return
    else:
        os.remove(logpath)
        with open(logpath, 'w') as logfile:
            logfile.write('')
        init_log()
        log.info('logging in a fresh file now...')

def exec_test():
    # Parse arguments
    opt_interactive_on_error = False
    opt_create_cluster = False
    opt_create_cluster_only = False
    opt_ignore_missing_binaries = False
    opt_teardown_cluster = False
    global opt_log_ps_output
    opt_log_ps_output = False
    use_kernel_client = False

    args = sys.argv[1:]
    flags = [a for a in args if a.startswith("-")]
    modules = [a for a in args if not a.startswith("-")]
    for f in flags:
        if f == "--interactive":
            opt_interactive_on_error = True
        elif f == "--create":
            opt_create_cluster = True
        elif f == "--create-cluster-only":
            opt_create_cluster_only = True
        elif f == "--ignore-missing-binaries":
            opt_ignore_missing_binaries = True
        elif f == '--teardown':
            opt_teardown_cluster = True
        elif f == '--log-ps-output':
            opt_log_ps_output = True
        elif f == '--clear-old-log':
            clear_old_log()
        elif f == "--kclient":
            use_kernel_client = True
        else:
            log.error("Unknown option '{0}'".format(f))
            sys.exit(-1)

    # Help developers by stopping up-front if their tree isn't built enough for all the
    # tools that the tests might want to use (add more here if needed)
    require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan",
                        "cephfs-table-tool", "ceph-fuse", "rados"]
    missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))]
    if missing_binaries and not opt_ignore_missing_binaries:
        log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries)))
        sys.exit(-1)

    max_required_mds, max_required_clients, \
        max_required_mgr, require_memstore = scan_tests(modules)

    global remote
    remote = LocalRemote()

    # Tolerate no MDSs or clients running at start
    ps_txt = six.ensure_str(remote.run(
        args=["ps", "-u"+str(os.getuid())]
    ).stdout.getvalue().strip())
    lines = ps_txt.split("\n")[1:]
    for line in lines:
        if 'ceph-fuse' in line or 'ceph-mds' in line:
            pid = int(line.split()[0])
            log.warning("Killing stray process {0}".format(line))
            os.kill(pid, signal.SIGKILL)

    # Fire up the Ceph cluster if the user requested it
    if opt_create_cluster or opt_create_cluster_only:
        log.info("Creating cluster with {0} MDS daemons".format(
            max_required_mds))
        teardown_cluster()
        vstart_env = os.environ.copy()
        vstart_env["FS"] = "0"
        vstart_env["MDS"] = max_required_mds.__str__()
        vstart_env["OSD"] = "4"
        vstart_env["MGR"] = max(max_required_mgr, 1).__str__()

        args = [os.path.join(SRC_PREFIX, "vstart.sh"), "-n", "-d",
                "--nolockdep"]
        if require_memstore:
            args.append("--memstore")

        # Usually, vstart.sh completes in less than 100 seconds.
        remote.run(args=args, env=vstart_env, timeout=(3 * 60))

        # Wait for OSD to come up so that subsequent injectargs etc will
        # definitely succeed
        LocalCephCluster(LocalContext()).mon_manager.wait_for_all_osds_up(timeout=30)

    if opt_create_cluster_only:
        return

    # List of client mounts, sufficient to run the selected tests
    clients = [i.__str__() for i in range(0, max_required_clients)]

    test_dir = tempfile.mkdtemp()
    teuth_config['test_path'] = test_dir

    ctx = LocalContext()
    ceph_cluster = LocalCephCluster(ctx)
    mds_cluster = LocalMDSCluster(ctx)
    mgr_cluster = LocalMgrCluster(ctx)

    # Construct Mount classes
    mounts = []
    for client_id in clients:
        # Populate client keyring (it sucks to use client.admin for test clients
        # because it's awkward to find the logs later)
        client_name = "client.{0}".format(client_id)

        if client_name not in open("./keyring").read():
            p = remote.run(args=[os.path.join(BIN_PREFIX, "ceph"), "auth", "get-or-create", client_name,
                                 "osd", "allow rw",
                                 "mds", "allow",
                                 "mon", "allow r"])

            open("./keyring", "a").write(p.stdout.getvalue())

        if use_kernel_client:
            mount = LocalKernelMount(ctx, test_dir, client_id)
        else:
            mount = LocalFuseMount(ctx, test_dir, client_id)

        mounts.append(mount)
        if os.path.exists(mount.mountpoint):
            if mount.is_mounted():
                log.warning("unmounting {0}".format(mount.mountpoint))
                mount.umount_wait()
            else:
                os.rmdir(mount.mountpoint)

    from tasks.cephfs_test_runner import DecoratingLoader

    class LogStream(object):
        def __init__(self):
            self.buffer = ""

        def write(self, data):
            self.buffer += data
            if "\n" in self.buffer:
                lines = self.buffer.split("\n")
                for line in lines[:-1]:
                    pass
                    # sys.stderr.write(line + "\n")
                    log.info(line)
                self.buffer = lines[-1]

        def flush(self):
            pass

    decorating_loader = DecoratingLoader({
        "ctx": ctx,
        "mounts": mounts,
        "ceph_cluster": ceph_cluster,
        "mds_cluster": mds_cluster,
        "mgr_cluster": mgr_cluster,
    })

    # For the benefit of polling tests like test_full -- in teuthology land we set this
    # in a .yaml, here it's just a hardcoded thing for the developer's pleasure.
    remote.run(args=[os.path.join(BIN_PREFIX, "ceph"), "tell", "osd.*", "injectargs", "--osd-mon-report-interval", "5"])
    ceph_cluster.set_ceph_conf("osd", "osd_mon_report_interval", "5")

    # Vstart defaults to two segments, which very easily gets a "behind on trimming" health warning
    # from normal IO latency. Increase it for running tests.
    ceph_cluster.set_ceph_conf("mds", "mds log max segments", "10")

    # Make sure the filesystem created in tests has uid/gid that will let us talk to
    # it after mounting it (without having to go root). Set in 'global' not just 'mds'
    # so that cephfs-data-scan will pick it up too.
    ceph_cluster.set_ceph_conf("global", "mds root ino uid", "%s" % os.getuid())
    ceph_cluster.set_ceph_conf("global", "mds root ino gid", "%s" % os.getgid())

    # Monkeypatch get_package_version to avoid having to work out what kind of distro we're on
    def _get_package_version(remote, pkg_name):
        # Used in cephfs tests to find fuse version. Your development workstation *does* have >=2.9, right?
        return "2.9"

    import teuthology.packaging
    teuthology.packaging.get_package_version = _get_package_version

    overall_suite = load_tests(modules, decorating_loader)

    # Filter out tests that don't lend themselves to interactive running.
    victims = []
    for case, method in enumerate_methods(overall_suite):
        fn = getattr(method, method._testMethodName)

        drop_test = False

        if hasattr(fn, 'is_for_teuthology') and getattr(fn, 'is_for_teuthology') is True:
            drop_test = True
            log.warning("Dropping test because long running: {method_id}".format(method_id=method.id()))

        if getattr(fn, "needs_trimming", False) is True:
            drop_test = (os.getuid() != 0)
            log.warning("Dropping test because client trim unavailable: {method_id}".format(method_id=method.id()))

        if drop_test:
            # Don't drop the test if it was explicitly requested in arguments
            is_named = False
            for named in modules:
                if named.endswith(method.id()):
                    is_named = True
                    break

            if not is_named:
                victims.append((case, method))

    log.info("Disabling {0} tests because of is_for_teuthology or needs_trimming".format(len(victims)))
    for s, method in victims:
        s._tests.remove(method)

    if opt_interactive_on_error:
        result_class = InteractiveFailureResult
    else:
        result_class = unittest.TextTestResult
    fail_on_skip = False

    class LoggingResult(result_class):
        def startTest(self, test):
            log.info("Starting test: {0}".format(self.getDescription(test)))
            test.started_at = datetime.datetime.utcnow()
            return super(LoggingResult, self).startTest(test)

        def stopTest(self, test):
            log.info("Stopped test: {0} in {1}s".format(
                self.getDescription(test),
                (datetime.datetime.utcnow() - test.started_at).total_seconds()
            ))

        def addSkip(self, test, reason):
            if fail_on_skip:
                # Don't just call addFailure because that requires a traceback
                self.failures.append((test, reason))
            else:
                super(LoggingResult, self).addSkip(test, reason)

    # Execute!
    result = unittest.TextTestRunner(
        stream=LogStream(),
        resultclass=LoggingResult,
        verbosity=2,
        failfast=True).run(overall_suite)

    if opt_teardown_cluster:
        teardown_cluster()

    if not result.wasSuccessful():
        result.printErrors()  # duplicate output at end for convenience

        bad_tests = []
        for test, error in result.errors:
            bad_tests.append(str(test))
        for test, failure in result.failures:
            bad_tests.append(str(test))

        sys.exit(-1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    exec_test()