1"""
2vstart_runner: override Filesystem and Mount interfaces to run a CephFSTestCase against a vstart
3ceph instance instead of a packaged/installed cluster. Use this to turn around test cases
4quickly during development.
5
6Simple usage (assuming teuthology and ceph checked out in ~/git):
7
8 # Activate the teuthology virtualenv
9 source ~/git/teuthology/virtualenv/bin/activate
10 # Go into your ceph build directory
11 cd ~/git/ceph/build
12 # Invoke a test using this script
13 python ~/git/ceph/qa/tasks/vstart_runner.py --create tasks.cephfs.test_data_scan
14
15Alternative usage:
16
17 # Alternatively, if you use different paths, specify them as follows:
9f95a23c 18 LD_LIBRARY_PATH=`pwd`/lib PYTHONPATH=~/git/teuthology:~/git/ceph/qa:`pwd`/../src/pybind:`pwd`/lib/cython_modules/lib.3 python ~/git/ceph/qa/tasks/vstart_runner.py
7c673cae
FG
19
20 # If you wish to drop to a python shell on failures, use --interactive:
21 python ~/git/ceph/qa/tasks/vstart_runner.py --interactive
22
23 # If you wish to run a named test case, pass it as an argument:
24 python ~/git/ceph/qa/tasks/vstart_runner.py tasks.cephfs.test_data_scan
25
11fdf7f2
TL
26 # Also, you can create the cluster once and then run named test cases against it:
27 python ~/git/ceph/qa/tasks/vstart_runner.py --create-cluster-only
28 python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_health
29 python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_rgw
30
7c673cae
FG
31"""

from io import StringIO
from collections import defaultdict
import getpass
import signal
import tempfile
import threading
import datetime
import shutil
import re
import os
import time
import sys
import errno
from IPy import IP
import unittest
import platform
import logging

from unittest import suite, loader

from teuthology.orchestra.run import Raw, quote, PIPE
from teuthology.orchestra.daemon import DaemonGroup
from teuthology.orchestra.remote import Remote
from teuthology.config import config as teuth_config
from teuthology.contextutil import safe_while
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
try:
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
except:
    pass

def init_log(log_level=logging.INFO):
    global log
    if log is not None:
        del log
    log = logging.getLogger(__name__)

    global logpath
    logpath = './vstart_runner.log'

    handler = logging.FileHandler(logpath)
    formatter = logging.Formatter(
        fmt=u'%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')
    handler.setFormatter(formatter)
    log.addHandler(handler)
    log.setLevel(log_level)

log = None
init_log()


def respawn_in_path(lib_path, python_paths):
    execv_cmd = ['python']
    if platform.system() == "Darwin":
        lib_path_var = "DYLD_LIBRARY_PATH"
    else:
        lib_path_var = "LD_LIBRARY_PATH"

    py_binary = os.environ.get("PYTHON", sys.executable)

    if lib_path_var in os.environ:
        if lib_path not in os.environ[lib_path_var]:
            os.environ[lib_path_var] += ':' + lib_path
            os.execvp(py_binary, execv_cmd + sys.argv)
    else:
        os.environ[lib_path_var] = lib_path
        os.execvp(py_binary, execv_cmd + sys.argv)

    for p in python_paths:
        sys.path.insert(0, p)


# Let's use some sensible defaults
if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"):

    # A list of candidate paths for each package we need
    guesses = [
        ["~/git/teuthology", "~/scm/teuthology", "~/teuthology"],
        ["lib/cython_modules/lib.3"],
        ["../src/pybind"],
    ]

    python_paths = []

    # Up one level so that "tasks.foo.bar" imports work
    python_paths.append(os.path.abspath(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
    ))

    for package_guesses in guesses:
        for g in package_guesses:
            g_exp = os.path.abspath(os.path.expanduser(g))
            if os.path.exists(g_exp):
                python_paths.append(g_exp)

    ld_path = os.path.join(os.getcwd(), "lib/")
    print("Using guessed paths {0} {1}".format(ld_path, python_paths))
    respawn_in_path(ld_path, python_paths)


try:
    from tasks.ceph_manager import CephManager
    from tasks.cephfs.fuse_mount import FuseMount
    from tasks.cephfs.kernel_mount import KernelMount
    from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
    from tasks.cephfs.mount import CephFSMount
    from tasks.mgr.mgr_test_case import MgrCluster
    from teuthology.task import interactive
except ImportError:
    sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv "
                     "and set PYTHONPATH to point to teuthology and ceph-qa-suite?\n***\n\n")
    raise

# Must import after teuthology because of gevent monkey patching
import subprocess

if os.path.exists("./CMakeCache.txt"):
    # Running in build dir of a cmake build
    BIN_PREFIX = "./bin/"
    SRC_PREFIX = "../src"
else:
    # Running in src/ of an autotools build
    BIN_PREFIX = "./"
    SRC_PREFIX = "./"

CEPH_CMD = os.path.join(BIN_PREFIX, 'ceph')


def rm_nonascii_chars(var):
    # Replace UTF-8 left/right single quotation marks with plain ASCII quotes
    var = var.replace(b'\xe2\x80\x98', b'\'')
    var = var.replace(b'\xe2\x80\x99', b'\'')
    return var

class LocalRemoteProcess(object):
    def __init__(self, args, subproc, check_status, stdout, stderr):
        self.args = args
        self.subproc = subproc
        self.stdout = stdout
        self.stderr = stderr
        # This variable is meant for the instance of this class named
        # fuse_daemon. The child process of a command launched with sudo must
        # be killed explicitly, since killing the parent process alone has no
        # impact on the child process.
        self.fuse_pid = -1

        self.check_status = check_status
        self.exitstatus = self.returncode = None

    def _write_stdout(self, out):
        if isinstance(self.stdout, StringIO):
            self.stdout.write(out.decode(errors='ignore'))
        elif self.stdout is None:
            pass
        else:
            self.stdout.write(out)

    def _write_stderr(self, err):
        if isinstance(self.stderr, StringIO):
            self.stderr.write(err.decode(errors='ignore'))
        elif self.stderr is None:
            pass
        else:
            self.stderr.write(err)

    def wait(self):
        if self.finished:
            # Avoid calling communicate() on a dead process because it'll
            # give you stick about std* already being closed
            if self.check_status and self.exitstatus != 0:
                raise CommandFailedError(self.args, self.exitstatus)
            else:
                return

        out, err = self.subproc.communicate()
        out, err = rm_nonascii_chars(out), rm_nonascii_chars(err)
        self._write_stdout(out)
        self._write_stderr(err)

        self.exitstatus = self.returncode = self.subproc.returncode

        if self.exitstatus != 0:
            sys.stderr.write(out.decode())
            sys.stderr.write(err.decode())

        if self.check_status and self.exitstatus != 0:
            raise CommandFailedError(self.args, self.exitstatus)

    @property
    def finished(self):
        if self.exitstatus is not None:
            return True

        if self.subproc.poll() is not None:
            out, err = self.subproc.communicate()
            self._write_stdout(out)
            self._write_stderr(err)

            self.exitstatus = self.returncode = self.subproc.returncode

            return True
        else:
            return False

    def kill(self):
        log.debug("kill ")
        if self.subproc.pid and not self.finished:
            log.debug("kill: killing pid {0} ({1})".format(
                self.subproc.pid, self.args))
            if self.fuse_pid != -1:
                safe_kill(self.fuse_pid)
            else:
                safe_kill(self.subproc.pid)
        else:
            log.debug("kill: already terminated ({0})".format(self.args))

    @property
    def stdin(self):
        class FakeStdIn(object):
            def __init__(self, mount_daemon):
                self.mount_daemon = mount_daemon

            def close(self):
                self.mount_daemon.kill()

        return FakeStdIn(self)


class LocalRemote(object):
    """
    Amusingly named class to present the teuthology RemoteProcess interface when we are really
    running things locally for vstart

    Run this inside your src/ dir!
    """

    os = Remote.os
    arch = Remote.arch

    def __init__(self):
        self.name = "local"
        self.hostname = "localhost"
        self.user = getpass.getuser()

    def get_file(self, path, sudo, dest_dir):
        tmpfile = tempfile.NamedTemporaryFile(delete=False).name
        shutil.copy(path, tmpfile)
        return tmpfile

    # XXX: This method ignores the error raised when src and dst are
    # holding the same path. For teuthology, the same path still represents
    # different locations as they lie on different machines.
    def put_file(self, src, dst, sudo=False):
        try:
            shutil.copy(src, dst)
        except shutil.SameFileError:
            pass

    # XXX: accepts only two arguments to maintain compatibility with
    # teuthology's mkdtemp.
    def mkdtemp(self, suffix='', parentdir=None):
        from tempfile import mkdtemp

        # XXX: prefix had to be set; without it this method failed against
        # Python 2.7 -
        # > /usr/lib64/python2.7/tempfile.py(337)mkdtemp()
        # -> file = _os.path.join(dir, prefix + name + suffix)
        # (Pdb) p prefix
        # None
        return mkdtemp(suffix=suffix, prefix='', dir=parentdir)

    def mktemp(self, suffix='', parentdir='', path=None, data=None,
               owner=None, mode=None):
        """
        Make a remote temporary file

        Returns: the path of the temp file created.
        """
        from tempfile import mktemp
        if not path:
            path = mktemp(suffix=suffix, dir=parentdir)
            if not parentdir:
                path = os.path.join('/tmp', path)

        if data:
            # sudo is set to False since the root user can't write files in
            # /tmp owned by other users.
            self.write_file(path=path, data=data, sudo=False)

        return path

    def write_file(self, path, data, sudo=False, mode=None, owner=None,
                   mkdir=False, append=False):
        """
        Write data to file

        :param path: file path on host
        :param data: str, binary or fileobj to be written
        :param sudo: use sudo to write file, defaults False
        :param mode: set file mode bits if provided
        :param owner: set file owner if provided
        :param mkdir: preliminarily create the file's directory, defaults False
        :param append: append data to the file, defaults False
        """
        dd = 'sudo dd' if sudo else 'dd'
        args = dd + ' of=' + path
        if append:
            args += ' conv=notrunc oflag=append'
        if mkdir:
            mkdirp = 'sudo mkdir -p' if sudo else 'mkdir -p'
            dirpath = os.path.dirname(path)
            if dirpath:
                args = mkdirp + ' ' + dirpath + '\n' + args
        if mode:
            chmod = 'sudo chmod' if sudo else 'chmod'
            args += '\n' + chmod + ' ' + mode + ' ' + path
        if owner:
            chown = 'sudo chown' if sudo else 'chown'
            args += '\n' + chown + ' ' + owner + ' ' + path
        omit_sudo = False if sudo else True
        self.run(args=args, stdin=data, omit_sudo=omit_sudo)

    def sudo_write_file(self, path, data, **kwargs):
        """
        Write data to file with sudo, for more info see `write_file()`.
        """
        self.write_file(path, data, sudo=True, **kwargs)

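    # Illustrative sketch (not called anywhere in this file): a call such as
    #
    #   LocalRemote().write_file('/tmp/vstart_test.conf', 'key = value\n',
    #                            mkdir=True, mode='644')
    #
    # composes a small shell snippet along the lines of
    #   mkdir -p /tmp
    #   dd of=/tmp/vstart_test.conf
    #   chmod 644 /tmp/vstart_test.conf
    # and feeds the data to dd on stdin via self.run(..., stdin=data).
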
    def _perform_checks_and_return_list_of_args(self, args, omit_sudo):
        # Since Python's shell simulation can only work when commands are
        # provided as a list of arguments...
        if isinstance(args, str):
            args = args.split()

        # We'll let sudo be a part of the command even if the omit flag says
        # otherwise, for commands which can normally be run only by root.
        try:
            if args[args.index('sudo') + 1] in ['-u', 'passwd', 'chown']:
                omit_sudo = False
        except ValueError:
            pass

        # Quotes wrapping a command argument don't work well in Python's shell
        # simulation if the argument also contains spaces. E.g. '"ls"' is OK
        # but "ls /" isn't.
        errmsg = "Don't surround a command argument with quotes if it " + \
                 "contains spaces.\nargs - %s" % (args)
        for arg in args:
            if isinstance(arg, Raw):
                continue

            if arg and (arg[0] in ['"', "'"] or arg[-1] in ['"', "'"]) and \
               (arg.find(' ') != -1 and 0 < arg.find(' ') < len(arg) - 1):
                raise RuntimeError(errmsg)

        # ['sudo', '-u', 'user', '-s', 'path-to-shell', '-c', 'ls', 'a']
        # and ['sudo', '-u', user, '-s', path_to_shell, '-c', 'ls a'] are
        # treated differently by Python's shell simulation. Only the latter
        # has the desired effect.
        errmsg = 'The entire command to be executed as another user should ' +\
                 'be a single argument.\nargs - %s' % (args)
        if 'sudo' in args and '-u' in args and '-c' in args and \
           args.count('-c') == 1:
            if args.index('-c') != len(args) - 2 and \
               args[args.index('-c') + 2].find('-') == -1:
                raise RuntimeError(errmsg)

        if omit_sudo:
            args = [a for a in args if a != "sudo"]

        return args

    # Wrapper to keep the interface exactly the same as that of
    # teuthology.remote.run.
    def run(self, **kwargs):
        return self._do_run(**kwargs)

    # XXX: omit_sudo is set to True since using sudo can change the ownership
    # of files which becomes problematic for subsequent executions of
    # vstart_runner.py.
    def _do_run(self, args, check_status=True, wait=True, stdout=None,
                stderr=None, cwd=None, stdin=None, logger=None, label=None,
                env=None, timeout=None, omit_sudo=True, shell=True):
        args = self._perform_checks_and_return_list_of_args(args, omit_sudo)

        # We have to use shell=True if any run.Raw was present, e.g. &&
        if not shell:
            shell = any([a for a in args if isinstance(a, Raw)])

        # Filter out helper tools that don't exist in a vstart environment
        args = [a for a in args if a not in ('adjust-ulimits',
                                             'ceph-coverage')]

        # Adjust binary path prefix if given a bare program name
        if not isinstance(args[0], Raw) and "/" not in args[0]:
            # If they asked for a bare binary name, and it exists
            # in our built tree, use the one there.
            local_bin = os.path.join(BIN_PREFIX, args[0])
            if os.path.exists(local_bin):
                args = [local_bin] + args[1:]

        log.debug('> ' +
                  ' '.join([str(a.value) if isinstance(a, Raw) else a for a in args]))

        if shell:
            subproc = subprocess.Popen(quote(args),
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       stdin=subprocess.PIPE,
                                       cwd=cwd,
                                       env=env,
                                       shell=True)
        else:
            # Sanity check that we've got a list of strings
            for arg in args:
                if not isinstance(arg, str):
                    raise RuntimeError("Oops, can't handle arg {0} type {1}".format(
                        arg, arg.__class__
                    ))

            subproc = subprocess.Popen(args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       stdin=subprocess.PIPE,
                                       cwd=cwd,
                                       env=env)

        if stdin:
            # Hack: writing to stdin is not deadlock-safe, but it "always" works
            # as long as the input buffer is "small"
            if isinstance(stdin, str):
                subproc.stdin.write(stdin.encode())
            elif stdin == subprocess.PIPE or stdin == PIPE:
                pass
            elif isinstance(stdin, StringIO):
                subproc.stdin.write(bytes(stdin.getvalue(), encoding='utf8'))
            else:
                subproc.stdin.write(stdin)

        proc = LocalRemoteProcess(
            args, subproc, check_status,
            stdout, stderr
        )

        if wait:
            proc.wait()

        return proc

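    # Illustrative sketch (not used verbatim in this file): because _do_run()
    # rewrites bare program names that exist under BIN_PREFIX, a caller can say
    #
    #   proc = LocalRemote().run(args=['ceph', 'osd', 'dump'],
    #                            stdout=StringIO())
    #   print(proc.stdout.getvalue())
    #
    # and the locally built ./bin/ceph is executed rather than any system-wide
    # ceph binary.
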
    # XXX: for compatibility keep this method the same as teuthology.orchestra.remote.sh
    # BytesIO is being used just to keep things identical
    def sh(self, script, **kwargs):
        """
        Shortcut for run method.

        Usage:
            my_name = remote.sh('whoami')
            remote_date = remote.sh('date')
        """
        from io import BytesIO

        if 'stdout' not in kwargs:
            kwargs['stdout'] = BytesIO()
        if 'args' not in kwargs:
            kwargs['args'] = script
        proc = self.run(**kwargs)
        out = proc.stdout.getvalue()
        if isinstance(out, bytes):
            return out.decode()
        else:
            return out

class LocalDaemon(object):
    def __init__(self, daemon_type, daemon_id):
        self.daemon_type = daemon_type
        self.daemon_id = daemon_id
        self.controller = LocalRemote()
        self.proc = None

    @property
    def remote(self):
        return LocalRemote()

    def running(self):
        return self._get_pid() is not None

    def check_status(self):
        if self.proc:
            return self.proc.poll()

    def _get_pid(self):
        """
        Return PID as an integer or None if not found
        """
        ps_txt = self.controller.run(args=["ps", "ww", "-u"+str(os.getuid())],
                                     stdout=StringIO()).\
            stdout.getvalue().strip()
        lines = ps_txt.split("\n")[1:]

        for line in lines:
            if line.find("ceph-{0} -i {1}".format(self.daemon_type, self.daemon_id)) != -1:
                log.debug("Found ps line for daemon: {0}".format(line))
                return int(line.split()[0])
        if not opt_log_ps_output:
            ps_txt = '(omitted)'
        log.debug("No match for {0} {1}: {2}".format(
            self.daemon_type, self.daemon_id, ps_txt))
        return None

    def wait(self, timeout):
        waited = 0
        while self._get_pid() is not None:
            if waited > timeout:
                raise MaxWhileTries("Timed out waiting for daemon {0}.{1}".format(self.daemon_type, self.daemon_id))
            time.sleep(1)
            waited += 1

    def stop(self, timeout=300):
        if not self.running():
            log.error('tried to stop a non-running daemon')
            return

        pid = self._get_pid()
        if pid is None:
            return
        log.debug("Killing PID {0} for {1}.{2}".format(pid, self.daemon_type, self.daemon_id))
        os.kill(pid, signal.SIGTERM)

        waited = 0
        while pid is not None:
            new_pid = self._get_pid()
            if new_pid is not None and new_pid != pid:
                log.debug("Killing new PID {0}".format(new_pid))
                pid = new_pid
                os.kill(pid, signal.SIGTERM)

            if new_pid is None:
                break
            else:
                if waited > timeout:
                    raise MaxWhileTries(
                        "Timed out waiting for daemon {0}.{1}".format(
                            self.daemon_type, self.daemon_id))
                time.sleep(1)
                waited += 1

        self.wait(timeout=timeout)

    def restart(self):
        if self._get_pid() is not None:
            self.stop()

        self.proc = self.controller.run(args=[
            os.path.join(BIN_PREFIX, "./ceph-{0}".format(self.daemon_type)),
            "-i", self.daemon_id])

    def signal(self, sig, silent=False):
        if not self.running():
            raise RuntimeError("Can't send signal to non-running daemon")

        os.kill(self._get_pid(), sig)
        if not silent:
            log.debug("Sent signal {0} to {1}.{2}".format(sig, self.daemon_type, self.daemon_id))


def safe_kill(pid):
    """
    os.kill annoyingly raises an exception if the process is already dead. Ignore it.
    """
    try:
        return os.kill(pid, signal.SIGKILL)
    except OSError as e:
        if e.errno == errno.ESRCH:
            # Raced with process termination
            pass
        else:
            raise

def mon_in_localhost(config_path="./ceph.conf"):
    """
    If the ceph cluster is using the localhost IP as mon host, we must disable ns unsharing.
    """
    with open(config_path) as f:
        for line in f:
            local = re.match(r'^\s*mon host\s*=\s*\[((v1|v2):127\.0\.0\.1:\d+,?)+\]', line)
            if local:
                return True
    return False

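# Illustrative example: mon_in_localhost() returns True for a vstart ceph.conf
# containing a "mon host" line such as the following (ports are arbitrary here):
#
#   mon host = [v2:127.0.0.1:40938,v1:127.0.0.1:40939]
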
class LocalCephFSMount():
    @property
    def config_path(self):
        return "./ceph.conf"

    def get_keyring_path(self):
        # This is going to end up in a config file, so use an absolute path
        # to avoid assumptions about daemons' pwd
        keyring_path = "./client.{0}.keyring".format(self.client_id)
        try:
            os.stat(keyring_path)
        except OSError:
            return os.path.join(os.getcwd(), 'keyring')
        else:
            return keyring_path

    @property
    def _prefix(self):
        return BIN_PREFIX

    def _asok_path(self):
        # In teuthology, the asok is named after the PID of the ceph-fuse
        # process, because it's run in the foreground. When running it
        # daemonized however, the asok is named after the PID of the launching
        # process, not the long running ceph-fuse process. Therefore we need to
        # give an exact path here, as the logic for checking /proc/ for which
        # asok is alive does not work.

        # Load the asok path from ceph.conf as vstart.sh now puts admin sockets
        # in a tmpdir. All of the paths are the same, so no need to select
        # based off of the service type.
        d = "./out"
        with open(self.config_path) as f:
            for line in f:
                asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line)
                if asok_conf:
                    d = asok_conf.groups(1)[0]
                    break
        path = "{0}/client.{1}.*.asok".format(d, self.client_id)
        return path

    def _run_python(self, pyscript, py_version='python', sudo=False):
        """
        Override this to remove the daemon-helper prefix that is used otherwise
        to make the process killable.
        """
        args = []
        if sudo:
            args.append('sudo')
        args += [py_version, '-c', pyscript]
        return self.client_remote.run(args=args,
                                      wait=False, stdout=StringIO())

    def setup_netns(self):
        if opt_use_ns:
            super(type(self), self).setup_netns()

    @property
    def _nsenter_args(self):
        if opt_use_ns:
            return super(type(self), self)._nsenter_args
        else:
            return []

    def setupfs(self, name=None):
        if name is None and self.fs is not None:
            # Previous mount existed, reuse the old name
            name = self.fs.name
        self.fs = LocalFilesystem(self.ctx, name=name)
        log.info('Wait for MDS to reach steady state...')
        self.fs.wait_for_daemons()
        log.info('Ready to start {}...'.format(type(self).__name__))


class LocalKernelMount(LocalCephFSMount, KernelMount):
    def __init__(self, ctx, test_dir, client_id=None,
                 client_keyring_path=None, client_remote=None,
                 hostfs_mntpt=None, cephfs_name=None, cephfs_mntpt=None,
                 brxnet=None):
        super(LocalKernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
            client_id=client_id, client_keyring_path=client_keyring_path,
            client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt,
            cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)

        # Make vstart_runner compatible with teuth and qa/tasks/cephfs.
        self._mount_bin = [os.path.join(BIN_PREFIX, 'mount.ceph')]


class LocalFuseMount(LocalCephFSMount, FuseMount):
    def __init__(self, ctx, test_dir, client_id, client_keyring_path=None,
                 client_remote=None, hostfs_mntpt=None, cephfs_name=None,
                 cephfs_mntpt=None, brxnet=None):
        super(LocalFuseMount, self).__init__(ctx=ctx, test_dir=test_dir,
            client_id=client_id, client_keyring_path=client_keyring_path,
            client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt,
            cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)

        # The following block makes tests meant for teuthology compatible with
        # vstart_runner.
        self._mount_bin = [os.path.join(BIN_PREFIX, 'ceph-fuse')]
        self._mount_cmd_cwd, self._mount_cmd_logger, \
            self._mount_cmd_stdin = None, None, None

    # XXX: CephFSMount._create_mntpt() sets the mountpoint's permission mode to
    # 0000 which doesn't work for vstart_runner since superuser privileges are
    # not used for mounting Ceph FS with FUSE.
    def _create_mntpt(self):
        self.client_remote.run(args=f'mkdir -p -v {self.hostfs_mntpt}')

    def _run_mount_cmd(self, mntopts, check_status):
        super(type(self), self)._run_mount_cmd(mntopts, check_status)
        self._set_fuse_daemon_pid(check_status)

    def _get_mount_cmd(self, mntopts):
        mount_cmd = super(type(self), self)._get_mount_cmd(mntopts)

        if os.getuid() != 0:
            mount_cmd += ['--client_die_on_failed_dentry_invalidate=false']

        return mount_cmd

    @property
    def _fuse_conn_check_timeout(self):
        return 30

    def _add_valgrind_args(self, mount_cmd):
        return []

    def _set_fuse_daemon_pid(self, check_status):
        # NOTE: When a command <args> is launched with sudo, two processes are
        # launched, one with sudo in <args> and the other without. Make sure we
        # get the PID of the latter one.
        try:
            with safe_while(sleep=1, tries=15) as proceed:
                while proceed():
                    try:
                        sock = self.find_admin_socket()
                    except (RuntimeError, CommandFailedError):
                        continue

                    self.fuse_daemon.fuse_pid = int(re.match(".*\.(\d+)\.asok$",
                                                             sock).group(1))
                    break
        except MaxWhileTries:
            if check_status:
                raise
            else:
                pass

# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of
# the same name.
class LocalCephManager(CephManager):
    def __init__(self):
        # Deliberately skip parent init, only inheriting from it to get
        # util methods like osd_dump that sit on top of raw_cluster_cmd
        self.controller = LocalRemote()

        # A minority of CephManager fns actually bother locking for when
        # certain teuthology tests want to run tasks in parallel
        self.lock = threading.RLock()

        self.log = lambda x: log.debug(x)

        # Don't bother constructing a map of pools: it should be empty
        # at test cluster start, and in any case it would be out of date
        # in no time. The attribute needs to exist for some of the CephManager
        # methods to work though.
        self.pools = {}

        # NOTE: These variables are being overridden here so that the parent
        # class can pick them up.
        self.cephadm = False
        self.rook = False
        self.testdir = None
        self.run_cluster_cmd_prefix = [CEPH_CMD]
        # XXX: The Ceph API test CI job crashes because the "ceph -w" process
        # launched by run_ceph_w() crashes when shell is set to True.
        # See https://tracker.ceph.com/issues/49644.
        #
        # The two possible workarounds are either setting "shell" to "False"
        # when the command "ceph -w" is executed, or prepending "exec sudo" to
        # the command arguments. We go with the latter since the former would
        # make it necessary to pass a "shell" parameter to the run() method,
        # which would be incompatible with teuthology.orchestra.run's run()
        # since it doesn't accept "shell" as a parameter.
        self.run_ceph_w_prefix = ['exec', 'sudo', CEPH_CMD]

    def find_remote(self, daemon_type, daemon_id):
        """
        daemon_type like 'mds', 'osd'
        daemon_id like 'a', '0'
        """
        return LocalRemote()

    def admin_socket(self, daemon_type, daemon_id, command, check_status=True,
                     timeout=None, stdout=None):
        if stdout is None:
            stdout = StringIO()

        args = [CEPH_CMD, "daemon", f"{daemon_type}.{daemon_id}"] + command
        return self.controller.run(args=args, check_status=check_status,
                                   timeout=timeout, stdout=stdout)


class LocalCephCluster(CephCluster):
    def __init__(self, ctx):
        # Deliberately skip calling CephCluster constructor
        self._ctx = ctx
        self.mon_manager = LocalCephManager()
        self._conf = defaultdict(dict)

    @property
    def admin_remote(self):
        return LocalRemote()

    def get_config(self, key, service_type=None):
        if service_type is None:
            service_type = 'mon'

        # FIXME hardcoded vstart service IDs
        service_id = {
            'mon': 'a',
            'mds': 'a',
            'osd': '0'
        }[service_type]

        return self.json_asok(['config', 'get', key], service_type, service_id)[key]

    def _write_conf(self):
        # In teuthology, we have the honour of writing the entire ceph.conf, but
        # in vstart land it has mostly already been written and we need to carefully
        # append to it.
        conf_path = "./ceph.conf"
        banner = "\n#LOCAL_TEST\n"
        existing_str = open(conf_path).read()

        if banner in existing_str:
            existing_str = existing_str[0:existing_str.find(banner)]

        existing_str += banner

        for subsys, kvs in self._conf.items():
            existing_str += "\n[{0}]\n".format(subsys)
            for key, val in kvs.items():
                # Comment out existing instance if it exists
                log.debug("Searching for existing instance {0}/{1}".format(
                    key, subsys
                ))
                existing_section = re.search("^\[{0}\]$([\n]|[^\[])+".format(
                    subsys
                ), existing_str, re.MULTILINE)

                if existing_section:
                    section_str = existing_str[existing_section.start():existing_section.end()]
                    existing_val = re.search("^\s*[^#]({0}) =".format(key), section_str, re.MULTILINE)
                    if existing_val:
                        start = existing_section.start() + existing_val.start(1)
                        log.debug("Found string to replace at {0}".format(
                            start
                        ))
                        existing_str = existing_str[0:start] + "#" + existing_str[start:]

                existing_str += "{0} = {1}\n".format(key, val)

        open(conf_path, "w").write(existing_str)

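    # Illustrative sketch of the result: after set_ceph_conf("mds",
    # "mds log max segments", "10") the vstart ceph.conf gains a section like
    #
    #   #LOCAL_TEST
    #   [mds]
    #   mds log max segments = 10
    #
    # and any pre-existing, uncommented value for that key is commented out
    # in place.
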
    def set_ceph_conf(self, subsys, key, value):
        self._conf[subsys][key] = value
        self._write_conf()

    def clear_ceph_conf(self, subsys, key):
        del self._conf[subsys][key]
        self._write_conf()


class LocalMDSCluster(LocalCephCluster, MDSCluster):
    def __init__(self, ctx):
        LocalCephCluster.__init__(self, ctx)
        # Deliberately skip calling MDSCluster constructor
        self._mds_ids = ctx.daemons.daemons['ceph.mds'].keys()
        log.debug("Discovered MDS IDs: {0}".format(self._mds_ids))
        self._mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids])

    @property
    def mds_ids(self):
        return self._mds_ids

    @property
    def mds_daemons(self):
        return self._mds_daemons

    def clear_firewall(self):
        # FIXME: unimplemented
        pass

    def newfs(self, name='cephfs', create=True):
        return LocalFilesystem(self._ctx, name=name, create=create)

    def delete_all_filesystems(self):
        """
        Remove all filesystems that exist, and any pools in use by them.
        """
        for fs in self.status().get_filesystems():
            LocalFilesystem(ctx=self._ctx, fscid=fs['id']).destroy()


class LocalMgrCluster(LocalCephCluster, MgrCluster):
    def __init__(self, ctx):
        super(LocalMgrCluster, self).__init__(ctx)

        self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys()
        self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids])


class LocalFilesystem(LocalMDSCluster, Filesystem):
    def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False):
        # Deliberately skip calling Filesystem constructor
        LocalMDSCluster.__init__(self, ctx)

        self.id = None
        self.name = name
        self.metadata_pool_name = None
        self.metadata_overlay = False
        self.data_pool_name = None
        self.data_pools = None
        self.fs_config = fs_config
        self.ec_profile = fs_config.get('ec_profile')

        self.mon_manager = LocalCephManager()

        self.client_remote = LocalRemote()

        self._conf = defaultdict(dict)

        if name is not None:
            if fscid is not None:
                raise RuntimeError("cannot specify fscid when creating fs")
            if create and not self.legacy_configured():
                self.create()
        else:
            if fscid is not None:
                self.id = fscid
            self.getinfo(refresh=True)

        # Stash a reference to the first created filesystem on ctx, so
        # that if someone drops to the interactive shell they can easily
        # poke our methods.
        if not hasattr(self._ctx, "filesystem"):
            self._ctx.filesystem = self

    @property
    def _prefix(self):
        return BIN_PREFIX

    def set_clients_block(self, blocked, mds_id=None):
        raise NotImplementedError()


class LocalCluster(object):
    def __init__(self, rolename="placeholder"):
        self.remotes = {
            LocalRemote(): [rolename]
        }

    def only(self, requested):
        return self.__class__(rolename=requested)

    def run(self, *args, **kwargs):
        r = []
        for remote in self.remotes.keys():
            r.append(remote.run(*args, **kwargs))
        return r


class LocalContext(object):
    def __init__(self):
        self.config = {'cluster': 'ceph'}
        self.teuthology_config = teuth_config
        self.cluster = LocalCluster()
        self.daemons = DaemonGroup()
        if not hasattr(self, 'managers'):
            self.managers = {}
        self.managers[self.config['cluster']] = LocalCephManager()

        # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any
        # tests that want to look these up via ctx can do so.
        # Inspect ceph.conf to see what roles exist
        for conf_line in open("ceph.conf").readlines():
            for svc_type in ["mon", "osd", "mds", "mgr"]:
                prefixed_type = "ceph." + svc_type
                if prefixed_type not in self.daemons.daemons:
                    self.daemons.daemons[prefixed_type] = {}
                match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line)
                if match:
                    svc_id = match.group(1)
                    self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id)

    def __del__(self):
        test_path = self.teuthology_config['test_path']
        # opt_create_cluster_only does not create the test path
        if test_path:
            shutil.rmtree(test_path)


#########################################
#
# stuff necessary for launching tests...
#
#########################################


def enumerate_methods(s):
    log.debug("e: {0}".format(s))
    for t in s._tests:
        if isinstance(t, suite.BaseTestSuite):
            for sub in enumerate_methods(t):
                yield sub
        else:
            yield s, t


def load_tests(modules, loader):
    if modules:
        log.debug("Executing modules: {0}".format(modules))
        module_suites = []
        for mod_name in modules:
            # Test names like cephfs.test_auto_repair
            module_suites.append(loader.loadTestsFromName(mod_name))
        log.debug("Loaded: {0}".format(list(module_suites)))
        return suite.TestSuite(module_suites)
    else:
        log.debug("Executing all cephfs tests")
        return loader.discover(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "cephfs")
        )


def scan_tests(modules):
    overall_suite = load_tests(modules, loader.TestLoader())
    max_required_mds = 0
    max_required_clients = 0
    max_required_mgr = 0
    require_memstore = False

    for suite_, case in enumerate_methods(overall_suite):
        max_required_mds = max(max_required_mds,
                               getattr(case, "MDSS_REQUIRED", 0))
        max_required_clients = max(max_required_clients,
                                   getattr(case, "CLIENTS_REQUIRED", 0))
        max_required_mgr = max(max_required_mgr,
                               getattr(case, "MGRS_REQUIRED", 0))
        require_memstore = getattr(case, "REQUIRE_MEMSTORE", False) \
            or require_memstore

    return max_required_mds, max_required_clients, \
        max_required_mgr, require_memstore

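# Illustrative sketch (hypothetical test, not part of this file): scan_tests()
# sizes the vstart cluster from class attributes on the test cases, e.g.
#
#   class TestExample(CephFSTestCase):
#       MDSS_REQUIRED = 2
#       CLIENTS_REQUIRED = 1
#       REQUIRE_MEMSTORE = True
#
# would make exec_test() ask for at least two MDS daemons, one client mount
# and a memstore-backed vstart cluster.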

class LogRotate():
    def __init__(self):
        self.conf_file_path = os.path.join(os.getcwd(), 'logrotate.conf')
        self.state_file_path = os.path.join(os.getcwd(), 'logrotate.state')

    def run_logrotate(self):
        remote.run(args=['logrotate', '-f', self.conf_file_path, '-s',
                         self.state_file_path, '--verbose'])


def teardown_cluster():
    log.info('\ntearing down the cluster...')
    remote.run(args=[os.path.join(SRC_PREFIX, "stop.sh")], timeout=60)
    log.info('\nceph cluster torn down')
    remote.run(args=['rm', '-rf', './dev', './out'])


def clear_old_log():
    try:
        os.stat(logpath)
    except FileNotFoundError:
        return
    else:
        os.remove(logpath)
        with open(logpath, 'w') as logfile:
            logfile.write('')
        init_log(log.level)
        log.debug('logging in a fresh file now...')


class LogStream(object):
    def __init__(self):
        self.buffer = ""
        self.omit_result_lines = False

    def _del_result_lines(self):
        """
        Don't let unittest.TextTestRunner print "Ran X tests in Ys",
        vstart_runner.py will do it for itself since it runs tests in a
        testsuite one by one.
        """
        if self.omit_result_lines:
            self.buffer = re.sub('-'*70+'\nran [0-9]* test in [0-9.]*s\n*',
                                 '', self.buffer, flags=re.I)
            self.buffer = re.sub('failed \(failures=[0-9]*\)\n', '', self.buffer,
                                 flags=re.I)
            self.buffer = self.buffer.replace('OK\n', '')

    def write(self, data):
        self.buffer += data
        if self.buffer.count("\n") > 5:
            self._write()

    def _write(self):
        if opt_rotate_logs:
            self._del_result_lines()
        if self.buffer == '':
            return

        lines = self.buffer.split("\n")
        for line in lines:
            # sys.stderr.write(line + "\n")
            log.info(line)
        self.buffer = ''

    def flush(self):
        pass

    def __del__(self):
        self._write()


class InteractiveFailureResult(unittest.TextTestResult):
    """
    Specialization that implements interactive-on-error style
    behavior.
    """
    def addFailure(self, test, err):
        super(InteractiveFailureResult, self).addFailure(test, err)
        log.error(self._exc_info_to_string(err, test))
        log.error("Failure in test '{0}', going interactive".format(
            self.getDescription(test)
        ))
        interactive.task(ctx=None, config=None)

    def addError(self, test, err):
        super(InteractiveFailureResult, self).addError(test, err)
        log.error(self._exc_info_to_string(err, test))
        log.error("Error in test '{0}', going interactive".format(
            self.getDescription(test)
        ))
        interactive.task(ctx=None, config=None)


# XXX: the result class we actually need is built by inheriting from this
# template and from one of InteractiveFailureResult or unittest.TextTestResult.
class LoggingResultTemplate(object):
    fail_on_skip = False

    def startTest(self, test):
        log.info("Starting test: {0}".format(self.getDescription(test)))
        test.started_at = datetime.datetime.utcnow()
        return super(LoggingResultTemplate, self).startTest(test)

    def stopTest(self, test):
        log.info("Stopped test: {0} in {1}s".format(
            self.getDescription(test),
            (datetime.datetime.utcnow() - test.started_at).total_seconds()
        ))

    def addSkip(self, test, reason):
        if LoggingResultTemplate.fail_on_skip:
            # Don't just call addFailure because that requires a traceback
            self.failures.append((test, reason))
        else:
            super(LoggingResultTemplate, self).addSkip(test, reason)


def launch_tests(overall_suite):
    if opt_rotate_logs or not opt_exit_on_test_failure:
        return launch_individually(overall_suite)
    else:
        return launch_entire_suite(overall_suite)


def get_logging_result_class():
    result_class = InteractiveFailureResult if opt_interactive_on_error else \
        unittest.TextTestResult
    return type('', (LoggingResultTemplate, result_class), {})


def launch_individually(overall_suite):
    no_of_tests_failed, no_of_tests_execed = 0, 0
    LoggingResult = get_logging_result_class()
    stream = LogStream()
    stream.omit_result_lines = True
    if opt_rotate_logs:
        logrotate = LogRotate()

    started_at = datetime.datetime.utcnow()
    for suite_, case in enumerate_methods(overall_suite):
        # don't run logrotate beforehand since some ceph daemons might be
        # down and pre/post-rotate scripts in logrotate.conf might fail.
        if opt_rotate_logs:
            logrotate.run_logrotate()

        result = unittest.TextTestRunner(stream=stream,
                                         resultclass=LoggingResult,
                                         verbosity=2, failfast=True).run(case)

        if not result.wasSuccessful():
            if opt_exit_on_test_failure:
                break
            else:
                no_of_tests_failed += 1

        no_of_tests_execed += 1
    time_elapsed = (datetime.datetime.utcnow() - started_at).total_seconds()

    if result.wasSuccessful():
        log.info('')
        log.info('-'*70)
        log.info(f'Ran {no_of_tests_execed} tests in {time_elapsed}s')
        if no_of_tests_failed > 0:
            log.info(f'{no_of_tests_failed} tests failed')
        log.info('')
        log.info('OK')

    return result


def launch_entire_suite(overall_suite):
    LoggingResult = get_logging_result_class()

    testrunner = unittest.TextTestRunner(stream=LogStream(),
                                         resultclass=LoggingResult,
                                         verbosity=2, failfast=True)
    return testrunner.run(overall_suite)


def exec_test():
    # Parse arguments
    global opt_interactive_on_error
    opt_interactive_on_error = False
    opt_create_cluster = False
    opt_create_cluster_only = False
    opt_ignore_missing_binaries = False
    opt_teardown_cluster = False
    global opt_log_ps_output
    opt_log_ps_output = False
    use_kernel_client = False
    global opt_use_ns
    opt_use_ns = False
    opt_brxnet = None
    opt_verbose = True
    global opt_rotate_logs
    opt_rotate_logs = False
    global opt_exit_on_test_failure
    opt_exit_on_test_failure = True

    args = sys.argv[1:]
    flags = [a for a in args if a.startswith("-")]
    modules = [a for a in args if not a.startswith("-")]
    for f in flags:
        if f == "--interactive":
            opt_interactive_on_error = True
        elif f == "--create":
            opt_create_cluster = True
        elif f == "--create-cluster-only":
            opt_create_cluster_only = True
        elif f == "--ignore-missing-binaries":
            opt_ignore_missing_binaries = True
        elif f == '--teardown':
            opt_teardown_cluster = True
        elif f == '--log-ps-output':
            opt_log_ps_output = True
        elif f == '--clear-old-log':
            clear_old_log()
        elif f == "--kclient":
            use_kernel_client = True
        elif f == '--usens':
            opt_use_ns = True
        elif '--brxnet' in f:
            if re.search(r'=[0-9./]+', f) is None:
                log.error("--brxnet=<ip/mask> option needs one argument: '{0}'".format(f))
                sys.exit(-1)
            opt_brxnet = f.split('=')[1]
            try:
                IP(opt_brxnet)
                if IP(opt_brxnet).iptype() == 'PUBLIC':
                    raise RuntimeError('is public')
            except Exception as e:
                log.error("Invalid ip '{0}' {1}".format(opt_brxnet, e))
                sys.exit(-1)
        elif '--no-verbose' == f:
            opt_verbose = False
        elif f == '--rotate-logs':
            opt_rotate_logs = True
        elif f == '--run-all-tests':
            opt_exit_on_test_failure = False
        elif f == '--debug':
            log.setLevel(logging.DEBUG)
        else:
            log.error("Unknown option '{0}'".format(f))
            sys.exit(-1)

    # Help developers by stopping up-front if their tree isn't built enough for all the
    # tools that the tests might want to use (add more here if needed)
    require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan",
                        "cephfs-table-tool", "ceph-fuse", "rados", "cephfs-meta-injection"]
    # What binaries may be required is task specific
    require_binaries = ["ceph-dencoder", "rados"]
    missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))]
    if missing_binaries and not opt_ignore_missing_binaries:
        log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries)))
        sys.exit(-1)

    max_required_mds, max_required_clients, \
        max_required_mgr, require_memstore = scan_tests(modules)

    global remote
    remote = LocalRemote()

    CephFSMount.cleanup_stale_netnses_and_bridge(remote)

    # Tolerate no MDSs or clients running at start
    ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
                        stdout=StringIO()).stdout.getvalue().strip()
    lines = ps_txt.split("\n")[1:]
    for line in lines:
        if 'ceph-fuse' in line or 'ceph-mds' in line:
            pid = int(line.split()[0])
            log.warning("Killing stray process {0}".format(line))
            os.kill(pid, signal.SIGKILL)

    # Fire up the Ceph cluster if the user requested it
    if opt_create_cluster or opt_create_cluster_only:
        log.info("Creating cluster with {0} MDS daemons".format(
            max_required_mds))
        teardown_cluster()
        vstart_env = os.environ.copy()
        vstart_env["FS"] = "0"
        vstart_env["MDS"] = max_required_mds.__str__()
        vstart_env["OSD"] = "4"
        vstart_env["MGR"] = max(max_required_mgr, 1).__str__()

        args = [
            os.path.join(SRC_PREFIX, "vstart.sh"),
            "-n",
            "--nolockdep",
        ]
        if require_memstore:
            args.append("--memstore")

        if opt_verbose:
            args.append("-d")

        log.info('\nrunning vstart.sh now...')
        # Usually vstart.sh completes in well under 100 seconds.
        remote.run(args=args, env=vstart_env, timeout=(3 * 60))
        log.info('\nvstart.sh finished running')

        # Wait for OSD to come up so that subsequent injectargs etc will
        # definitely succeed
        LocalCephCluster(LocalContext()).mon_manager.wait_for_all_osds_up(timeout=30)

    if opt_create_cluster_only:
        return

    if opt_use_ns and mon_in_localhost() and not opt_create_cluster:
        raise RuntimeError("cluster is on localhost; '--usens' option is incompatible. Or you can pass an extra '--create' option to create a new cluster without localhost!")

    # List of client mounts, sufficient to run the selected tests
    clients = [i.__str__() for i in range(0, max_required_clients)]

    test_dir = tempfile.mkdtemp()
    teuth_config['test_path'] = test_dir

    ctx = LocalContext()
    ceph_cluster = LocalCephCluster(ctx)
    mds_cluster = LocalMDSCluster(ctx)
    mgr_cluster = LocalMgrCluster(ctx)

    # Construct Mount classes
    mounts = []
    for client_id in clients:
        # Populate client keyring (it sucks to use client.admin for test clients
        # because it's awkward to find the logs later)
        client_name = "client.{0}".format(client_id)

        if client_name not in open("./keyring").read():
            p = remote.run(args=[CEPH_CMD, "auth", "get-or-create", client_name,
                                 "osd", "allow rw",
                                 "mds", "allow",
                                 "mon", "allow r"], stdout=StringIO())

            open("./keyring", "at").write(p.stdout.getvalue())

        if use_kernel_client:
            mount = LocalKernelMount(ctx=ctx, test_dir=test_dir,
                                     client_id=client_id, brxnet=opt_brxnet)
        else:
            mount = LocalFuseMount(ctx=ctx, test_dir=test_dir,
                                   client_id=client_id, brxnet=opt_brxnet)

        mounts.append(mount)
        if os.path.exists(mount.hostfs_mntpt):
            if mount.is_mounted():
                log.warning("unmounting {0}".format(mount.hostfs_mntpt))
                mount.umount_wait()
            else:
                os.rmdir(mount.hostfs_mntpt)

    from tasks.cephfs_test_runner import DecoratingLoader

    decorating_loader = DecoratingLoader({
        "ctx": ctx,
        "mounts": mounts,
        "ceph_cluster": ceph_cluster,
        "mds_cluster": mds_cluster,
        "mgr_cluster": mgr_cluster,
    })

    # For the benefit of polling tests like test_full -- in teuthology land we set this
    # in a .yaml, here it's just a hardcoded thing for the developer's pleasure.
    remote.run(args=[CEPH_CMD, "tell", "osd.*", "injectargs", "--osd-mon-report-interval", "5"])
    ceph_cluster.set_ceph_conf("osd", "osd_mon_report_interval", "5")

    # Vstart defaults to two segments, which very easily gets a "behind on trimming" health warning
    # from normal IO latency. Increase it for running tests.
    ceph_cluster.set_ceph_conf("mds", "mds log max segments", "10")

    # Make sure the filesystem created in tests has uid/gid that will let us talk to
    # it after mounting it (without having to go root). Set in 'global' not just 'mds'
    # so that cephfs-data-scan will pick it up too.
    ceph_cluster.set_ceph_conf("global", "mds root ino uid", "%s" % os.getuid())
    ceph_cluster.set_ceph_conf("global", "mds root ino gid", "%s" % os.getgid())

    # Monkeypatch get_package_version to avoid having to work out what kind of distro we're on
    def _get_package_version(remote, pkg_name):
        # Used in cephfs tests to find fuse version. Your development workstation *does* have >=2.9, right?
        return "2.9"

    import teuthology.packaging
    teuthology.packaging.get_package_version = _get_package_version

    overall_suite = load_tests(modules, decorating_loader)

    # Filter out tests that don't lend themselves to interactive running.
    victims = []
    for case, method in enumerate_methods(overall_suite):
        fn = getattr(method, method._testMethodName)

        drop_test = False

        if hasattr(fn, 'is_for_teuthology') and getattr(fn, 'is_for_teuthology') is True:
            drop_test = True
            log.warning("Dropping test because long running: {method_id}".format(method_id=method.id()))

        if getattr(fn, "needs_trimming", False) is True:
            drop_test = (os.getuid() != 0)
            log.warning("Dropping test because client trim unavailable: {method_id}".format(method_id=method.id()))

        if drop_test:
            # Don't drop the test if it was explicitly requested in arguments
            is_named = False
            for named in modules:
                if named.endswith(method.id()):
                    is_named = True
                    break

            if not is_named:
                victims.append((case, method))

    log.debug("Disabling {0} tests because of is_for_teuthology or needs_trimming".format(len(victims)))
    for s, method in victims:
        s._tests.remove(method)

    overall_suite = load_tests(modules, loader.TestLoader())
    result = launch_tests(overall_suite)

    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
    if opt_teardown_cluster:
        teardown_cluster()

    if not result.wasSuccessful():
        # no point in duplicating errors here if we can have multiple failures
        # in the same run.
        if opt_exit_on_test_failure:
            result.printErrors()  # duplicate output at end for convenience

        bad_tests = []
        for test, error in result.errors:
            bad_tests.append(str(test))
        for test, failure in result.failures:
            bad_tests.append(str(test))

        sys.exit(-1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    exec_test()