]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/systemd.py
import quincy beta 17.1.0
[ceph.git] / ceph / qa / tasks / systemd.py
CommitLineData
7c673cae
FG
1"""
2Systemd test
3"""
4import contextlib
5import logging
6import re
7import time
8
7c673cae
FG
9from teuthology.orchestra import run
10from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
11
12log = logging.getLogger(__name__)
13
9f95a23c
TL
def _remote_service_status(remote, service):
    """
    Return the output of ``systemctl status`` for a unit on a remote.

    :param remote: remote object whose ``sh`` method runs the command
    :param service: systemd unit name, e.g. ``ceph.target``
    :returns: whatever ``remote.sh`` returns for the status command

    ``check_status=False`` is passed so the call is not asked to verify
    the command's exit status; callers inspect the returned text instead.
    """
    command = 'sudo systemctl status %s' % service
    return remote.sh(command, check_status=False)
7c673cae
FG
18
def _stop_check_start(remote, service, stopped_msg, failed_msg):
    """
    Stop a single systemd unit, report whether it went inactive, and
    start it again.

    :param remote: remote on which the unit runs
    :param service: full unit name, e.g. ``ceph-mon@host.service``
    :param stopped_msg: message logged when the unit reports inactive
    :param failed_msg: message logged when it does not
    """
    remote.run(args=['sudo', 'systemctl', 'status', service])
    remote.run(args=['sudo', 'systemctl', 'stop', service])
    time.sleep(4)  # immediate check will result in deactivating state
    status = _remote_service_status(remote, service)
    log.info(status)
    # Use a containment test: str.find() returns -1 (truthy) when the
    # text is absent, so it must not be used directly as a condition.
    if 'Active: inactive' in status:
        log.info(stopped_msg)
    else:
        log.info(failed_msg)
    remote.run(args=['sudo', 'systemctl', 'start', service])
    time.sleep(4)


@contextlib.contextmanager
def task(ctx, config):
    """
    - tasks:
        ceph-deploy:
        systemd:

    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors
    """
    for remote, roles in ctx.cluster.remotes.items():
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        units = remote.sh('sudo systemctl list-units | grep ceph',
                          check_status=False)
        log.info(units)
        if 'failed' in units:
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        log.info("Checking process status")
        # Debug output only.  grep exits non-zero when nothing matches,
        # which is the expected outcome once everything is stopped, so
        # the exit status must not be treated as a failure here.
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph', check_status=False)
        log.info(ps_eaf)
        # The 'Active:' line comes from systemctl status, not from ps.
        if 'Active: inactive' in status:
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        if 'Active: active' in status:
            log.info("Successfully started all Ceph services")
        else:
            log.info("Failed to start Ceph services")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname

        # The osd id is not derived from the host name; pick it out of
        # the running ceph-osd command line.
        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_eaf)
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            _stop_check_start(
                remote, osd_service,
                "Successfully stopped single osd ceph service",
                "Failed to stop ceph osd services")

        # mon, mgr and mds units are named after the host and are only
        # exercised when the matching role is assigned to this remote.
        per_host_daemons = (
            ('mon',
             "Successfully stopped single mon ceph service",
             "Failed to stop ceph mon service"),
            ('mgr',
             "Successfully stopped single ceph mgr service",
             "Failed to stop ceph mgr service"),
            ('mds',
             "Successfully stopped single ceph mds service",
             "Failed to stop ceph mds service"),
        )
        for daemon, stopped_msg, failed_msg in per_host_daemons:
            role_name = daemon + '.' + name
            if role_name in roles:
                service = 'ceph-' + daemon + '@' + name + '.service'
                _stop_check_start(remote, service, stopped_msg, failed_msg)

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.keys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    yield