]> git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/systemd.py
bump version to 18.2.4-pve3
[ceph.git] / ceph / qa / tasks / systemd.py
1 """
2 Systemd test
3 """
4 import contextlib
5 import logging
6 import re
7 import time
8
9 from teuthology.orchestra import run
10 from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
11
12 log = logging.getLogger(__name__)
13
def _remote_service_status(remote, service):
    """
    Return the output of ``sudo systemctl status <service>`` run on
    ``remote``.

    The command is issued with ``check_status=False``, so the text is
    handed back to the caller whatever the command's exit status was.
    """
    command = 'sudo systemctl status %s' % service
    return remote.sh(command, check_status=False)
18
def _stop_check_start(remote, service, stopped_msg, failed_msg, settle=4):
    """
    Stop a single systemd unit, log whether it reports inactive, then
    start it again.

    :param remote: remote on which the unit is managed
    :param service: systemd unit name, e.g. 'ceph-mon@host.service'
    :param stopped_msg: message logged when the unit reports inactive
    :param failed_msg: message logged when it does not
    :param settle: seconds to sleep after the stop and after the start
    """
    # NOTE(review): check_status is left at its default here, so this
    # presumably fails the task if the unit is not running beforehand.
    remote.run(args=['sudo', 'systemctl', 'status', service])
    remote.run(args=['sudo', 'systemctl', 'stop', service])
    time.sleep(settle)  # immediate check will result in deactivating state
    status = _remote_service_status(remote, service)
    log.info(status)
    # use 'in': str.find() returns -1 (truthy) when the text is absent
    if 'Active: inactive' in status:
        log.info(stopped_msg)
    else:
        log.info(failed_msg)
    remote.run(args=['sudo', 'systemctl', 'start', service])
    time.sleep(settle)


@contextlib.contextmanager
def task(ctx, config):
    """
    - tasks:
        ceph-deploy:
        systemd:

    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors

    For every remote in the cluster: list the ceph units, stop and start
    ceph.target, then stop and start the individual osd/mon/mgr/mds units
    found on that remote.  Afterwards all nodes are rebooted, reconnected
    and the task waits until the cluster is healthy before yielding.

    :param ctx: teuthology context; ``ctx.cluster`` is used to reach remotes
    :param config: task configuration, passed on to get_first_mon()
    """
    for remote, roles in ctx.cluster.remotes.items():
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        units = remote.sh('sudo systemctl list-units | grep ceph',
                          check_status=False)
        log.info(units)
        # str.find() returns -1 (truthy) on a miss and 0 (falsy) on a hit
        # at index 0, so it must not be used as a boolean
        if 'failed' in units:
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        log.info("Checking process status")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        # the 'Active:' line is part of the systemctl status output,
        # not of the ps listing
        if 'Active: inactive' in status:
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        if 'Active: active' in status:
            log.info("Successfully started all Ceph services")
        else:
            log.info("Failed to start Ceph services")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname
        # the osd id is taken from the command line of a running ceph-osd
        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_eaf)
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            _stop_check_start(remote, osd_service,
                              "Successfully stopped single osd ceph service",
                              "Failed to stop ceph osd services")

        # (daemon type, message on success, message on failure)
        daemons = (
            ('mon',
             "Successfully stopped single mon ceph service",
             "Failed to stop ceph mon service"),
            ('mgr',
             "Successfully stopped single ceph mgr service",
             "Failed to stop ceph mgr service"),
            ('mds',
             "Successfully stopped single ceph mds service",
             "Failed to stop ceph mds service"),
        )
        for daemon, stopped_msg, failed_msg in daemons:
            # only exercise daemons whose role '<type>.<shortname>' is
            # assigned to this remote
            if daemon + '.' + name in roles:
                service = 'ceph-' + daemon + '@' + name + '.service'
                _stop_check_start(remote, service, stopped_msg, failed_msg)

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.keys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    yield