]> git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/systemd.py
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / qa / tasks / systemd.py
1 """
2 Systemd test
3 """
4 import contextlib
5 import logging
6 import re
7 import time
8
9 from cStringIO import StringIO
10 from teuthology.orchestra import run
11 from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
12
13 log = logging.getLogger(__name__)
14
15
def _stop_and_restart_unit(remote, unit, stopped_msg, failed_msg,
                           log_status=False):
    """
    Stop a single systemd unit on ``remote``, report whether systemd shows
    it as inactive, then start it again.

    :param remote: remote object whose ``run()`` executes the commands
    :param unit: systemd unit name, e.g. ``ceph-osd@0.service``
    :param stopped_msg: message logged when the unit reports
                        ``Active: inactive`` after the stop
    :param failed_msg: message logged when it does not
    :param log_status: also log the raw ``systemctl status`` output
    """
    # This first status call uses the default check_status, so a unit that
    # is not running beforehand raises instead of being silently "stopped".
    remote.run(args=['sudo', 'systemctl', 'status', unit])
    remote.run(args=['sudo', 'systemctl', 'stop', unit])
    time.sleep(4)  # immediate check will result in deactivating state
    # check_status=False: the status command's exit code is ignored here;
    # only its output is inspected.
    r = remote.run(args=['sudo', 'systemctl', 'status', unit],
                   stdout=StringIO(), check_status=False)
    status = r.stdout.getvalue()
    if log_status:
        log.info(status)
    # Use ``in`` rather than str.find(): find() returns -1 (truthy) when the
    # text is absent, which made the old check always report success.
    if 'Active: inactive' in status:
        log.info(stopped_msg)
    else:
        log.info(failed_msg)
    remote.run(args=['sudo', 'systemctl', 'start', unit])
    time.sleep(4)


@contextlib.contextmanager
def task(ctx, config):
    """
    - tasks:
        ceph-deploy:
        systemd:

    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors

    For every remote in ``ctx.cluster.remotes`` this stops and starts
    ``ceph.target``, then stops and starts one osd unit (if an osd process
    is found in the ``ps`` output) and the mon/mgr/mds units matching the
    remote's roles.  Afterwards all nodes are rebooted and the task waits
    for the cluster to become healthy before yielding.

    :param ctx: context object providing ``ctx.cluster``
    :param config: task configuration, passed through to ``get_first_mon``
    """
    for remote, roles in ctx.cluster.remotes.iteritems():
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        r = remote.run(args=['sudo', 'systemctl', 'list-units', run.Raw('|'),
                             'grep', 'ceph'], stdout=StringIO(),
                       check_status=False)
        units = r.stdout.getvalue()
        log.info(units)
        if 'failed' in units:
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        r = remote.run(args=['sudo', 'systemctl', 'status', 'ceph.target'],
                       stdout=StringIO(), check_status=False)
        target_status = r.stdout.getvalue()
        log.info(target_status)
        log.info("Checking process status")
        r = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                             'grep', 'ceph'], stdout=StringIO())
        log.info(r.stdout.getvalue())
        # Judge the stop by the systemctl status text, not by the ps output
        # captured just above (the original tested the wrong output).
        if 'Active: inactive' in target_status:
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        r = remote.run(args=['sudo', 'systemctl', 'status', 'ceph.target'],
                       stdout=StringIO())
        target_status = r.stdout.getvalue()
        log.info(target_status)
        if 'Active: active' in target_status:
            log.info("Successfully started all Ceph services")
        else:
            log.info("Failed to start Ceph services")
        r = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                             'grep', 'ceph'], stdout=StringIO())
        ps_output = r.stdout.getvalue()
        log.info(ps_output)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname
        # Pick one osd id from the command line of a running ceph-osd
        # process as shown by ps.
        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_output)
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            _stop_and_restart_unit(
                remote, osd_service,
                "Successfully stopped single osd ceph service",
                "Failed to stop ceph osd services",
                log_status=True)

        # (daemon type, message when stopped, message when still running)
        daemon_checks = (
            ('mon',
             "Successfully stopped single mon ceph service",
             "Failed to stop ceph mon service"),
            ('mgr',
             "Successfully stopped single ceph mgr service",
             "Failed to stop ceph mgr service"),
            ('mds',
             "Successfully stopped single ceph mds service",
             "Failed to stop ceph mds service"),
        )
        for daemon, stopped_msg, failed_msg in daemon_checks:
            role_name = daemon + '.' + name
            if role_name in roles:
                unit = 'ceph-' + daemon + '@' + name + '.service'
                _stop_and_restart_unit(remote, unit, stopped_msg, failed_msg)

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.iterkeys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    yield