]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/systemd.py
9 from cStringIO
import StringIO
10 from teuthology
.orchestra
import run
11 from teuthology
.misc
import reconnect
, get_first_mon
, wait_until_healthy
# Module-level logger used by every message this task emits.
log = logging.getLogger(__name__)
def _stop_check_start(remote, unit, stopped_msg, failed_msg):
    """
    Stop one systemd unit on *remote*, report whether it went inactive,
    then start it again.

    :param remote:      object whose ``run(args=...)`` executes a command
    :param unit:        systemd unit name, e.g. ``ceph-mon@host.service``
    :param stopped_msg: message logged when the status output contains
                        ``Active: inactive``
    :param failed_msg:  message logged otherwise
    """
    remote.run(args=['sudo', 'systemctl', 'status', unit])
    remote.run(args=['sudo', 'systemctl', 'stop', unit])
    time.sleep(4)  # immediate check will result in deactivating state
    # check_status=False: the status command must not abort the task, only
    # its captured output is inspected.
    r = remote.run(args=['sudo', 'systemctl', 'status', unit],
                   stdout=StringIO(), check_status=False)
    status = r.stdout.getvalue()
    log.info(status)
    # Use ``in`` rather than str.find(): find() returns -1 (truthy) when the
    # text is absent and 0 (falsy) when it is at the very start.
    if 'Active: inactive' in status:
        log.info(stopped_msg)
    else:
        log.info(failed_msg)
    remote.run(args=['sudo', 'systemctl', 'start', unit])


@contextlib.contextmanager
def task(ctx, config):
    """
    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors

    For every remote in ``ctx.cluster.remotes`` this:

    * lists ceph processes and ceph systemd units, logging if any unit
      line mentions ``failed``;
    * stops and starts ``ceph.target`` and logs the resulting status;
    * stops and starts the node's individual osd / mon / mgr / mds units
      when present.

    Afterwards all nodes are rebooted, reconnected, and the task waits for
    the cluster to become healthy before yielding.

    :param ctx:    task context; ``ctx.cluster`` is used to reach the nodes
    :param config: task configuration, passed through to ``get_first_mon``
    """
    for remote, roles in ctx.cluster.remotes.iteritems():
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        r = remote.run(args=['sudo', 'systemctl', 'list-units', run.Raw('|'),
                             'grep', 'ceph'], stdout=StringIO(),
                       check_status=False)
        unit_list = r.stdout.getvalue()
        log.info(unit_list)
        if 'failed' in unit_list:
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        r = remote.run(args=['sudo', 'systemctl', 'status', 'ceph.target'],
                       stdout=StringIO(), check_status=False)
        target_status = r.stdout.getvalue()
        log.info(target_status)
        log.info("Checking process status")
        r = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                             'grep', 'ceph'], stdout=StringIO())
        log.info(r.stdout.getvalue())
        # The 'Active: ...' line is printed by systemctl status, not by ps,
        # so the verdict is taken from the status output captured above.
        if 'Active: inactive' in target_status:
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        r = remote.run(args=['sudo', 'systemctl', 'status', 'ceph.target'],
                       stdout=StringIO())
        target_status = r.stdout.getvalue()
        log.info(target_status)
        if 'Active: active' in target_status:
            log.info("Successfully started all Ceph services")
        else:
            log.info("Failed to start Ceph services")
        r = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                             'grep', 'ceph'], stdout=StringIO())
        process_list = r.stdout.getvalue()
        log.info(process_list)

        # test individual services start stop
        name = remote.shortname
        mon_name = 'ceph-mon@' + name + '.service'
        mds_name = 'ceph-mds@' + name + '.service'
        mgr_name = 'ceph-mgr@' + name + '.service'
        mon_role_name = 'mon.' + name
        mds_role_name = 'mds.' + name
        mgr_role_name = 'mgr.' + name
        # The osd id is taken from the command line of a running process.
        m_osd = re.search(r'--id (\d+) --setuser ceph', process_list)
        # Skip the osd test when no matching process was listed.
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            _stop_check_start(
                remote, osd_service,
                "Successfully stopped single osd ceph service",
                "Failed to stop ceph osd services")
        if mon_role_name in roles:
            _stop_check_start(
                remote, mon_name,
                "Successfully stopped single mon ceph service",
                "Failed to stop ceph mon service")
        if mgr_role_name in roles:
            _stop_check_start(
                remote, mgr_name,
                "Successfully stopped single ceph mgr service",
                "Failed to stop ceph mgr service")
        if mds_role_name in roles:
            _stop_check_start(
                remote, mds_name,
                "Successfully stopped single ceph mds service",
                "Failed to stop ceph mds service")

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    # NOTE(review): the delay statement was lost in extraction; 120 seconds
    # is assumed here - confirm against the upstream file.
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.iterkeys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    # NOTE(review): the extracted source ends before this point; a
    # @contextlib.contextmanager generator must yield exactly once, so the
    # yield is restored here - confirm against the upstream file.
    yield