]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Systemd test | |
3 | """ | |
4 | import contextlib | |
5 | import logging | |
6 | import re | |
7 | import time | |
8 | ||
7c673cae FG |
9 | from teuthology.orchestra import run |
10 | from teuthology.misc import reconnect, get_first_mon, wait_until_healthy | |
11 | ||
12 | log = logging.getLogger(__name__) | |
13 | ||
9f95a23c TL |
14 | def _remote_service_status(remote, service): |
15 | status = remote.sh('sudo systemctl status %s' % service, | |
16 | check_status=False) | |
17 | return status | |
7c673cae FG |
18 | |
def _stop_start_service(remote, unit, kind):
    """Stop a single ceph systemd *unit*, verify it went inactive, restart it.

    *kind* is only used in log messages ('osd', 'mon', 'mgr', 'mds').
    Failures are logged rather than raised, matching this task's
    report-back-errors behaviour.
    """
    remote.run(args=['sudo', 'systemctl', 'status', unit])
    remote.run(args=['sudo', 'systemctl', 'stop', unit])
    time.sleep(4)  # an immediate check can still show 'deactivating'
    status = _remote_service_status(remote, unit)
    log.info(status)
    # NOTE: str.find() returns -1 (truthy!) when absent, so the original
    # truthiness tests were inverted; use substring membership instead.
    if 'Active: inactive' in status:
        log.info("Successfully stopped single %s ceph service", kind)
    else:
        log.info("Failed to stop ceph %s service", kind)
    remote.run(args=['sudo', 'systemctl', 'start', unit])
    time.sleep(4)


@contextlib.contextmanager
def task(ctx, config):
    """
    - tasks:
      ceph-deploy:
      systemd:

    Test ceph systemd services can start, stop and restart and
    check for any failed services and report back errors.

    On every remote: exercises ceph.target stop/start, then each
    individual osd/mon/mgr/mds unit present on that node, then reboots
    the whole cluster and waits for it to reconnect and reach HEALTH_OK.
    """
    for remote, roles in ctx.cluster.remotes.items():
        # Dump running ceph processes for debugging.
        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                         'grep', 'ceph'])
        units = remote.sh('sudo systemctl list-units | grep ceph',
                          check_status=False)
        log.info(units)
        if 'failed' in units:
            log.info("Ceph services in failed state")

        # test overall service stop and start using ceph.target
        # ceph.target tests are meant for ceph systemd tests
        # and not actual process testing using 'ps'
        log.info("Stopping all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        log.info("Checking process status")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        # 'Active: ...' is systemctl output, not ps output: check the
        # status text fetched above (the original tested ps_eaf, which
        # can never contain that string).
        if 'Active: inactive' in status:
            log.info("Successfully stopped all ceph services")
        else:
            log.info("Failed to stop ceph services")

        log.info("Starting all Ceph services")
        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
        status = _remote_service_status(remote, 'ceph.target')
        log.info(status)
        if 'Active: active' in status:
            log.info("Successfully started all Ceph services")
        else:
            log.info("Failed to start Ceph services")
        ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
        log.info(ps_eaf)
        time.sleep(4)

        # test individual services start stop
        name = remote.shortname
        # OSD units are keyed by numeric id, not hostname; recover the id
        # from the process listing captured above.
        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_eaf)
        if m_osd:
            osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
            _stop_start_service(remote, osd_service, 'osd')
        if 'mon.' + name in roles:
            _stop_start_service(remote, 'ceph-mon@' + name + '.service',
                                'mon')
        if 'mgr.' + name in roles:
            _stop_start_service(remote, 'ceph-mgr@' + name + '.service',
                                'mgr')
        if 'mds.' + name in roles:
            _stop_start_service(remote, 'ceph-mds@' + name + '.service',
                                'mds')

    # reboot all nodes and verify the systemd units restart
    # workunit that runs would fail if any of the systemd unit doesnt start
    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
    # avoid immediate reconnect
    time.sleep(120)
    reconnect(ctx, 480)  # reconnect all nodes
    # for debug info
    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
                          'grep', 'ceph'])
    # wait for HEALTH_OK
    mon = get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(mon).remotes.keys()
    wait_until_healthy(ctx, mon_remote, use_sudo=True)
    yield