# ceph/qa/tasks/cephfs/kernel_mount.py (ceph v15.2.4)
import json
import logging
import time

from textwrap import dedent
from teuthology.orchestra.run import CommandFailedError
from teuthology import misc

from teuthology.orchestra import remote as orchestra_remote
from teuthology.orchestra import run
from teuthology.contextutil import MaxWhileTries
from tasks.cephfs.mount import CephFSMount

log = logging.getLogger(__name__)


UMOUNT_TIMEOUT = 300

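# Typical use from a teuthology task (illustrative sketch; the argument
# values are assumptions, not taken from this file):
#
#   mount = KernelMount(ctx, test_dir, '0', client_remote,
#                       ipmi_user, ipmi_password, ipmi_domain)
#   mount.mount(mount_fs_name='cephfs')
#   # ... exercise the filesystem under mount.mountpoint ...
#   mount.umount_wait()
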
class KernelMount(CephFSMount):
    def __init__(self, ctx, test_dir, client_id, client_remote,
                 ipmi_user, ipmi_password, ipmi_domain):
        super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote)

        self.mounted = False
        self.ipmi_user = ipmi_user
        self.ipmi_password = ipmi_password
        self.ipmi_domain = ipmi_domain

    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None,
              mount_options=None):
        # Use None rather than a mutable [] default for the option list.
        mount_options = mount_options or []
        if mountpoint is not None:
            self.mountpoint = mountpoint
        self.setupfs(name=mount_fs_name)

        log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
            id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))

        self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
                               timeout=(5*60))

        if mount_path is None:
            mount_path = "/"

        opts = 'name={id},norequire_active_mds,conf={conf}'.format(id=self.client_id,
                                                                   conf=self.config_path)

        if mount_fs_name is not None:
            opts += ",mds_namespace={0}".format(mount_fs_name)

        for mount_opt in mount_options:
            opts += ",{0}".format(mount_opt)

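        # At this point opts is a comma-separated option string, e.g.
        # (illustrative; actual values depend on the test configuration):
        #   name=0,norequire_active_mds,conf=/etc/ceph/ceph.conf,mds_namespace=cephfs
        # The mount device ':{path}' below carries no monitor list before
        # the colon; the mount.ceph helper is expected to fill in monitor
        # addresses from the conf file passed in the options.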
        self.client_remote.run(
            args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=self.test_dir),
                '/bin/mount',
                '-t',
                'ceph',
                ':{mount_path}'.format(mount_path=mount_path),
                self.mountpoint,
                '-v',
                '-o',
                opts
            ],
            timeout=(30*60),
        )

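        # Mode 1777 (world-writable with the sticky bit, as on /tmp) lets
        # unprivileged test users create files under the mount root.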
        self.client_remote.run(
            args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(5*60))

        self.mounted = True

    def umount(self, force=False):
        if not self.is_mounted():
            return

        log.debug('Unmounting client client.{id}...'.format(id=self.client_id))

        cmd = ['sudo', 'umount', self.mountpoint]
        if force:
            cmd.append('-f')

        try:
            self.client_remote.run(args=cmd, timeout=(15*60))
        except Exception:
            # Log open files and the process tree to help diagnose why the
            # umount failed, then re-raise.
            self.client_remote.run(args=[
                'sudo',
                run.Raw('PATH=/usr/sbin:$PATH'),
                'lsof',
                run.Raw(';'),
                'ps', 'auxf',
            ], timeout=(15*60))
            raise

        rproc = self.client_remote.run(
            args=[
                'rmdir',
                '--',
                self.mountpoint,
            ],
            wait=False
        )
        run.wait([rproc], UMOUNT_TIMEOUT)
        self.mounted = False

    def cleanup(self):
        pass

    def umount_wait(self, force=False, require_clean=False, timeout=900):
        """
        Unlike the fuse client, the kernel client's umount is immediate.
        """
        if not self.is_mounted():
            return

        try:
            self.umount(force)
        except (CommandFailedError, MaxWhileTries):
            if not force:
                raise

            self.kill()
            self.kill_cleanup()

        self.mounted = False

    def is_mounted(self):
        return self.mounted

    def wait_until_mounted(self):
        """
        Unlike the fuse client, the kernel client is up and running as soon
        as the initial mount() function returns.
        """
        assert self.mounted

    def teardown(self):
        super(KernelMount, self).teardown()
        if self.mounted:
            self.umount()

    def kill(self):
        """
        The Ceph kernel client doesn't have a mechanism to kill itself (doing
        that inside the kernel would be weird anyway), so we reboot the whole
        node to get the same effect.

        We use IPMI to reboot, because we don't want the client to send any
        releases of capabilities.
        """

        con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
                                                self.ipmi_user,
                                                self.ipmi_password,
                                                self.ipmi_domain)
        con.hard_reset(wait_for_login=False)

        self.mounted = False

    def kill_cleanup(self):
        assert not self.mounted

        # We need to sleep here because we don't know how long it will take
        # for the hard_reset to take effect.
        time.sleep(30)

        try:
            # Wait for the node to come back up after the reboot
            misc.reconnect(None, 300, [self.client_remote])
        except Exception:
            # attempt to get some useful debug output:
            con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
                                                    self.ipmi_user,
                                                    self.ipmi_password,
                                                    self.ipmi_domain)
            con.check_status(timeout=60)
            raise

        # Check that the node really is back and responding
        self.client_remote.run(args=['uptime'], timeout=10)

        # Remove mount directory
        self.client_remote.run(
            args=[
                'rmdir',
                '--',
                self.mountpoint,
            ],
            timeout=(5*60),
            check_status=False,
        )

    def _find_debug_dir(self):
        """
        Find the debugfs folder for this mount
        """
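        # Each kernel mount appears as a directory under
        # /sys/kernel/debug/ceph/ containing an "mds_sessions" file.  The
        # parsing below assumes its first two lines look like:
        #   global_id <numeric id>
        #   name "<client id>"
        # (format assumed from the way the fields are picked apart here).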
        pyscript = dedent("""
            import glob
            import os
            import json

            def get_id_to_dir():
                result = {}
                for dir in glob.glob("/sys/kernel/debug/ceph/*"):
                    mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
                    client_id = mds_sessions_lines[1].split()[1].strip('"')

                    result[client_id] = dir
                return result

            print(json.dumps(get_id_to_dir()))
            """)

        output = self.client_remote.sh([
            'sudo', 'python3', '-c', pyscript
        ], timeout=(5*60))
        client_id_to_dir = json.loads(output)

        try:
            return client_id_to_dir[self.client_id]
        except KeyError:
            log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
                self.client_id, ",".join(client_id_to_dir.keys())
            ))
            raise

    def _read_debug_file(self, filename):
        debug_dir = self._find_debug_dir()

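        # debugfs (/sys/kernel/debug) is only readable by root, hence the
        # helper script is run under sudo.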
        pyscript = dedent("""
            import os

            print(open(os.path.join("{debug_dir}", "{filename}")).read())
            """).format(debug_dir=debug_dir, filename=filename)

        output = self.client_remote.sh([
            'sudo', 'python3', '-c', pyscript
        ], timeout=(5*60))
        return output

    def get_global_id(self):
        """
        Look up the CephFS client ID for this mount, using debugfs.
        """

        assert self.mounted

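        # As in _find_debug_dir, the first line of mds_sessions is assumed
        # to be "global_id <id>", so the second token is the global id.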
        mds_sessions = self._read_debug_file("mds_sessions")
        lines = mds_sessions.split("\n")
        return int(lines[0].split()[1])

    def get_osd_epoch(self):
        """
        Return a 2-tuple of (osd_epoch, osd_epoch_barrier).
        """
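        # The first line of the debugfs "osdmap" file is assumed to look
        # like "epoch <N> barrier <M> ...": tokens 1 and 3 are the epoch
        # and the epoch barrier (matching the parsing below).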
        osd_map = self._read_debug_file("osdmap")
        lines = osd_map.split("\n")
        first_line_tokens = lines[0].split()
        epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])

        return epoch, barrier