# ceph/qa/tasks/cephfs/kernel_mount.py
# Extracted from the git.proxmox.com blame view of ceph.git
# (commit subject: "import ceph quincy 17.2.4").
import errno
import json
import logging
import os
import re

from io import StringIO
from textwrap import dedent

from teuthology.exceptions import CommandFailedError
from teuthology.orchestra import run
from teuthology.contextutil import MaxWhileTries

from tasks.cephfs.mount import CephFSMount

# Module-level logger used by everything in this file.
log = logging.getLogger(__name__)


# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably a timeout in seconds -- confirm against callers.
UMOUNT_TIMEOUT = 300
# internal metadata directory
# (the entry under /sys/kernel/debug/ceph/ that is skipped when scanning
# per-client debugfs directories)
DEBUGFS_META_DIR = 'meta'

class KernelMount(CephFSMount):
    """
    CephFS mount driven through ``/bin/mount -t ceph`` on a remote host.

    Besides mounting and unmounting, this class reads per-client files
    below ``/sys/kernel/debug/ceph/`` to look up the client's global id,
    address, OSD epoch and read-op metrics.

    Helpers such as ``is_mounted()``, ``cleanup()``, ``setup_netns()``,
    ``run_shell()`` and ``run_shell_payload()`` are not defined here;
    presumably they are inherited from CephFSMount -- verify there.
    """

    def __init__(self, ctx, test_dir, client_id, client_remote,
                 client_keyring_path=None, hostfs_mntpt=None,
                 cephfs_name=None, cephfs_mntpt=None, brxnet=None,
                 client_config=None):
        """
        Forward the common mount parameters to the parent class and read
        the kernel-client specific settings from ``client_config``.

        :param client_config: optional dict; the keys read here are
            'dynamic_debug' (default False), 'rbytes' (default False),
            'snapdirname' (default '.snap') and 'syntax' (default 'v2').
            ``None`` is treated as an empty dict.
        """
        super().__init__(ctx=ctx, test_dir=test_dir,
                         client_id=client_id, client_remote=client_remote,
                         client_keyring_path=client_keyring_path,
                         hostfs_mntpt=hostfs_mntpt,
                         cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt,
                         brxnet=brxnet)

        # Avoid a shared mutable default argument.
        if client_config is None:
            client_config = {}

        self.client_config = client_config
        self.dynamic_debug = client_config.get('dynamic_debug', False)
        self.rbytes = client_config.get('rbytes', False)
        self.snapdirname = client_config.get('snapdirname', '.snap')
        self.syntax_style = client_config.get('syntax', 'v2')
        # Caches filled lazily by _global_inst / _global_addr.
        self.inst = None
        self.addr = None
        self._mount_bin = ['adjust-ulimits', 'ceph-coverage',
                           self.test_dir + '/archive/coverage',
                           '/bin/mount', '-t', 'ceph']

    def mount(self, mntopts=None, check_status=True, **kwargs):
        """
        Mount the file system on the remote host.

        :param mntopts: optional iterable of extra ``-o`` mount options.
        :param check_status: when True a failed mount command raises
            CommandFailedError; when False the failure is returned instead.
        :param kwargs: passed to ``update_attrs()``.
        :return: None on success; on failure with ``check_status=False``
            the tuple ``(exception, stdout, stderr)`` produced by
            ``_run_mount_cmd()``.
        """
        self.update_attrs(**kwargs)
        self.assert_and_log_minimum_mount_details()

        self.setup_netns()

        if not self.cephfs_mntpt:
            self.cephfs_mntpt = '/'
        if not self.cephfs_name:
            self.cephfs_name = 'cephfs'

        self._create_mntpt()

        retval = self._run_mount_cmd(mntopts, check_status)
        if retval:
            return retval

        self._set_filemode_on_mntpt()

        if self.dynamic_debug:
            # Per-host counter of kernel mounts kept in ctx: dynamic debug
            # is switched on by the first mount on a host only.
            count_key = f'kmount_count.{self.client_remote.hostname}'
            kmount_count = self.ctx.get(count_key, 0)
            if kmount_count == 0:
                self.enable_dynamic_debug()
            self.ctx[count_key] = kmount_count + 1

        self.mounted = True

    def _run_mount_cmd(self, mntopts, check_status):
        """
        Run the mount command on the remote.

        :return: None when the command succeeds. When it fails and
            ``check_status`` is False, the tuple
            ``(CommandFailedError, stdout text, stderr text)``.
        :raises CommandFailedError: when it fails and ``check_status`` is
            True.
        """
        mount_cmd = self._get_mount_cmd(mntopts)
        mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()

        try:
            self.client_remote.run(args=mount_cmd, timeout=(30*60),
                                   stdout=mountcmd_stdout,
                                   stderr=mountcmd_stderr, omit_sudo=False)
        except CommandFailedError as e:
            log.info('mount command failed')
            if check_status:
                raise
            return (e, mountcmd_stdout.getvalue(),
                    mountcmd_stderr.getvalue())
        log.info('mount command passed')

    def _make_mount_cmd_old_or_new_style(self):
        """
        Build the mount device string for the configured syntax style.

        :return: tuple ``(device_string, options_dict)``. For 'v1' the
            device is ``:<cephfs_mntpt>`` and the client name / file system
            name are passed as the 'name' / 'mds_namespace' options. For
            'v2' everything is encoded in the device string and the
            options dict is empty.
        :raises AssertionError: for any other syntax style.
        """
        optd = {}
        if self.syntax_style == 'v1':
            mnt_stx = f':{self.cephfs_mntpt}'
            if self.client_id:
                optd['name'] = self.client_id
            if self.cephfs_name:
                optd['mds_namespace'] = self.cephfs_name
        elif self.syntax_style == 'v2':
            mnt_stx = f'{self.client_id}@.{self.cephfs_name}={self.cephfs_mntpt}'
        else:
            # Raised explicitly (rather than ``assert 0``) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(f'invalid syntax style: {self.syntax_style}')
        return (mnt_stx, optd)

    def _get_mount_cmd(self, mntopts):
        """
        Assemble the full argument list of the mount command.

        :param mntopts: optional iterable of extra mount options, appended
            after the options derived from this object's attributes.
        :return: list of command arguments.
        """
        opts = ['norequire_active_mds']
        if self.client_keyring_path and self.client_id:
            opts.append('secret=' + self.get_key_from_keyfile())
        if self.config_path:
            opts.append('conf=' + self.config_path)
        opts.append('rbytes' if self.rbytes else 'norbytes')
        if self.snapdirname != '.snap':
            opts.append(f'snapdirname={self.snapdirname}')

        mount_cmd = ['sudo'] + self._nsenter_args
        device, syntax_opts = self._make_mount_cmd_old_or_new_style()
        opts.extend(f'{opt_name}={opt_val}'
                    for opt_name, opt_val in syntax_opts.items())
        if mntopts:
            opts.extend(mntopts)
        log.info(f'mounting using device: {device}')
        # do not fall-back to old-style mount (catch new-style
        # mount syntax bugs in the kernel). exclude this config
        # when using v1-style syntax, since old mount helpers
        # (pre-quincy) would pass this option to the kernel.
        if self.syntax_style != 'v1':
            opts.append('nofallback')
        mount_cmd += self._mount_bin + [device, self.hostfs_mntpt, '-v',
                                        '-o', ','.join(opts)]
        return mount_cmd

    def umount(self, force=False):
        """
        Unmount the file system; only runs ``cleanup()`` if not mounted.

        :param force: append ``-f`` to the umount command.
        :raises Exception: whatever the umount command raised, after the
            output of ``lsof`` and ``ps auxf`` has been collected on the
            remote.
        """
        if not self.is_mounted():
            self.cleanup()
            return

        log.debug('Unmounting client client.{id}...'.format(id=self.client_id))

        try:
            cmd = ['sudo', 'umount', self.hostfs_mntpt]
            if force:
                cmd.append('-f')
            self.client_remote.run(args=cmd, timeout=(15*60), omit_sudo=False)
        except Exception:
            log.debug('Killing processes on client.{id}...'.format(id=self.client_id))
            self.client_remote.run(
                args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof',
                      run.Raw(';'), 'ps', 'auxf'],
                timeout=(15*60), omit_sudo=False)
            # Bare raise keeps the traceback of the original failure.
            raise

        if self.dynamic_debug:
            # Mirror of the counting done in mount(): the last kernel
            # mount on a host switches dynamic debug off again.
            count_key = f'kmount_count.{self.client_remote.hostname}'
            kmount_count = self.ctx.get(count_key)
            assert kmount_count
            if kmount_count == 1:
                self.disable_dynamic_debug()
            self.ctx[count_key] = kmount_count - 1

        self.mounted = False
        self.cleanup()

    def umount_wait(self, force=False, require_clean=False, timeout=900):
        """
        Unlike the fuse client, the kernel client's umount is immediate

        ``require_clean`` and ``timeout`` are accepted but not used here.
        If the regular umount fails and ``force`` is set, a forced lazy
        umount (``umount -f -l``) is attempted; otherwise the failure is
        re-raised.
        """
        if not self.is_mounted():
            self.cleanup()
            return

        try:
            self.umount(force)
        except (CommandFailedError, MaxWhileTries):
            if not force:
                raise

            # NOTE(review): nesting of this recovery path inside the
            # except block was reconstructed from a whitespace-mangled
            # source -- confirm against upstream.
            # force delete the netns and umount
            log.debug('Force/lazy unmounting on client.{id}...'.format(id=self.client_id))
            self.client_remote.run(args=['sudo', 'umount', '-f', '-l',
                                         self.mountpoint],
                                   timeout=(15*60), omit_sudo=False)

            self.mounted = False
            self.cleanup()

    def wait_until_mounted(self):
        """
        Unlike the fuse client, the kernel client is up and running as soon
        as the initial mount() function returns.
        """
        assert self.mounted

    def teardown(self):
        """
        Run the parent teardown, then unmount if still marked mounted.
        """
        super().teardown()
        if self.mounted:
            self.umount()

    def _get_debug_dir(self):
        """
        Get the debugfs folder for this mount
        """
        cluster_name = 'ceph'
        fsid = self.ctx.ceph[cluster_name].fsid

        global_id = self._get_global_id()

        return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}")

    def read_debug_file(self, filename):
        """
        Read the debug file "filename" from this mount's debugfs folder.

        :return: the file content as a string on success. If the read
            command fails, one of ``errno.ENOENT``, ``errno.ENOTDIR`` or
            ``errno.EACCES`` (integers) depending on the error text found
            in the command's stderr.
        :raises CommandFailedError: for any other failure.
        """
        path = os.path.join(self._get_debug_dir(), filename)

        stdout = StringIO()
        stderr = StringIO()
        try:
            self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60),
                                   stdout=stdout, stderr=stderr)
            return stdout.getvalue()
        except CommandFailedError:
            err_text = stderr.getvalue().lower()
            if 'no such file or directory' in err_text:
                return errno.ENOENT
            elif 'not a directory' in err_text:
                return errno.ENOTDIR
            elif 'permission denied' in err_text:
                return errno.EACCES
            raise

    def _get_global_id(self):
        """
        Return this client's global id as an int.

        The ``ceph.client_id`` xattr is tried first; if that command
        fails, the id is looked up through the per-client ``mds_sessions``
        files in debugfs.

        :raises KeyError: when the fallback finds no entry for
            ``self.client_id``.
        """
        try:
            p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO())
            v = p.stdout.getvalue()
            prefix = "client"
            assert v.startswith(prefix)
            return int(v[len(prefix):])
        except CommandFailedError:
            # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available.
            log.debug("Falling back to messy global_id lookup via /sys...")

            # The script runs in a separate interpreter on the remote, so
            # the module constant has to be defined inside the script
            # itself; otherwise it fails there with a NameError.
            pyscript = f"DEBUGFS_META_DIR = {DEBUGFS_META_DIR!r}\n" + dedent("""
                import glob
                import os
                import json

                def get_id_to_dir():
                    result = {}
                    for dir in glob.glob("/sys/kernel/debug/ceph/*"):
                        if os.path.basename(dir) == DEBUGFS_META_DIR:
                            continue
                        with open(os.path.join(dir, "mds_sessions")) as f:
                            mds_sessions_lines = f.readlines()
                        global_id = mds_sessions_lines[0].split()[1].strip('"')
                        client_id = mds_sessions_lines[1].split()[1].strip('"')
                        result[client_id] = global_id
                    return result
                print(json.dumps(get_id_to_dir()))
            """)

            output = self.client_remote.sh([
                'sudo', 'python3', '-c', pyscript
            ], timeout=(5*60))
            client_id_to_global_id = json.loads(output)

            try:
                # Convert to int so both lookup paths return the same
                # type; _global_inst formats the value with %d.
                return int(client_id_to_global_id[self.client_id])
            except KeyError:
                log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
                    self.client_id, ",".join(client_id_to_global_id.keys())
                ))
                raise

    def _dynamic_debug_control(self, enable):
        """
        Write to dynamic debug control file.

        :param enable: True writes "module ceph +p", False writes
            "module ceph -p".
        """
        if enable:
            fdata = "module ceph +p"
        else:
            fdata = "module ceph -p"

        self.run_shell_payload(f"""
sudo modprobe ceph
echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control
""")

    def enable_dynamic_debug(self):
        """
        Enable the dynamic debug.
        """
        self._dynamic_debug_control(True)

    def disable_dynamic_debug(self):
        """
        Disable the dynamic debug.
        """
        self._dynamic_debug_control(False)

    def get_global_id(self):
        """
        Look up the CephFS client ID for this mount; requires the mount
        to be marked as mounted.
        """
        assert self.mounted

        return self._get_global_id()

    @property
    def _global_addr(self):
        """
        Client address parsed from the debugfs "status" file, cached in
        ``self.addr``. Returns None when the file could not be read.
        """
        if self.addr is not None:
            return self.addr

        # The first line of the "status" file's output will be something
        # like:
        #   "instance: client.4297 (0)10.72.47.117:0/1148470933"
        # What we need here is only the string "10.72.47.117:0/1148470933"
        status = self.read_debug_file("status")
        # read_debug_file() reports failures as errno integers, never as
        # None, so anything that is not text means "not available".
        if not isinstance(status, str):
            return None

        instance = re.findall(r'instance:.*', status)[0]
        self.addr = instance.split()[2].split(')')[1]
        return self.addr

    @property
    def _global_inst(self):
        """
        String "client<global_id> <addr>", cached in ``self.inst``.
        """
        if self.inst is not None:
            return self.inst

        client_gid = "client%d" % self.get_global_id()
        # NOTE(review): if _global_addr is None the join below raises
        # TypeError; behavior left as in the original.
        self.inst = " ".join([client_gid, self._global_addr])
        return self.inst

    def get_global_inst(self):
        """
        Look up the CephFS client instance for this mount
        """
        return self._global_inst

    def get_global_addr(self):
        """
        Look up the CephFS client addr for this mount
        """
        return self._global_addr

    def get_osd_epoch(self):
        """
        Return 2-tuple of osd_epoch, osd_epoch_barrier

        Both values are taken from the first line of the debugfs
        "osdmap" file (tokens 1 and 3).
        """
        osd_map = self.read_debug_file("osdmap")
        # An errno integer from read_debug_file() is truthy, so check the
        # type as well as non-emptiness.
        assert isinstance(osd_map, str) and osd_map

        lines = osd_map.split("\n")
        first_line_tokens = lines[0].split()
        epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])

        return epoch, barrier

    def get_op_read_count(self):
        """
        Return the integer in the second column of the first "read" line
        of the debugfs "metrics/size" file, falling back to "metrics" when
        the first path does not exist.

        :return: the parsed count; ``errno.ENOENT`` when the fallback
            path cannot be accessed either; 0 when the first attempt fails
            for a reason other than a missing path.
        """
        stdout = StringIO()
        stderr = StringIO()
        try:
            path = os.path.join(self._get_debug_dir(), "metrics/size")
            self.run_shell(f"sudo stat {path}", stdout=stdout,
                           stderr=stderr, cwd=None)
            buf = self.read_debug_file("metrics/size")
        except CommandFailedError:
            err_text = stderr.getvalue().lower()
            if 'no such file or directory' in err_text \
                    or 'not a directory' in err_text:
                try:
                    path = os.path.join(self._get_debug_dir(), "metrics")
                    self.run_shell(f"sudo stat {path}", stdout=stdout,
                                   stderr=stderr, cwd=None)
                    buf = self.read_debug_file("metrics")
                except CommandFailedError:
                    return errno.ENOENT
            else:
                return 0
        return int(re.findall(r'read.*', buf)[0].split()[1])