]> git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/cephfs/kernel_mount.py
750b6b5335a4b7eafd8cf037496646a72ba6106d
[ceph.git] / ceph / qa / tasks / cephfs / kernel_mount.py
1 import errno
2 import json
3 import logging
4 import os
5 import re
6
7 from io import StringIO
8 from textwrap import dedent
9
10 from teuthology.exceptions import CommandFailedError
11 from teuthology.orchestra import run
12 from teuthology.contextutil import MaxWhileTries
13
14 from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT
15
16 log = logging.getLogger(__name__)
17
18
19 # internal metadata directory
20 DEBUGFS_META_DIR = 'meta'
21
22 class KernelMount(CephFSMount):
def __init__(self, ctx, test_dir, client_id, client_remote,
             client_keyring_path=None, hostfs_mntpt=None,
             cephfs_name=None, cephfs_mntpt=None, brxnet=None,
             client_config=None):
    """
    Set up a kernel-client mount object.

    All arguments are forwarded unchanged to CephFSMount.__init__().
    client_config is a dict of per-client options; the keys read here are
    'debug', 'dynamic_debug', 'rbytes', 'snapdirname' and 'syntax'.
    Passing None (the default) is the same as passing an empty dict.

    If client_config['debug'] is truthy, dynamic debug output for the
    'ceph' and 'libceph' kernel modules is switched on right away on the
    client host.
    """
    # A literal {} default would be a single dict shared by every
    # instance; build a fresh one per call instead.
    if client_config is None:
        client_config = {}

    super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
        client_id=client_id, client_remote=client_remote,
        client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
        cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
        client_config=client_config)

    if client_config.get('debug', False):
        for module in ('ceph', 'libceph'):
            self.client_remote.run(args=[
                "sudo", "bash", "-c",
                f"echo 'module {module} +p' > /sys/kernel/debug/dynamic_debug/control"])

    self.dynamic_debug = self.client_config.get('dynamic_debug', False)
    self.rbytes = self.client_config.get('rbytes', False)
    self.snapdirname = client_config.get('snapdirname', '.snap')
    self.syntax_style = self.client_config.get('syntax', 'v2')
    # Lazily filled caches, see _global_inst / _global_addr.
    self.inst = None
    self.addr = None
    self._mount_bin = ['adjust-ulimits', 'ceph-coverage',
                       self.test_dir + '/archive/coverage',
                       '/bin/mount', '-t', 'ceph']
45
def mount(self, mntopts=None, check_status=True, **kwargs):
    """
    Mount CephFS through the kernel client.

    kwargs are handed to update_attrs() before anything else happens.
    cephfs_mntpt defaults to '/' and cephfs_name to 'cephfs' when unset.

    Returns whatever _run_mount_cmd() returned when that value is truthy
    (i.e. the mount command failed and check_status was False); otherwise
    returns None after the mount details have been gathered.
    """
    self.update_attrs(**kwargs)
    self.assert_and_log_minimum_mount_details()
    self.setup_netns()

    if not self.cephfs_mntpt:
        self.cephfs_mntpt = '/'
    if not self.cephfs_name:
        self.cephfs_name = 'cephfs'

    self._create_mntpt()

    failure = self._run_mount_cmd(mntopts, check_status)
    if failure:
        return failure

    self._set_filemode_on_mntpt()

    if self.dynamic_debug:
        # Per-host count of kernel mounts, kept in ctx; dynamic debug is
        # only switched on by the first mount on a host.
        mounts_so_far = self.ctx.get(
            f'kmount_count.{self.client_remote.hostname}', 0)
        if mounts_so_far == 0:
            self.enable_dynamic_debug()
        self.ctx[f'kmount_count.{self.client_remote.hostname}'] = \
            mounts_so_far + 1

    self.gather_mount_info()
72
def gather_mount_info(self):
    """
    Record the global id of this mount in self.id and trigger the lookups
    of its instance string and address.
    """
    global_id = self._get_global_id()
    self.id = global_id
    self.get_global_inst()
    self.get_global_addr()
77
def _run_mount_cmd(self, mntopts, check_status):
    """
    Run the mount command on the client host.

    Returns None on success. When the command fails, the
    CommandFailedError is re-raised if check_status is true; otherwise a
    3-tuple (exception, stdout text, stderr text) is returned.
    """
    mount_cmd = self._get_mount_cmd(mntopts)
    out_buf = StringIO()
    err_buf = StringIO()

    try:
        self.client_remote.run(args=mount_cmd, timeout=300,
                               stdout=out_buf, stderr=err_buf,
                               omit_sudo=False)
    except CommandFailedError as e:
        log.info('mount command failed')
        if not check_status:
            return (e, out_buf.getvalue(), err_buf.getvalue())
        raise
    else:
        log.info('mount command passed')
94
95 def _make_mount_cmd_old_or_new_style(self):
96 optd = {}
97 mnt_stx = ''
98
99 self.validate_subvol_options()
100
101 assert(self.cephfs_mntpt)
102 if self.syntax_style == 'v1':
103 mnt_stx = f':{self.cephfs_mntpt}'
104 if self.client_id:
105 optd['name'] = self.client_id
106 if self.cephfs_name:
107 optd['mds_namespace'] = self.cephfs_name
108 elif self.syntax_style == 'v2':
109 mnt_stx = f'{self.client_id}@.{self.cephfs_name}={self.cephfs_mntpt}'
110 else:
111 assert 0, f'invalid syntax style: {self.syntax_style}'
112 return (mnt_stx, optd)
113
def _get_mount_cmd(self, mntopts):
    """
    Assemble the full argument list of the mount command.

    mntopts is an optional iterable of extra mount option strings that
    are appended to the options this method generates itself.
    """
    opt_parts = ['norequire_active_mds']
    if self.client_keyring_path and self.client_id:
        opt_parts.append('secret=' + self.get_key_from_keyfile())
    if self.config_path:
        opt_parts.append('conf=' + self.config_path)
    opt_parts.append("rbytes" if self.rbytes else "norbytes")
    if self.snapdirname != '.snap':
        opt_parts.append(f'snapdirname={self.snapdirname}')

    cmd_prefix = ['sudo'] + self._nsenter_args
    device, style_opts = self._make_mount_cmd_old_or_new_style()
    opt_parts.extend(f'{opt_name}={opt_val}'
                     for opt_name, opt_val in style_opts.items())
    if mntopts:
        opt_parts.extend(mntopts)
    log.info(f'mounting using device: {device}')
    # do not fall-back to old-style mount (catch new-style
    # mount syntax bugs in the kernel). exclude this config
    # when using v1-style syntax, since old mount helpers
    # (pre-quincy) would pass this option to the kernel.
    if self.syntax_style != 'v1':
        opt_parts.append("nofallback")

    opts = ','.join(opt_parts)
    return cmd_prefix + self._mount_bin + [device, self.hostfs_mntpt,
                                           '-v', '-o', opts]
143
def umount(self, force=False):
    """
    Unmount the kernel client.

    If the mount is not mounted only cleanup() runs; if it is blocked,
    _run_umount_lf() is used instead of a plain umount. Otherwise
    'umount' is run on the client host ('-f' is added when force is
    true). When that command fails, open files and the process list are
    dumped for debugging and the original exception is re-raised.
    """
    if not self.is_mounted():
        self.cleanup()
        return

    if self.is_blocked():
        self._run_umount_lf()
        self.cleanup()
        return

    log.debug('Unmounting client client.{id}...'.format(id=self.client_id))

    cmd = ['sudo', 'umount', self.hostfs_mntpt]
    if force:
        cmd.append('-f')

    try:
        self.client_remote.run(args=cmd, timeout=UMOUNT_TIMEOUT, omit_sudo=False)
    except Exception:
        # NOTE: despite the message nothing is killed here; the command
        # below only lists open files and running processes.
        log.debug('Killing processes on client.{id}...'.format(id=self.client_id))
        try:
            self.client_remote.run(
                args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof',
                      run.Raw(';'), 'ps', 'auxf'],
                timeout=UMOUNT_TIMEOUT, omit_sudo=False)
        except Exception:
            # The diagnostics are best effort: a failure here must not
            # replace the umount error the caller needs to see.
            log.exception('collecting diagnostics after failed umount failed')
        raise

    if self.dynamic_debug:
        kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}')
        assert kmount_count
        # Last kernel mount on this host is going away: turn debug off.
        if kmount_count == 1:
            self.disable_dynamic_debug()
        self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count - 1

    self.cleanup()
177
def umount_wait(self, force=False, require_clean=False,
                timeout=UMOUNT_TIMEOUT):
    """
    Unmount and return once done; the kernel client's umount is
    immediate, unlike the fuse client's.

    require_clean and timeout are accepted but not used by this
    implementation. If umount() fails with CommandFailedError or
    MaxWhileTries, the error propagates unless force is true, in which
    case _run_umount_lf() and cleanup() are run instead.
    """
    if not self.is_mounted():
        self.cleanup()
        return

    try:
        self.umount(force)
    except (CommandFailedError, MaxWhileTries):
        if force:
            # force delete the netns and umount
            self._run_umount_lf()
            self.cleanup()
        else:
            raise
196
def wait_until_mounted(self):
    """
    Nothing to wait for: the kernel client is up and running as soon as
    mount() has returned, unlike the fuse client. Only asserts that the
    mount is present.
    """
    mounted = self.is_mounted()
    assert mounted
203
def teardown(self):
    """
    Run the parent class teardown, then unmount if still mounted.
    """
    super(KernelMount, self).teardown()
    if not self.is_mounted():
        return
    self.umount()
208
209 def _get_debug_dir(self):
210 """
211 Get the debugfs folder for this mount
212 """
213
214 cluster_name = 'ceph'
215 fsid = self.ctx.ceph[cluster_name].fsid
216
217 global_id = self._get_global_id()
218
219 return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}")
220
def read_debug_file(self, filename):
    """
    Read the file "filename" from this mount's debugfs directory.

    Returns the file content as a string. If reading fails, the command's
    stderr decides the outcome: errno.ENOENT for "no such file or
    directory", errno.ENOTDIR for "not a directory" and errno.EACCES for
    "permission denied" are returned (as ints, not raised); any other
    failure re-raises the CommandFailedError.
    """
    path = os.path.join(self._get_debug_dir(), filename)

    out_buf = StringIO()
    err_buf = StringIO()
    try:
        self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60),
                               stdout=out_buf, stderr=err_buf)
    except CommandFailedError:
        err_text = err_buf.getvalue().lower()
        # Checked in this order; the first matching message wins.
        known_failures = (
            ('no such file or directory', errno.ENOENT),
            ('not a directory', errno.ENOTDIR),
            ('permission denied', errno.EACCES),
        )
        for message, code in known_failures:
            if message in err_text:
                return code
        raise
    return out_buf.getvalue()
242
243 def _get_global_id(self):
244 try:
245 p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO())
246 v = p.stdout.getvalue()
247 prefix = "client"
248 assert v.startswith(prefix)
249 return int(v[len(prefix):])
250 except CommandFailedError:
251 # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available.
252 log.debug("Falling back to messy global_id lookup via /sys...")
253
254 pyscript = dedent("""
255 import glob
256 import os
257 import json
258
259 def get_id_to_dir():
260 result = {}
261 for dir in glob.glob("/sys/kernel/debug/ceph/*"):
262 if os.path.basename(dir) == DEBUGFS_META_DIR:
263 continue
264 mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
265 global_id = mds_sessions_lines[0].split()[1].strip('"')
266 client_id = mds_sessions_lines[1].split()[1].strip('"')
267 result[client_id] = global_id
268 return result
269 print(json.dumps(get_id_to_dir()))
270 """)
271
272 output = self.client_remote.sh([
273 'sudo', 'python3', '-c', pyscript
274 ], timeout=(5*60))
275 client_id_to_global_id = json.loads(output)
276
277 try:
278 return client_id_to_global_id[self.client_id]
279 except KeyError:
280 log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
281 self.client_id, ",".join(client_id_to_global_id.keys())
282 ))
283 raise
284
285 def _dynamic_debug_control(self, enable):
286 """
287 Write to dynamic debug control file.
288 """
289 if enable:
290 fdata = "module ceph +p"
291 else:
292 fdata = "module ceph -p"
293
294 self.run_shell_payload(f"""
295 sudo modprobe ceph
296 echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control
297 """)
298
def enable_dynamic_debug(self):
    """
    Switch dynamic debug output of the ceph kernel module on.
    """
    turn_on = True
    self._dynamic_debug_control(turn_on)
304
def disable_dynamic_debug(self):
    """
    Switch dynamic debug output of the ceph kernel module off.
    """
    turn_on = False
    self._dynamic_debug_control(turn_on)
310
def get_global_id(self):
    """
    Return the CephFS client (global) ID of this mount.

    Asserts that the mount is mounted, then defers to _get_global_id().
    """
    mounted = self.is_mounted()
    assert mounted

    return self._get_global_id()
319
320 @property
321 def _global_addr(self):
322 if self.addr is not None:
323 return self.addr
324
325 # The first line of the "status" file's output will be something
326 # like:
327 # "instance: client.4297 (0)10.72.47.117:0/1148470933"
328 # What we need here is only the string "10.72.47.117:0/1148470933"
329 status = self.read_debug_file("status")
330 if status is None:
331 return None
332
333 instance = re.findall(r'instance:.*', status)[0]
334 self.addr = instance.split()[2].split(')')[1]
335 return self.addr;
336
337 @property
338 def _global_inst(self):
339 if self.inst is not None:
340 return self.inst
341
342 client_gid = "client%d" % self.get_global_id()
343 self.inst = " ".join([client_gid, self._global_addr])
344 return self.inst
345
def get_global_inst(self):
    """
    Return the CephFS client instance string of this mount
    (see _global_inst).
    """
    inst = self._global_inst
    return inst
351
def get_global_addr(self):
    """
    Return the CephFS client address of this mount (see _global_addr).
    """
    addr = self._global_addr
    return addr
357
def get_osd_epoch(self):
    """
    Return 2-tuple of osd_epoch, osd_epoch_barrier

    Both values are parsed from the first line of the debugfs "osdmap"
    file (its 2nd and 4th whitespace-separated tokens). Asserts that the
    file could be read and is not empty.
    """
    osd_map = self.read_debug_file("osdmap")
    # read_debug_file() hands back an errno int when the file cannot be
    # read; a bare truthiness check would let that through.
    assert isinstance(osd_map, str) and osd_map, \
        f"could not read osdmap debug file: {osd_map!r}"

    first_line_tokens = osd_map.split("\n", 1)[0].split()
    epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])

    return epoch, barrier
370
def get_op_read_count(self):
    """
    Return the number found after "read" in the client's debugfs metrics.

    "metrics/size" is tried first. If stat on it reports "no such file or
    directory" or "not a directory", the plain "metrics" file is used
    instead; errno.ENOENT is returned when that fails as well. Any other
    failure on the first attempt yields 0.
    """
    out_buf = StringIO()
    err_buf = StringIO()
    try:
        size_path = os.path.join(self._get_debug_dir(), "metrics/size")
        self.run_shell(f"sudo stat {size_path}", stdout=out_buf,
                       stderr=err_buf, cwd=None)
        metrics = self.read_debug_file("metrics/size")
    except CommandFailedError:
        path_missing = (
            'no such file or directory' in err_buf.getvalue().lower()
            or 'not a directory' in err_buf.getvalue().lower()
        )
        if not path_missing:
            return 0
        try:
            metrics_path = os.path.join(self._get_debug_dir(), "metrics")
            self.run_shell(f"sudo stat {metrics_path}", stdout=out_buf,
                           stderr=err_buf, cwd=None)
            metrics = self.read_debug_file("metrics")
        except CommandFailedError:
            return errno.ENOENT

    read_line = re.findall(r'read.*', metrics)[0]
    return int(read_line.split()[1])