]>
Commit | Line | Data |
---|---|---|
2a845540 | 1 | import errno |
7c673cae FG |
2 | import json |
3 | import logging | |
522d829b | 4 | import os |
f67539c2 TL |
5 | import re |
6 | ||
7 | from io import StringIO | |
7c673cae | 8 | from textwrap import dedent |
7c673cae | 9 | |
20effc67 | 10 | from teuthology.exceptions import CommandFailedError |
7c673cae FG |
11 | from teuthology.orchestra import run |
12 | from teuthology.contextutil import MaxWhileTries | |
f67539c2 | 13 | |
e306af50 | 14 | from tasks.cephfs.mount import CephFSMount |
7c673cae FG |
15 | |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# NOTE(review): presumably a timeout in seconds for unmount operations; it is
# not referenced in the visible part of this file -- confirm against callers.
UMOUNT_TIMEOUT = 300

# internal metadata directory: debugfs entries with this basename are skipped
# when scanning /sys/kernel/debug/ceph/ for per-client directories.
DEBUGFS_META_DIR = 'meta'
7c673cae FG |
22 | |
23 | class KernelMount(CephFSMount): | |
def __init__(self, ctx, test_dir, client_id, client_remote,
             client_keyring_path=None, hostfs_mntpt=None,
             cephfs_name=None, cephfs_mntpt=None, brxnet=None,
             client_config=None):
    """
    Set up a kernel-client mount helper.

    Every argument except ``client_config`` is forwarded unchanged (by
    keyword) to the parent class initializer.

    :param client_config: optional dict of per-client settings. The keys
        read here are ``dynamic_debug`` (default False), ``rbytes``
        (default False), ``snapdirname`` (default '.snap') and ``syntax``
        (default 'v2'). ``None`` is treated as an empty dict.
    """
    super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
        client_id=client_id, client_remote=client_remote,
        client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
        cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)

    # The dict is stored on the instance, so a literal ``{}`` default would
    # be one object shared by every mount created without a config. Build a
    # fresh dict per instance instead.
    if client_config is None:
        client_config = {}

    self.client_config = client_config
    self.dynamic_debug = client_config.get('dynamic_debug', False)
    self.rbytes = client_config.get('rbytes', False)
    self.snapdirname = client_config.get('snapdirname', '.snap')
    self.syntax_style = client_config.get('syntax', 'v2')
    # Cached by the _global_inst / _global_addr properties on first lookup.
    self.inst = None
    self.addr = None
    # Command prefix used to invoke the mount binary for a ceph filesystem.
    self._mount_bin = ['adjust-ulimits', 'ceph-coverage',
                       self.test_dir + '/archive/coverage',
                       '/bin/mount', '-t', 'ceph']
f67539c2 | 42 | |
def mount(self, mntopts=None, check_status=True, **kwargs):
    """
    Mount the filesystem with the kernel client.

    :param mntopts: optional list of extra mount option strings; ``None``
        (the default) means no extra options.
    :param check_status: forwarded to ``_run_mount_cmd``.
    :param kwargs: forwarded to ``update_attrs``.
    :return: ``None`` on success. If ``_run_mount_cmd`` returns a truthy
        value it is returned as-is and the mount is NOT marked as mounted.
    """
    # Avoid a mutable default argument; downstream helpers still receive a
    # list exactly as before.
    if mntopts is None:
        mntopts = []

    self.update_attrs(**kwargs)
    self.assert_and_log_minimum_mount_details()

    self.setup_netns()

    # Fall back to the root of a filesystem named 'cephfs' when unset.
    if not self.cephfs_mntpt:
        self.cephfs_mntpt = '/'
    if not self.cephfs_name:
        self.cephfs_name = 'cephfs'

    self._create_mntpt()

    retval = self._run_mount_cmd(mntopts, check_status)
    if retval:
        return retval

    self._set_filemode_on_mntpt()

    if self.dynamic_debug:
        # Per-host count of kernel mounts kept in ctx: dynamic debug is
        # enabled only when the first mount on a host appears.
        count_key = f'kmount_count.{self.client_remote.hostname}'
        kmount_count = self.ctx.get(count_key, 0)
        if kmount_count == 0:
            self.enable_dynamic_debug()
        self.ctx[count_key] = kmount_count + 1

    self.mounted = True
69 | ||
def _run_mount_cmd(self, mntopts, check_status):
    """
    Build the mount command for ``mntopts`` and run it on the client remote.

    :return: ``None`` when the command succeeds. When it fails and
        ``check_status`` is false, a 3-tuple of
        ``(exception, captured stdout, captured stderr)``.
    :raises CommandFailedError: when the command fails and ``check_status``
        is true.
    """
    cmd = self._get_mount_cmd(mntopts)
    out_buf = StringIO()
    err_buf = StringIO()

    try:
        self.client_remote.run(args=cmd, timeout=(30*60),
                               stdout=out_buf, stderr=err_buf,
                               omit_sudo=False)
    except CommandFailedError as err:
        log.info('mount command failed')
        if not check_status:
            # Caller asked not to raise: hand back the failure details.
            return (err, out_buf.getvalue(), err_buf.getvalue())
        raise
    else:
        log.info('mount command passed')
86 | ||
20effc67 TL |
87 | def _make_mount_cmd_old_or_new_style(self): |
88 | optd = {} | |
89 | mnt_stx = '' | |
90 | if self.syntax_style == 'v1': | |
91 | mnt_stx = f':{self.cephfs_mntpt}' | |
92 | if self.client_id: | |
93 | optd['name'] = self.client_id | |
94 | if self.cephfs_name: | |
95 | optd['mds_namespace'] = self.cephfs_name | |
96 | elif self.syntax_style == 'v2': | |
97 | mnt_stx = f'{self.client_id}@.{self.cephfs_name}={self.cephfs_mntpt}' | |
98 | else: | |
99 | assert 0, f'invalid syntax style: {self.syntax_style}' | |
100 | return (mnt_stx, optd) | |
101 | ||
def _get_mount_cmd(self, mntopts):
    """
    Assemble the full argument list of the mount command.

    The ``-o`` option string is built in this order:
    ``norequire_active_mds``, ``secret=`` (when a keyring path and client
    id are set), ``conf=`` (when a config path is set), ``rbytes`` or
    ``norbytes``, ``snapdirname=`` (when not '.snap'), the options
    required by the syntax style, the caller's ``mntopts`` and finally
    ``nofallback`` (for every style except 'v1').

    :param mntopts: iterable of extra mount option strings (may be empty).
    :return: list of command arguments.
    """
    opt_parts = ['norequire_active_mds']
    if self.client_keyring_path and self.client_id:
        opt_parts.append('secret=' + self.get_key_from_keyfile())
    if self.config_path:
        opt_parts.append('conf=' + self.config_path)
    opt_parts.append("rbytes" if self.rbytes else "norbytes")
    if self.snapdirname != '.snap':
        opt_parts.append(f'snapdirname={self.snapdirname}')

    cmd_prefix = ['sudo'] + self._nsenter_args
    stx = self._make_mount_cmd_old_or_new_style()
    device = stx[0]
    opt_parts.extend(f'{opt_name}={opt_val}'
                     for opt_name, opt_val in stx[1].items())
    if mntopts:
        opt_parts.append(','.join(mntopts))
    log.info(f'mounting using device: {device}')
    # do not fall-back to old-style mount (catch new-style
    # mount syntax bugs in the kernel). exclude this config
    # when using v1-style syntax, since old mount helpers
    # (pre-quincy) would pass this option to the kernel.
    if self.syntax_style != 'v1':
        opt_parts.append("nofallback")

    return cmd_prefix + self._mount_bin + [device, self.hostfs_mntpt, '-v',
                                           '-o', ','.join(opt_parts)]
131 | ||
def umount(self, force=False):
    """
    Unmount the filesystem from the client host.

    If the mount is not mounted, only ``cleanup()`` is run. Otherwise
    ``umount`` is executed on the remote; on failure a listing of open
    files and processes is collected for the logs and the original umount
    error is re-raised.

    :param force: when true, ``-f`` is appended to the umount command.
    :raises Exception: whatever the remote umount command raised.
    """
    if not self.is_mounted():
        self.cleanup()
        return

    log.debug('Unmounting client client.{id}...'.format(id=self.client_id))

    cmd = ['sudo', 'umount', self.hostfs_mntpt]
    if force:
        cmd.append('-f')

    try:
        self.client_remote.run(args=cmd, timeout=(15*60), omit_sudo=False)
    except Exception:
        # NOTE: despite the message, nothing is killed here; lsof/ps output
        # is only collected to help diagnose why the umount failed.
        log.debug('Killing processes on client.{id}...'.format(id=self.client_id))
        try:
            self.client_remote.run(
                args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof',
                      run.Raw(';'), 'ps', 'auxf'],
                timeout=(15*60), omit_sudo=False)
        except Exception:
            # Diagnostics are best-effort: a failure here must not replace
            # the umount error that callers expect to see.
            log.exception('Failed to collect lsof/ps output on client.{id}'.format(
                id=self.client_id))
        # Bare raise re-raises the umount failure with its traceback intact.
        raise

    if self.dynamic_debug:
        # Per-host count of kernel mounts kept in ctx: dynamic debug is
        # disabled only when the last mount on the host goes away.
        count_key = f'kmount_count.{self.client_remote.hostname}'
        kmount_count = self.ctx.get(count_key)
        assert kmount_count
        if kmount_count == 1:
            self.disable_dynamic_debug()
        self.ctx[count_key] = kmount_count - 1

    self.mounted = False
    self.cleanup()
7c673cae | 161 | |
def umount_wait(self, force=False, require_clean=False, timeout=900):
    """
    Unlike the fuse client, the kernel client's umount is immediate, so
    this simply unmounts.

    If a forced umount fails with CommandFailedError or MaxWhileTries, a
    forced lazy umount (``umount -f -l``) is attempted; without ``force``
    the error is re-raised.

    :param force: passed to ``umount()`` and enables the lazy fallback.
    :param require_clean: accepted but not used by this implementation.
    :param timeout: accepted but not used by this implementation.
    """
    if not self.is_mounted():
        self.cleanup()
        return

    try:
        self.umount(force)
    except (CommandFailedError, MaxWhileTries):
        if force:
            # force delete the netns and umount
            log.debug('Force/lazy unmounting on client.{id}...'.format(id=self.client_id))
            lazy_cmd = ['sudo', 'umount', '-f', '-l', self.mountpoint]
            self.client_remote.run(args=lazy_cmd, timeout=(15*60),
                                   omit_sudo=False)

            self.mounted = False
            self.cleanup()
        else:
            raise
7c673cae FG |
184 | |
def wait_until_mounted(self):
    """
    Nothing to wait for: unlike the fuse client, the kernel client is up
    and running as soon as the initial mount() function returns. This only
    asserts that the mount has been marked as mounted.
    """
    assert self.mounted
191 | ||
def teardown(self):
    """
    Run the parent class teardown, then unmount if still marked mounted.
    """
    super(KernelMount, self).teardown()
    if not self.mounted:
        return
    self.umount()
196 | ||
522d829b | 197 | def _get_debug_dir(self): |
7c673cae | 198 | """ |
522d829b | 199 | Get the debugfs folder for this mount |
7c673cae | 200 | """ |
7c673cae | 201 | |
522d829b TL |
202 | cluster_name = 'ceph' |
203 | fsid = self.ctx.ceph[cluster_name].fsid | |
7c673cae | 204 | |
522d829b | 205 | global_id = self._get_global_id() |
7c673cae | 206 | |
522d829b | 207 | return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}") |
7c673cae | 208 | |
f67539c2 TL |
def read_debug_file(self, filename):
    """
    Read the file "filename" from this mount's debugfs folder.

    :return: the file content as a string on success. If the read command
        fails, an errno integer chosen from its stderr text:
        errno.ENOENT ("no such file or directory"), errno.ENOTDIR
        ("not a directory") or errno.EACCES ("permission denied").
    :raises CommandFailedError: for any other failure of the read command.
    """
    target = os.path.join(self._get_debug_dir(), filename)

    out_buf, err_buf = StringIO(), StringIO()
    try:
        self.run_shell_payload(f"sudo dd if={target}", timeout=(5 * 60),
                               stdout=out_buf, stderr=err_buf)
    except CommandFailedError:
        message = err_buf.getvalue().lower()
        if 'no such file or directory' in message:
            return errno.ENOENT
        if 'not a directory' in message:
            return errno.ENOTDIR
        if 'permission denied' in message:
            return errno.EACCES
        raise
    return out_buf.getvalue()
7c673cae | 230 | |
522d829b TL |
231 | def _get_global_id(self): |
232 | try: | |
233 | p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO()) | |
234 | v = p.stdout.getvalue() | |
235 | prefix = "client" | |
236 | assert v.startswith(prefix) | |
237 | return int(v[len(prefix):]) | |
238 | except CommandFailedError: | |
239 | # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available. | |
240 | log.debug("Falling back to messy global_id lookup via /sys...") | |
241 | ||
242 | pyscript = dedent(""" | |
243 | import glob | |
244 | import os | |
245 | import json | |
246 | ||
247 | def get_id_to_dir(): | |
248 | result = {} | |
249 | for dir in glob.glob("/sys/kernel/debug/ceph/*"): | |
20effc67 TL |
250 | if os.path.basename(dir) == DEBUGFS_META_DIR: |
251 | continue | |
522d829b TL |
252 | mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() |
253 | global_id = mds_sessions_lines[0].split()[1].strip('"') | |
254 | client_id = mds_sessions_lines[1].split()[1].strip('"') | |
255 | result[client_id] = global_id | |
256 | return result | |
257 | print(json.dumps(get_id_to_dir())) | |
258 | """) | |
259 | ||
260 | output = self.client_remote.sh([ | |
261 | 'sudo', 'python3', '-c', pyscript | |
262 | ], timeout=(5*60)) | |
263 | client_id_to_global_id = json.loads(output) | |
264 | ||
265 | try: | |
266 | return client_id_to_global_id[self.client_id] | |
267 | except KeyError: | |
268 | log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( | |
269 | self.client_id, ",".join(client_id_to_global_id.keys()) | |
270 | )) | |
271 | raise | |
272 | ||
20effc67 TL |
273 | def _dynamic_debug_control(self, enable): |
274 | """ | |
275 | Write to dynamic debug control file. | |
276 | """ | |
277 | if enable: | |
278 | fdata = "module ceph +p" | |
279 | else: | |
280 | fdata = "module ceph -p" | |
281 | ||
282 | self.run_shell_payload(f""" | |
283 | sudo modprobe ceph | |
284 | echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control | |
285 | """) | |
286 | ||
def enable_dynamic_debug(self):
    """
    Turn dynamic debug on by delegating to _dynamic_debug_control(True).
    """
    self._dynamic_debug_control(True)
292 | ||
def disable_dynamic_debug(self):
    """
    Turn dynamic debug off by delegating to _dynamic_debug_control(False).
    """
    self._dynamic_debug_control(False)
298 | ||
7c673cae FG |
def get_global_id(self):
    """
    Look up the CephFS client ID for this mount.

    Asserts that the mount is marked as mounted, then delegates to
    _get_global_id().
    """
    assert self.mounted
    return self._get_global_id()
7c673cae | 307 | |
f67539c2 TL |
308 | @property |
309 | def _global_addr(self): | |
310 | if self.addr is not None: | |
311 | return self.addr | |
312 | ||
313 | # The first line of the "status" file's output will be something | |
314 | # like: | |
315 | # "instance: client.4297 (0)10.72.47.117:0/1148470933" | |
316 | # What we need here is only the string "10.72.47.117:0/1148470933" | |
317 | status = self.read_debug_file("status") | |
318 | if status is None: | |
319 | return None | |
320 | ||
321 | instance = re.findall(r'instance:.*', status)[0] | |
322 | self.addr = instance.split()[2].split(')')[1] | |
323 | return self.addr; | |
324 | ||
325 | @property | |
326 | def _global_inst(self): | |
327 | if self.inst is not None: | |
328 | return self.inst | |
329 | ||
330 | client_gid = "client%d" % self.get_global_id() | |
331 | self.inst = " ".join([client_gid, self._global_addr]) | |
332 | return self.inst | |
333 | ||
def get_global_inst(self):
    """
    Look up the CephFS client instance for this mount (the value of the
    _global_inst property).
    """
    return self._global_inst
339 | ||
def get_global_addr(self):
    """
    Look up the CephFS client addr for this mount (the value of the
    _global_addr property).
    """
    return self._global_addr
345 | ||
7c673cae FG |
def get_osd_epoch(self):
    """
    Return 2-tuple of osd_epoch, osd_epoch_barrier.

    Both values are parsed from the first line of the debugfs "osdmap"
    file: the second and fourth whitespace-separated tokens.

    :raises AssertionError: if the file could not be read or is empty.
    """
    osd_map = self.read_debug_file("osdmap")
    # read_debug_file() signals failures with (truthy) errno integers, so a
    # plain truthiness check would let them through to the string parsing.
    if not isinstance(osd_map, str) or not osd_map:
        raise AssertionError(
            f"could not read debugfs osdmap file (got {osd_map!r})")

    first_line_tokens = osd_map.split("\n", 1)[0].split()
    epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])

    return epoch, barrier
f67539c2 TL |
358 | |
def get_op_read_count(self):
    """
    Return the read count reported by this mount's debugfs metrics.

    The "metrics/size" file is tried first. If the stat of that path fails
    because it does not exist or its parent is not a directory, the plain
    "metrics" file is tried instead. The count is the second
    whitespace-separated token of the first text starting at "read".

    :return: the read count as an int. Following the error convention
        already used here, an errno integer is returned when the metrics
        cannot be read (errno.ENOENT when the fallback also fails, or the
        code reported by read_debug_file), and 0 when the first attempt
        fails for any other reason.
    """
    stdout = StringIO()
    stderr = StringIO()
    try:
        path = os.path.join(self._get_debug_dir(), "metrics/size")
        self.run_shell(f"sudo stat {path}", stdout=stdout,
                       stderr=stderr, cwd=None)
        buf = self.read_debug_file("metrics/size")
    except CommandFailedError:
        err_text = stderr.getvalue().lower()
        if 'no such file or directory' not in err_text \
                and 'not a directory' not in err_text:
            return 0
        try:
            path = os.path.join(self._get_debug_dir(), "metrics")
            self.run_shell(f"sudo stat {path}", stdout=stdout,
                           stderr=stderr, cwd=None)
            buf = self.read_debug_file("metrics")
        except CommandFailedError:
            return errno.ENOENT

    # read_debug_file() returns an errno integer instead of text when the
    # file is missing or unreadable; pass that code on rather than feeding
    # a non-string to the regex.
    if not isinstance(buf, str):
        return buf
    return int(re.findall(r'read.*', buf)[0].split()[1])