]> git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/cephfs/kernel_mount.py
import 15.2.4
[ceph.git] / ceph / qa / tasks / cephfs / kernel_mount.py
1 import json
2 import logging
3 import time
4 from textwrap import dedent
5 from teuthology.orchestra.run import CommandFailedError
6 from teuthology import misc
7
8 from teuthology.orchestra import remote as orchestra_remote
9 from teuthology.orchestra import run
10 from teuthology.contextutil import MaxWhileTries
11 from tasks.cephfs.mount import CephFSMount
12
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Timeout handed to run.wait() while waiting for the post-umount `rmdir` of
# the mountpoint to finish (presumably seconds -- confirm against
# teuthology.orchestra.run.wait).
UMOUNT_TIMEOUT = 300
17
18
class KernelMount(CephFSMount):
    """
    CephFS mount backed by the kernel client (``mount -t ceph``).

    The mounted/unmounted state is tracked locally in ``self.mounted``.  The
    IPMI credentials are stored so that kill() can hard-reset the client node.
    """

    def __init__(self, ctx, test_dir, client_id, client_remote,
                 ipmi_user, ipmi_password, ipmi_domain):
        """
        :param ctx: forwarded unchanged to CephFSMount.__init__
        :param test_dir: forwarded unchanged to CephFSMount.__init__
        :param client_id: forwarded unchanged to CephFSMount.__init__
        :param client_remote: forwarded unchanged to CephFSMount.__init__
        :param ipmi_user: user name passed to getRemoteConsole()
        :param ipmi_password: password passed to getRemoteConsole()
        :param ipmi_domain: domain passed to getRemoteConsole()
        """
        super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote)

        self.mounted = False
        self.ipmi_user = ipmi_user
        self.ipmi_password = ipmi_password
        self.ipmi_domain = ipmi_domain

    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=None):
        """
        Mount CephFS on the client with the kernel client.

        :param mount_path: path inside the file system to mount; "/" when None
        :param mount_fs_name: if given, passed to setupfs() and added to the
                              mount options as ``mds_namespace=<name>``
        :param mountpoint: if given, replaces ``self.mountpoint`` first
        :param mount_options: iterable of extra options appended verbatim to
                              the ``-o`` option string; None means no extras
        """
        # A None default avoids sharing one mutable list between calls.
        if mount_options is None:
            mount_options = []

        if mountpoint is not None:
            self.mountpoint = mountpoint
        self.setupfs(name=mount_fs_name)

        log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
            id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))

        self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
                               timeout=(5*60))

        if mount_path is None:
            mount_path = "/"

        # Assemble the comma-separated value for `mount -o`.
        opt_parts = [
            'name={id},norequire_active_mds,conf={conf}'.format(
                id=self.client_id, conf=self.config_path),
        ]
        if mount_fs_name is not None:
            opt_parts.append("mds_namespace={0}".format(mount_fs_name))
        opt_parts.extend("{0}".format(mount_opt) for mount_opt in mount_options)
        opts = ",".join(opt_parts)

        self.client_remote.run(
            args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=self.test_dir),
                '/bin/mount',
                '-t',
                'ceph',
                ':{mount_path}'.format(mount_path=mount_path),
                self.mountpoint,
                '-v',
                '-o',
                opts
            ],
            timeout=(30*60),
        )

        self.client_remote.run(
            args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(5*60))

        self.mounted = True

    def _log_umount_failure_state(self):
        """
        Best-effort: run ``lsof`` and ``ps auxf`` on the client after a failed
        umount.  Any failure here is logged and swallowed so that the caller
        can re-raise the original umount error unchanged.
        """
        try:
            self.client_remote.run(args=[
                'sudo',
                run.Raw('PATH=/usr/sbin:$PATH'),
                'lsof',
                run.Raw(';'),
                'ps', 'auxf',
            ], timeout=(15*60))
        except Exception:
            log.exception('Could not collect lsof/ps output from client.{id} '
                          'after failed umount'.format(id=self.client_id))

    def umount(self, force=False):
        """
        Unmount the file system and remove the mountpoint directory.

        Does nothing when the mount is not currently mounted.

        :param force: append ``-f`` to the umount command
        :raises: whatever the umount command raised; diagnostics gathered on
                 failure never replace that exception
        """
        if not self.is_mounted():
            return

        log.debug('Unmounting client client.{id}...'.format(id=self.client_id))

        cmd = ['sudo', 'umount', self.mountpoint]
        if force:
            cmd.append('-f')

        try:
            self.client_remote.run(args=cmd, timeout=(15*60))
        except Exception:
            # umount_wait() decides whether to reboot the node based on the
            # type of this exception, so the diagnostics must not mask it.
            self._log_umount_failure_state()
            raise

        rproc = self.client_remote.run(
            args=[
                'rmdir',
                '--',
                self.mountpoint,
            ],
            wait=False
        )
        run.wait([rproc], UMOUNT_TIMEOUT)
        self.mounted = False

    def cleanup(self):
        """
        Nothing to do here: umount() already removes the mountpoint directory.
        """
        pass

    def umount_wait(self, force=False, require_clean=False, timeout=900):
        """
        Unlike the fuse client, the kernel client's umount is immediate

        When ``force`` is set and the umount fails with CommandFailedError or
        MaxWhileTries, the node is hard-reset via kill() and cleaned up with
        kill_cleanup().  Without ``force`` the failure is re-raised.

        ``require_clean`` and ``timeout`` are accepted but not used by this
        implementation.
        """
        if not self.is_mounted():
            return

        try:
            self.umount(force)
        except (CommandFailedError, MaxWhileTries):
            if not force:
                raise

            self.kill()
            self.kill_cleanup()

        self.mounted = False

    def is_mounted(self):
        """
        Return the locally tracked mounted flag.
        """
        return self.mounted

    def wait_until_mounted(self):
        """
        Unlike the fuse client, the kernel client is up and running as soon
        as the initial mount() function returns.
        """
        assert self.mounted

    def teardown(self):
        """
        Run the parent teardown, then unmount if still flagged as mounted.
        """
        super(KernelMount, self).teardown()
        if self.mounted:
            self.umount()

    def kill(self):
        """
        The Ceph kernel client doesn't have a mechanism to kill itself (doing
        that inside the kernel would be weird anyway), so we reboot the whole
        node to get the same effect.

        We use IPMI to reboot, because we don't want the client to send any
        releases of capabilities.
        """

        con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
                                                self.ipmi_user,
                                                self.ipmi_password,
                                                self.ipmi_domain)
        con.hard_reset(wait_for_login=False)

        self.mounted = False

    def kill_cleanup(self):
        """
        Wait for the node to come back after kill() and remove the mountpoint
        directory.  Must only be called while the mount is flagged unmounted.
        """
        assert not self.mounted

        # We need to do a sleep here because we don't know how long it will
        # take for a hard_reset to be effected.
        time.sleep(30)

        try:
            # Wait for node to come back up after reboot
            misc.reconnect(None, 300, [self.client_remote])
        except Exception:
            # Only real errors get the console check; KeyboardInterrupt and
            # SystemExit propagate immediately.
            # attempt to get some useful debug output:
            con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
                                                    self.ipmi_user,
                                                    self.ipmi_password,
                                                    self.ipmi_domain)
            con.check_status(timeout=60)
            raise

        # Check that the node executes commands again before cleaning up
        self.client_remote.run(args=['uptime'], timeout=10)

        # Remove mount directory; check_status=False means a failing rmdir
        # is not treated as an error.
        self.client_remote.run(
            args=[
                'rmdir',
                '--',
                self.mountpoint,
            ],
            timeout=(5*60),
            check_status=False,
        )

    def _find_debug_dir(self):
        """
        Find the debugfs folder for this mount

        Runs a small script on the client that maps each directory under
        /sys/kernel/debug/ceph/ to the client id read from its mds_sessions
        file, then returns the directory recorded for ``self.client_id``.

        :raises KeyError: no directory was found for this client id
        """
        pyscript = dedent("""
            import glob
            import os
            import json

            def get_id_to_dir():
                result = {}
                for dir in glob.glob("/sys/kernel/debug/ceph/*"):
                    mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
                    client_id = mds_sessions_lines[1].split()[1].strip('"')

                    result[client_id] = dir
                return result

            print(json.dumps(get_id_to_dir()))
            """)

        output = self.client_remote.sh([
            'sudo', 'python3', '-c', pyscript
        ], timeout=(5*60))
        client_id_to_dir = json.loads(output)

        try:
            return client_id_to_dir[self.client_id]
        except KeyError:
            log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
                self.client_id, ",".join(client_id_to_dir)
            ))
            raise

    def _read_debug_file(self, filename):
        """
        Return the printed contents of ``filename`` from this mount's debugfs
        directory, read on the client as root.
        """
        debug_dir = self._find_debug_dir()

        pyscript = dedent("""
            import os

            print(open(os.path.join("{debug_dir}", "{filename}")).read())
            """).format(debug_dir=debug_dir, filename=filename)

        output = self.client_remote.sh([
            'sudo', 'python3', '-c', pyscript
        ], timeout=(5*60))
        return output

    def get_global_id(self):
        """
        Look up the CephFS client ID for this mount, using debugfs.

        The value is the second whitespace-separated token on the first line
        of the ``mds_sessions`` debug file, converted to int.
        """

        assert self.mounted

        mds_sessions = self._read_debug_file("mds_sessions")
        lines = mds_sessions.split("\n")
        return int(lines[0].split()[1])

    def get_osd_epoch(self):
        """
        Return 2-tuple of osd_epoch, osd_epoch_barrier

        Both are parsed as ints from the first line of the ``osdmap`` debug
        file (second and fourth whitespace-separated tokens).
        """
        osd_map = self._read_debug_file("osdmap")
        lines = osd_map.split("\n")
        first_line_tokens = lines[0].split()
        epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])

        return epoch, barrier
263 return epoch, barrier