1"""
2CephFS sub-tasks.
3"""
4
import logging
import re

from tasks.cephfs.filesystem import Filesystem, MDSCluster

log = logging.getLogger(__name__)

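# Each public function below is a teuthology sub-task; jobs reference them
# from task YAML as "fs.<function>" (e.g. "- fs.ready:"), following the
# usual qa sub-task naming convention for task modules.
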
# Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5)
CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1)
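# A worked expansion of the mask above (pure arithmetic, fixed by the
# definitions): CEPH_MDSMAP_LAST is 1<<5 == 0b100000, so
# (CEPH_MDSMAP_LAST<<1) - 1 == 0b111111 == 0x3f, a mask selecting flag bits
# 0 through 5. post_upgrade_checks() compares only these bits, presumably so
# that flag bits introduced after allow_standby_replay cannot cause spurious
# mismatches.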
def pre_upgrade_save(ctx, config):
    """
    Save each file system's state so that post_upgrade_checks can verify
    the upgrade procedure didn't clobber it.
    """

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    state = {}
    ctx['mds-upgrade-state'] = state

    for fs in list(status.get_filesystems()):
        fscid = fs['id']
        mdsmap = fs['mdsmap']
        fs_state = {}
        fs_state['epoch'] = mdsmap['epoch']
        fs_state['max_mds'] = mdsmap['max_mds']
        fs_state['flags'] = mdsmap['flags'] & UPGRADE_FLAGS_MASK
        state[fscid] = fs_state
        log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")

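# For illustration only, the saved structure looks roughly like this (the
# fscid and values are hypothetical):
#
#   ctx['mds-upgrade-state'] = {
#       1: {'epoch': 42, 'max_mds': 2, 'flags': CEPH_MDSMAP_ALLOW_STANDBY_REPLAY},
#   }
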
def post_upgrade_checks(ctx, config):
    """
    Verify that the upgrade procedure didn't clobber the state saved by
    pre_upgrade_save.
    """

    state = ctx['mds-upgrade-state']

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    for fs in list(status.get_filesystems()):
        fscid = fs['id']
        mdsmap = fs['mdsmap']
        fs_state = state[fscid]
        log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")

        # check state was restored to previous values
        assert fs_state['max_mds'] == mdsmap['max_mds']
        assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK)

        # now confirm that the upgrade procedure was followed
        epoch = mdsmap['epoch']
        pre_upgrade_epoch = fs_state['epoch']
        assert pre_upgrade_epoch < epoch
        multiple_max_mds = fs_state['max_mds'] > 1
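        # Walk every intermediate fsmap epoch and confirm the mgr followed a
        # supported upgrade style: either fail the fs outright (the rapid
        # multi-rank upgrade path) or reduce max_mds to 1 first.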
        did_decrease_max_mds = False
        should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
        did_disable_allow_standby_replay = False
        did_fail_fs = False
        for i in range(pre_upgrade_epoch+1, mdsmap['epoch']):
            old_status = mdsc.status(epoch=i)
            old_fs = old_status.get_fsmap(fscid)
            old_mdsmap = old_fs['mdsmap']
            if not multiple_max_mds \
                    and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE):
                raise RuntimeError('mgr is failing fs when there is only one '
                                   f'rank in epoch {i}.')
            if multiple_max_mds \
                    and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) \
                    and old_mdsmap['max_mds'] == 1:
                raise RuntimeError('mgr is failing fs as well as the max_mds '
                                   f'is reduced in epoch {i}')
            if old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE:
                log.debug(f"max_mds not reduced in epoch {i} as fs was failed "
                          "for carrying out rapid multi-rank mds upgrade")
                did_fail_fs = True
            if multiple_max_mds and old_mdsmap['max_mds'] == 1:
                log.debug(f"max_mds reduced in epoch {i}")
                did_decrease_max_mds = True
            if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
                log.debug(f"allow_standby_replay disabled in epoch {i}")
                did_disable_allow_standby_replay = True
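        # Both asserts below encode implications ("not A or B" reads "A
        # implies B"): with multiple ranks, the mgr must have failed the fs
        # or reduced max_mds; if allow_standby_replay was set, it must have
        # been disabled at some point during the upgrade.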
        assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds
        assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay

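# These two hooks are meant to bracket an upgrade in a job. A hypothetical
# suite fragment (the surrounding upgrade steps are an assumption, not
# copied from a real suite):
#
#   tasks:
#   - fs.pre_upgrade_save:
#   - ... upgrade the cluster ...
#   - fs.post_upgrade_checks:
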
def ready(ctx, config):
    """
    Wait until each file system is ready for clients.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    timeout = config.get('timeout', 300)

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    for filesystem in status.get_filesystems():
        fs = Filesystem(ctx, fscid=filesystem['id'])
        fs.wait_for_daemons(timeout=timeout, status=status)

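# Hypothetical job fragment; the default timeout is 300 seconds, the value
# below is purely illustrative:
#
#   tasks:
#   - fs.ready:
#       timeout: 600
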
def clients_evicted(ctx, config):
    """
    Check that clients are evicted; unmount (cleanup) if so.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    clients = config.get('clients')

    if clients is None:
        clients = {("client."+client_id): True for client_id in ctx.mounts}

    log.info("clients is {}".format(str(clients)))

    fs = Filesystem(ctx)
    status = fs.status()

    has_session = set()
    mounts = {}
    for client in clients:
        client_id = re.match(r"^client\.([0-9]+)$", client).group(1)
        mounts[client] = ctx.mounts.get(client_id)

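    # Scan the session list of every active rank: a client expected to be
    # evicted must not hold a session; one expected to survive is remembered
    # in has_session for the cross-check below.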
    for rank in fs.get_ranks(status=status):
        ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status)
        for session in ls:
            for client, evicted in clients.items():
                mount = mounts.get(client)
                if mount is not None:
                    global_id = mount.get_global_id()
                    if session['id'] == global_id:
                        if evicted:
                            raise RuntimeError("client still has session: {}".format(str(session)))
                        else:
                            log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank']))
                            has_session.add(client)

    no_session = set(clients) - has_session
    should_assert = False
    for client, evicted in clients.items():
        mount = mounts.get(client)
        if mount is not None:
            if evicted:
                log.info("confirming client {} is blocklisted".format(client))
                assert fs.is_addr_blocklisted(mount.get_global_addr())
            elif client in no_session:
                log.info("client {} should not be evicted but has no session with an MDS".format(client))
                fs.is_addr_blocklisted(mount.get_global_addr())  # for debugging
                should_assert = True
    if should_assert:
        raise RuntimeError("some clients which should not be evicted have no session with an MDS?")
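
# Hypothetical job fragment (the client ids are illustrative): assert that
# client.0 was evicted while client.1 kept its session:
#
#   tasks:
#   - fs.clients_evicted:
#       clients:
#         client.0: True
#         client.1: False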