]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | """ |
2 | CephFS sub-tasks. | |
3 | """ | |
4 | ||
11fdf7f2 TL |
5 | import logging |
6 | import re | |
11fdf7f2 | 7 | |
f67539c2 | 8 | from tasks.cephfs.filesystem import Filesystem, MDSCluster |
11fdf7f2 TL |
9 | |
10 | log = logging.getLogger(__name__) | |
11 | ||
a4b75251 TL |
# MDSMap flag bits inspected by the upgrade checks below.
CEPH_MDSMAP_NOT_JOINABLE = 1 << 0
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = 1 << 5

# Only flags up to and including CEPH_MDSMAP_ALLOW_STANDBY_REPLAY are compared
# across the upgrade; the mask has every bit from 0 through that one set.
CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
UPGRADE_FLAGS_MASK = (CEPH_MDSMAP_LAST << 1) - 1
def pre_upgrade_save(ctx, config):
    """
    That the upgrade procedure doesn't clobber state: save state.

    Records, for every file system in the current cluster status, the mdsmap
    epoch, max_mds and the masked flags. The result is stored in
    ``ctx['mds-upgrade-state']`` keyed by file system id, where
    post_upgrade_checks() reads it back.

    :param ctx: task context; must support item assignment.
    :param config: unused.
    """

    cluster = MDSCluster(ctx)
    status = cluster.status()

    # Publish the (still empty) container first, then fill it per file system.
    saved = {}
    ctx['mds-upgrade-state'] = saved

    for fs in list(status.get_filesystems()):
        fscid = fs['id']
        mdsmap = fs['mdsmap']
        snapshot = {
            'epoch': mdsmap['epoch'],
            'max_mds': mdsmap['max_mds'],
            # Keep only the flag bits covered by the upgrade mask.
            'flags': mdsmap['flags'] & UPGRADE_FLAGS_MASK,
        }
        saved[fscid] = snapshot
        log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {snapshot}")
37 | ||
38 | ||
def post_upgrade_checks(ctx, config):
    """
    That the upgrade procedure doesn't clobber state.

    Compares each file system's current mdsmap against the snapshot stored in
    ``ctx['mds-upgrade-state']`` and then walks the intermediate epochs to
    confirm the upgrade procedure was followed.

    :param ctx: task context holding the saved ``'mds-upgrade-state'`` mapping.
    :param config: unused.
    :raises KeyError: if no state was saved, or a file system has no entry.
    :raises AssertionError: if max_mds/flags were not restored, the epoch did
        not advance, or an expected intermediate step was never observed.
    :raises RuntimeError: if an intermediate epoch shows the fs marked not
        joinable while the saved max_mds was 1, or while max_mds is also 1.
    """

    saved = ctx['mds-upgrade-state']

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    for fs in list(status.get_filesystems()):
        fscid = fs['id']
        mdsmap = fs['mdsmap']
        fs_state = saved[fscid]
        log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")

        # check state was restored to previous values
        assert fs_state['max_mds'] == mdsmap['max_mds']
        assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK)

        # now confirm that the upgrade procedure was followed
        pre_upgrade_epoch = fs_state['epoch']
        assert pre_upgrade_epoch < mdsmap['epoch']

        multiple_max_mds = fs_state['max_mds'] > 1
        should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY

        did_fail_fs = False
        did_decrease_max_mds = False
        did_disable_allow_standby_replay = False

        # Inspect every epoch strictly between the saved and the current one.
        for past_epoch in range(pre_upgrade_epoch + 1, mdsmap['epoch']):
            past_mdsmap = mdsc.status(epoch=past_epoch).get_fsmap(fscid)['mdsmap']
            not_joinable = past_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE

            if not_joinable:
                # Failing the fs is only acceptable with several ranks, and
                # never together with a max_mds reduction.
                if not multiple_max_mds:
                    raise RuntimeError("mgr is failing fs when there is only one "
                                       f"rank in epoch {past_epoch}.")
                if past_mdsmap['max_mds'] == 1:
                    raise RuntimeError("mgr is failing fs as well the max_mds "
                                       f"is reduced in epoch {past_epoch}")
                log.debug(f"max_mds not reduced in epoch {past_epoch} as fs was failed "
                          "for carrying out rapid multi-rank mds upgrade")
                did_fail_fs = True

            if multiple_max_mds and past_mdsmap['max_mds'] == 1:
                log.debug(f"max_mds reduced in epoch {past_epoch}")
                did_decrease_max_mds = True

            if should_disable_allow_standby_replay \
               and not (past_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
                log.debug(f"allow_standby_replay disabled in epoch {past_epoch}")
                did_disable_allow_standby_replay = True

        # With several ranks, one of the two procedures must have been seen.
        assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds
        assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay
93 | ||
94 | ||
f67539c2 TL |
def ready(ctx, config):
    """
    That the file system is ready for clients.

    Waits for the daemons of every file system in the current cluster status.

    :param ctx: task context passed through to MDSCluster and Filesystem.
    :param config: optional dict; ``timeout`` (default 300) is forwarded to
        ``Filesystem.wait_for_daemons``.
    :raises AssertionError: if ``config`` is neither None nor a dict.
    """

    config = {} if config is None else config
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    wait_timeout = config.get('timeout', 300)

    status = MDSCluster(ctx).status()

    for fs_entry in status.get_filesystems():
        fs = Filesystem(ctx, fscid=fs_entry['id'])
        fs.wait_for_daemons(timeout=wait_timeout, status=status)
113 | ||
11fdf7f2 TL |
def clients_evicted(ctx, config):
    """
    Check that clients are evicted (or not) as expected.

    For each client expected to be evicted: no MDS rank may still list a
    session for it and its address must be blocklisted. For each client not
    expected to be evicted: some MDS rank must list a session for it.
    Clients without an entry in ``ctx.mounts`` are skipped.

    :param ctx: task context; ``ctx.mounts`` maps client ids to mount objects.
    :param config: optional dict. ``clients`` maps ``client.<id>`` names to a
        truthy value when the client should be evicted. When absent, every
        client in ``ctx.mounts`` is expected to be evicted.
    :raises AssertionError: if ``config`` is not a dict, or an evicted
        client's address is not blocklisted.
    :raises RuntimeError: if a client name is not of the form
        ``client.<numeric id>``, an evicted client still has a session, or a
        non-evicted client has no session.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    clients = config.get('clients')

    if clients is None:
        clients = {("client."+client_id): True for client_id in ctx.mounts}

    # Resolve every name to its mount before logging or touching the cluster,
    # so a malformed name fails fast with a clear message instead of an
    # AttributeError on a failed match. The dot is escaped: the previous
    # pattern let "." match any character (e.g. "clientX0").
    mounts = {}
    for client in clients:
        matched = re.fullmatch(r"client\.([0-9]+)", client)
        if matched is None:
            raise RuntimeError(
                "invalid client name {!r}: expected client.<numeric id>".format(client))
        mounts[client] = ctx.mounts.get(matched.group(1))

    log.info("clients is {}".format(str(clients)))

    fs = Filesystem(ctx)
    status = fs.status()

    # Scan the session list of every rank for the clients' global ids.
    has_session = set()
    for rank in fs.get_ranks(status=status):
        ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status)
        for session in ls:
            for client, evicted in clients.items():
                mount = mounts.get(client)
                if mount is None:
                    continue
                if session['id'] != mount.get_global_id():
                    continue
                if evicted:
                    raise RuntimeError("client still has session: {}".format(str(session)))
                log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank']))
                has_session.add(client)

    no_session = set(clients) - has_session
    should_assert = False
    for client, evicted in clients.items():
        mount = mounts.get(client)
        if mount is None:
            continue
        if evicted:
            log.info("confirming client {} is blocklisted".format(client))
            assert fs.is_addr_blocklisted(mount.get_global_addr())
        elif client in no_session:
            log.info("client {} should not be evicted but has no session with an MDS".format(client))
            fs.is_addr_blocklisted(mount.get_global_addr())  # for debugging
            # Keep going so every offending client is logged before failing.
            should_assert = True
    if should_assert:
        raise RuntimeError("some clients which should not be evicted have no session with an MDS?")