]>
Commit | Line | Data |
---|---|---|
9cc6fc50 DH |
1 | /* Handle fileserver selection and rotation. |
2 | * | |
3 | * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. | |
4 | * Written by David Howells (dhowells@redhat.com) | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public Licence | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the Licence, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #include <linux/kernel.h> | |
13 | #include <linux/slab.h> | |
d2ddc776 DH |
14 | #include <linux/fs.h> |
15 | #include <linux/sched.h> | |
16 | #include <linux/delay.h> | |
17 | #include <linux/sched/signal.h> | |
9cc6fc50 | 18 | #include "internal.h" |
d2ddc776 | 19 | #include "afs_fs.h" |
9cc6fc50 DH |
20 | |
21 | /* | |
22 | * Initialise a filesystem server cursor for iterating over FS servers. | |
23 | */ | |
fe342cf7 | 24 | static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) |
9cc6fc50 DH |
25 | { |
26 | memset(fc, 0, sizeof(*fc)); | |
27 | } | |
28 | ||
d2ddc776 DH |
29 | /* |
30 | * Begin an operation on the fileserver. | |
31 | * | |
32 | * Fileserver operations are serialised on the server by vnode, so we serialise | |
33 | * them here also using the io_lock. | |
34 | */ | |
35 | bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, | |
36 | struct key *key) | |
37 | { | |
38 | afs_init_fs_cursor(fc, vnode); | |
39 | fc->vnode = vnode; | |
40 | fc->key = key; | |
41 | fc->ac.error = SHRT_MAX; | |
e7f680f4 | 42 | fc->error = -EDESTADDRREQ; |
d2ddc776 DH |
43 | |
44 | if (mutex_lock_interruptible(&vnode->io_lock) < 0) { | |
e7f680f4 | 45 | fc->error = -EINTR; |
d2ddc776 DH |
46 | fc->flags |= AFS_FS_CURSOR_STOP; |
47 | return false; | |
48 | } | |
49 | ||
0fafdc9f | 50 | if (vnode->lock_state != AFS_VNODE_LOCK_NONE) |
d2ddc776 DH |
51 | fc->flags |= AFS_FS_CURSOR_CUR_ONLY; |
52 | return true; | |
53 | } | |
54 | ||
55 | /* | |
56 | * Begin iteration through a server list, starting with the vnode's last used | |
57 | * server if possible, or the last recorded good server if not. | |
58 | */ | |
59 | static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, | |
60 | struct afs_vnode *vnode) | |
61 | { | |
62 | struct afs_cb_interest *cbi; | |
63 | int i; | |
64 | ||
65 | read_lock(&vnode->volume->servers_lock); | |
66 | fc->server_list = afs_get_serverlist(vnode->volume->servers); | |
67 | read_unlock(&vnode->volume->servers_lock); | |
68 | ||
69 | cbi = vnode->cb_interest; | |
70 | if (cbi) { | |
71 | /* See if the vnode's preferred record is still available */ | |
72 | for (i = 0; i < fc->server_list->nr_servers; i++) { | |
73 | if (fc->server_list->servers[i].cb_interest == cbi) { | |
74 | fc->start = i; | |
75 | goto found_interest; | |
76 | } | |
77 | } | |
78 | ||
79 | /* If we have a lock outstanding on a server that's no longer | |
80 | * serving this vnode, then we can't switch to another server | |
81 | * and have to return an error. | |
82 | */ | |
83 | if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { | |
e7f680f4 | 84 | fc->error = -ESTALE; |
d2ddc776 DH |
85 | return false; |
86 | } | |
87 | ||
88 | /* Note that the callback promise is effectively broken */ | |
89 | write_seqlock(&vnode->cb_lock); | |
90 | ASSERTCMP(cbi, ==, vnode->cb_interest); | |
91 | vnode->cb_interest = NULL; | |
92 | if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) | |
93 | vnode->cb_break++; | |
94 | write_sequnlock(&vnode->cb_lock); | |
95 | ||
96 | afs_put_cb_interest(afs_v2net(vnode), cbi); | |
97 | cbi = NULL; | |
98 | } else { | |
99 | fc->start = READ_ONCE(fc->server_list->index); | |
100 | } | |
101 | ||
102 | found_interest: | |
103 | fc->index = fc->start; | |
104 | return true; | |
105 | } | |
106 | ||
107 | /* | |
108 | * Post volume busy note. | |
109 | */ | |
110 | static void afs_busy(struct afs_volume *volume, u32 abort_code) | |
111 | { | |
112 | const char *m; | |
113 | ||
114 | switch (abort_code) { | |
115 | case VOFFLINE: m = "offline"; break; | |
116 | case VRESTARTING: m = "restarting"; break; | |
117 | case VSALVAGING: m = "being salvaged"; break; | |
118 | default: m = "busy"; break; | |
119 | } | |
0fafdc9f | 120 | |
d2ddc776 DH |
121 | pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); |
122 | } | |
123 | ||
124 | /* | |
125 | * Sleep and retry the operation to the same fileserver. | |
126 | */ | |
127 | static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) | |
128 | { | |
129 | msleep_interruptible(1000); | |
130 | if (signal_pending(current)) { | |
e7f680f4 | 131 | fc->error = -ERESTARTSYS; |
d2ddc776 DH |
132 | return false; |
133 | } | |
134 | ||
135 | return true; | |
136 | } | |
137 | ||
138 | /* | |
139 | * Select the fileserver to use. May be called multiple times to rotate | |
140 | * through the fileservers. | |
141 | */ | |
142 | bool afs_select_fileserver(struct afs_fs_cursor *fc) | |
143 | { | |
144 | struct afs_addr_list *alist; | |
145 | struct afs_server *server; | |
146 | struct afs_vnode *vnode = fc->vnode; | |
e7f680f4 | 147 | int error = fc->ac.error; |
d2ddc776 DH |
148 | |
149 | _enter("%u/%u,%u/%u,%d,%d", | |
150 | fc->index, fc->start, | |
151 | fc->ac.index, fc->ac.start, | |
e7f680f4 | 152 | error, fc->ac.abort_code); |
d2ddc776 DH |
153 | |
154 | if (fc->flags & AFS_FS_CURSOR_STOP) { | |
155 | _leave(" = f [stopped]"); | |
156 | return false; | |
157 | } | |
158 | ||
159 | /* Evaluate the result of the previous operation, if there was one. */ | |
e7f680f4 | 160 | switch (error) { |
d2ddc776 DH |
161 | case SHRT_MAX: |
162 | goto start; | |
163 | ||
164 | case 0: | |
165 | default: | |
166 | /* Success or local failure. Stop. */ | |
e7f680f4 | 167 | fc->error = error; |
d2ddc776 | 168 | fc->flags |= AFS_FS_CURSOR_STOP; |
e7f680f4 | 169 | _leave(" = f [okay/local %d]", error); |
d2ddc776 DH |
170 | return false; |
171 | ||
172 | case -ECONNABORTED: | |
173 | /* The far side rejected the operation on some grounds. This | |
174 | * might involve the server being busy or the volume having been moved. | |
175 | */ | |
176 | switch (fc->ac.abort_code) { | |
177 | case VNOVOL: | |
178 | /* This fileserver doesn't know about the volume. | |
179 | * - May indicate that the VL is wrong - retry once and compare | |
180 | * the results. | |
181 | * - May indicate that the fileserver couldn't attach to the vol. | |
182 | */ | |
183 | if (fc->flags & AFS_FS_CURSOR_VNOVOL) { | |
e7f680f4 | 184 | fc->error = -EREMOTEIO; |
3d9fa911 | 185 | goto next_server; |
d2ddc776 DH |
186 | } |
187 | ||
188 | write_lock(&vnode->volume->servers_lock); | |
189 | fc->server_list->vnovol_mask |= 1 << fc->index; | |
190 | write_unlock(&vnode->volume->servers_lock); | |
191 | ||
192 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); | |
e7f680f4 DH |
193 | error = afs_check_volume_status(vnode->volume, fc->key); |
194 | if (error < 0) | |
195 | goto failed_set_error; | |
d2ddc776 DH |
196 | |
197 | if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { | |
e7f680f4 | 198 | fc->error = -ENOMEDIUM; |
d2ddc776 DH |
199 | goto failed; |
200 | } | |
201 | ||
202 | /* If the server list didn't change, then assume that | |
203 | * it's the fileserver having trouble. | |
204 | */ | |
205 | if (vnode->volume->servers == fc->server_list) { | |
e7f680f4 | 206 | fc->error = -EREMOTEIO; |
3d9fa911 | 207 | goto next_server; |
d2ddc776 DH |
208 | } |
209 | ||
210 | /* Try again */ | |
211 | fc->flags |= AFS_FS_CURSOR_VNOVOL; | |
212 | _leave(" = t [vnovol]"); | |
213 | return true; | |
214 | ||
215 | case VSALVAGE: /* TODO: Should this return an error or iterate? */ | |
216 | case VVOLEXISTS: | |
217 | case VNOSERVICE: | |
218 | case VONLINE: | |
219 | case VDISKFULL: | |
220 | case VOVERQUOTA: | |
e7f680f4 | 221 | fc->error = afs_abort_to_error(fc->ac.abort_code); |
d2ddc776 DH |
222 | goto next_server; |
223 | ||
224 | case VOFFLINE: | |
225 | if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { | |
226 | afs_busy(vnode->volume, fc->ac.abort_code); | |
227 | clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); | |
228 | } | |
229 | if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { | |
e7f680f4 | 230 | fc->error = -EADV; |
d2ddc776 DH |
231 | goto failed; |
232 | } | |
233 | if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { | |
e7f680f4 | 234 | fc->error = -ESTALE; |
d2ddc776 DH |
235 | goto failed; |
236 | } | |
237 | goto busy; | |
238 | ||
239 | case VSALVAGING: | |
240 | case VRESTARTING: | |
241 | case VBUSY: | |
242 | /* Retry after going round all the servers unless we | |
243 | * have a file lock we need to maintain. | |
244 | */ | |
245 | if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { | |
e7f680f4 | 246 | fc->error = -EBUSY; |
d2ddc776 DH |
247 | goto failed; |
248 | } | |
249 | if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { | |
250 | afs_busy(vnode->volume, fc->ac.abort_code); | |
251 | clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); | |
252 | } | |
253 | busy: | |
254 | if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { | |
255 | if (!afs_sleep_and_retry(fc)) | |
256 | goto failed; | |
257 | ||
258 | /* Retry with same server & address */ | |
259 | _leave(" = t [vbusy]"); | |
260 | return true; | |
261 | } | |
262 | ||
263 | fc->flags |= AFS_FS_CURSOR_VBUSY; | |
264 | goto next_server; | |
265 | ||
266 | case VMOVED: | |
267 | /* The volume migrated to another server. We consider | |
268 | * consider all locks and callbacks broken and request | |
269 | * an update from the VLDB. | |
270 | * | |
271 | * We also limit the number of VMOVED hops we will | |
272 | * honour, just in case someone sets up a loop. | |
273 | */ | |
274 | if (fc->flags & AFS_FS_CURSOR_VMOVED) { | |
e7f680f4 | 275 | fc->error = -EREMOTEIO; |
d2ddc776 DH |
276 | goto failed; |
277 | } | |
278 | fc->flags |= AFS_FS_CURSOR_VMOVED; | |
279 | ||
280 | set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); | |
281 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); | |
e7f680f4 DH |
282 | error = afs_check_volume_status(vnode->volume, fc->key); |
283 | if (error < 0) | |
284 | goto failed_set_error; | |
d2ddc776 DH |
285 | |
286 | /* If the server list didn't change, then the VLDB is | |
287 | * out of sync with the fileservers. This is hopefully | |
288 | * a temporary condition, however, so we don't want to | |
289 | * permanently block access to the file. | |
290 | * | |
291 | * TODO: Try other fileservers if we can. | |
292 | * | |
293 | * TODO: Retry a few times with sleeps. | |
294 | */ | |
295 | if (vnode->volume->servers == fc->server_list) { | |
e7f680f4 | 296 | fc->error = -ENOMEDIUM; |
d2ddc776 DH |
297 | goto failed; |
298 | } | |
299 | ||
300 | goto restart_from_beginning; | |
301 | ||
302 | default: | |
303 | clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); | |
304 | clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); | |
e7f680f4 | 305 | fc->error = afs_abort_to_error(fc->ac.abort_code); |
d2ddc776 DH |
306 | goto failed; |
307 | } | |
308 | ||
e7f680f4 DH |
309 | case -ETIMEDOUT: |
310 | case -ETIME: | |
311 | if (fc->error != -EDESTADDRREQ) | |
312 | goto iterate_address; | |
313 | /* Fall through */ | |
d2ddc776 DH |
314 | case -ENETUNREACH: |
315 | case -EHOSTUNREACH: | |
316 | case -ECONNREFUSED: | |
d2ddc776 | 317 | _debug("no conn"); |
e7f680f4 | 318 | fc->error = error; |
d2ddc776 | 319 | goto iterate_address; |
1a025028 DH |
320 | |
321 | case -ECONNRESET: | |
322 | _debug("call reset"); | |
e7f680f4 | 323 | fc->error = error; |
1a025028 | 324 | goto failed; |
d2ddc776 DH |
325 | } |
326 | ||
327 | restart_from_beginning: | |
328 | _debug("restart"); | |
329 | afs_end_cursor(&fc->ac); | |
330 | afs_put_cb_interest(afs_v2net(vnode), fc->cbi); | |
331 | fc->cbi = NULL; | |
332 | afs_put_serverlist(afs_v2net(vnode), fc->server_list); | |
333 | fc->server_list = NULL; | |
334 | start: | |
335 | _debug("start"); | |
336 | /* See if we need to do an update of the volume record. Note that the | |
337 | * volume may have moved or even have been deleted. | |
338 | */ | |
e7f680f4 DH |
339 | error = afs_check_volume_status(vnode->volume, fc->key); |
340 | if (error < 0) | |
341 | goto failed_set_error; | |
d2ddc776 DH |
342 | |
343 | if (!afs_start_fs_iteration(fc, vnode)) | |
344 | goto failed; | |
d2ddc776 DH |
345 | |
346 | use_server: | |
347 | _debug("use"); | |
348 | /* We're starting on a different fileserver from the list. We need to | |
349 | * check it, create a callback intercept, find its address list and | |
350 | * probe its capabilities before we use it. | |
351 | */ | |
352 | ASSERTCMP(fc->ac.alist, ==, NULL); | |
353 | server = fc->server_list->servers[fc->index].server; | |
354 | ||
355 | if (!afs_check_server_record(fc, server)) | |
356 | goto failed; | |
357 | ||
358 | _debug("USING SERVER: %pU", &server->uuid); | |
359 | ||
360 | /* Make sure we've got a callback interest record for this server. We | |
361 | * have to link it in before we send the request as we can be sent a | |
362 | * break request before we've finished decoding the reply and | |
363 | * installing the vnode. | |
364 | */ | |
e7f680f4 DH |
365 | error = afs_register_server_cb_interest(vnode, fc->server_list, |
366 | fc->index); | |
367 | if (error < 0) | |
368 | goto failed_set_error; | |
d2ddc776 DH |
369 | |
370 | fc->cbi = afs_get_cb_interest(vnode->cb_interest); | |
371 | ||
372 | read_lock(&server->fs_lock); | |
373 | alist = rcu_dereference_protected(server->addresses, | |
374 | lockdep_is_held(&server->fs_lock)); | |
375 | afs_get_addrlist(alist); | |
376 | read_unlock(&server->fs_lock); | |
377 | ||
8305e579 | 378 | memset(&fc->ac, 0, sizeof(fc->ac)); |
d2ddc776 DH |
379 | |
380 | /* Probe the current fileserver if we haven't done so yet. */ | |
381 | if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { | |
382 | fc->ac.alist = afs_get_addrlist(alist); | |
383 | ||
ec5a3b4b DH |
384 | if (!afs_probe_fileserver(fc)) { |
385 | switch (fc->ac.error) { | |
386 | case -ENOMEM: | |
387 | case -ERESTARTSYS: | |
388 | case -EINTR: | |
389 | goto failed; | |
390 | default: | |
391 | goto next_server; | |
392 | } | |
393 | } | |
d2ddc776 DH |
394 | } |
395 | ||
396 | if (!fc->ac.alist) | |
397 | fc->ac.alist = alist; | |
398 | else | |
399 | afs_put_addrlist(alist); | |
400 | ||
d2ddc776 DH |
401 | fc->ac.start = READ_ONCE(alist->index); |
402 | fc->ac.index = fc->ac.start; | |
d2ddc776 DH |
403 | |
404 | iterate_address: | |
405 | ASSERT(fc->ac.alist); | |
406 | _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); | |
407 | /* Iterate over the current server's address list to try and find an | |
408 | * address on which it will respond to us. | |
409 | */ | |
fe4d774c DH |
410 | if (!afs_iterate_addresses(&fc->ac)) |
411 | goto next_server; | |
d2ddc776 | 412 | |
fe4d774c DH |
413 | _leave(" = t"); |
414 | return true; | |
d2ddc776 | 415 | |
16280a15 DH |
416 | next_server: |
417 | _debug("next"); | |
418 | afs_end_cursor(&fc->ac); | |
419 | afs_put_cb_interest(afs_v2net(vnode), fc->cbi); | |
420 | fc->cbi = NULL; | |
421 | fc->index++; | |
422 | if (fc->index >= fc->server_list->nr_servers) | |
423 | fc->index = 0; | |
424 | if (fc->index != fc->start) | |
425 | goto use_server; | |
426 | ||
427 | /* That's all the servers poked to no good effect. Try again if some | |
428 | * of them were busy. | |
429 | */ | |
430 | if (fc->flags & AFS_FS_CURSOR_VBUSY) | |
431 | goto restart_from_beginning; | |
432 | ||
16280a15 DH |
433 | goto failed; |
434 | ||
e7f680f4 DH |
435 | failed_set_error: |
436 | fc->error = error; | |
d2ddc776 DH |
437 | failed: |
438 | fc->flags |= AFS_FS_CURSOR_STOP; | |
fe4d774c | 439 | afs_end_cursor(&fc->ac); |
e7f680f4 | 440 | _leave(" = f [failed %d]", fc->error); |
d2ddc776 DH |
441 | return false; |
442 | } | |
443 | ||
444 | /* | |
445 | * Select the same fileserver we used for a vnode before and only that | |
446 | * fileserver. We use this when we have a lock on that file, which is backed | |
447 | * only by the fileserver we obtained it from. | |
448 | */ | |
449 | bool afs_select_current_fileserver(struct afs_fs_cursor *fc) | |
450 | { | |
451 | struct afs_vnode *vnode = fc->vnode; | |
452 | struct afs_cb_interest *cbi = vnode->cb_interest; | |
453 | struct afs_addr_list *alist; | |
e7f680f4 | 454 | int error = fc->ac.error; |
d2ddc776 DH |
455 | |
456 | _enter(""); | |
457 | ||
e7f680f4 | 458 | switch (error) { |
0fafdc9f DH |
459 | case SHRT_MAX: |
460 | if (!cbi) { | |
e7f680f4 | 461 | fc->error = -ESTALE; |
0fafdc9f DH |
462 | fc->flags |= AFS_FS_CURSOR_STOP; |
463 | return false; | |
464 | } | |
465 | ||
466 | fc->cbi = afs_get_cb_interest(vnode->cb_interest); | |
467 | ||
468 | read_lock(&cbi->server->fs_lock); | |
469 | alist = rcu_dereference_protected(cbi->server->addresses, | |
470 | lockdep_is_held(&cbi->server->fs_lock)); | |
471 | afs_get_addrlist(alist); | |
472 | read_unlock(&cbi->server->fs_lock); | |
473 | if (!alist) { | |
e7f680f4 | 474 | fc->error = -ESTALE; |
0fafdc9f DH |
475 | fc->flags |= AFS_FS_CURSOR_STOP; |
476 | return false; | |
477 | } | |
478 | ||
8305e579 | 479 | memset(&fc->ac, 0, sizeof(fc->ac)); |
0fafdc9f | 480 | fc->ac.alist = alist; |
0fafdc9f DH |
481 | fc->ac.start = READ_ONCE(alist->index); |
482 | fc->ac.index = fc->ac.start; | |
0fafdc9f DH |
483 | goto iterate_address; |
484 | ||
485 | case 0: | |
486 | default: | |
487 | /* Success or local failure. Stop. */ | |
e7f680f4 | 488 | fc->error = error; |
d2ddc776 | 489 | fc->flags |= AFS_FS_CURSOR_STOP; |
e7f680f4 | 490 | _leave(" = f [okay/local %d]", error); |
d2ddc776 | 491 | return false; |
d2ddc776 | 492 | |
0fafdc9f | 493 | case -ECONNABORTED: |
e7f680f4 | 494 | fc->error = afs_abort_to_error(fc->ac.abort_code); |
d2ddc776 | 495 | fc->flags |= AFS_FS_CURSOR_STOP; |
0fafdc9f | 496 | _leave(" = f [abort]"); |
d2ddc776 | 497 | return false; |
0fafdc9f DH |
498 | |
499 | case -ENETUNREACH: | |
500 | case -EHOSTUNREACH: | |
501 | case -ECONNREFUSED: | |
502 | case -ETIMEDOUT: | |
503 | case -ETIME: | |
504 | _debug("no conn"); | |
e7f680f4 | 505 | fc->error = error; |
0fafdc9f | 506 | goto iterate_address; |
d2ddc776 DH |
507 | } |
508 | ||
0fafdc9f DH |
509 | iterate_address: |
510 | /* Iterate over the current server's address list to try and find an | |
511 | * address on which it will respond to us. | |
512 | */ | |
513 | if (afs_iterate_addresses(&fc->ac)) { | |
514 | _leave(" = t"); | |
515 | return true; | |
516 | } | |
517 | ||
518 | afs_end_cursor(&fc->ac); | |
519 | return false; | |
d2ddc776 DH |
520 | } |
521 | ||
522 | /* | |
523 | * Tidy up a filesystem cursor and unlock the vnode. | |
524 | */ | |
525 | int afs_end_vnode_operation(struct afs_fs_cursor *fc) | |
526 | { | |
527 | struct afs_net *net = afs_v2net(fc->vnode); | |
d2ddc776 DH |
528 | |
529 | mutex_unlock(&fc->vnode->io_lock); | |
530 | ||
531 | afs_end_cursor(&fc->ac); | |
532 | afs_put_cb_interest(net, fc->cbi); | |
533 | afs_put_serverlist(net, fc->server_list); | |
534 | ||
e7f680f4 DH |
535 | if (fc->error == -ECONNABORTED) |
536 | fc->error = afs_abort_to_error(fc->ac.abort_code); | |
d2ddc776 | 537 | |
e7f680f4 | 538 | return fc->error; |
d2ddc776 | 539 | } |