// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver probing
 *
 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
8 #include <linux/sched.h>
9 #include <linux/slab.h>
12 #include "protocol_afs.h"
13 #include "protocol_yfs.h"
15 static unsigned int afs_fs_probe_fast_poll_interval
= 30 * HZ
;
16 static unsigned int afs_fs_probe_slow_poll_interval
= 5 * 60 * HZ
;
19 * Start the probe polling timer. We have to supply it with an inc on the
20 * outstanding server count.
22 static void afs_schedule_fs_probe(struct afs_net
*net
,
23 struct afs_server
*server
, bool fast
)
30 atj
= server
->probed_at
;
31 atj
+= fast
? afs_fs_probe_fast_poll_interval
: afs_fs_probe_slow_poll_interval
;
33 afs_inc_servers_outstanding(net
);
34 if (timer_reduce(&net
->fs_probe_timer
, atj
))
35 afs_dec_servers_outstanding(net
);
39 * Handle the completion of a set of probes.
41 static void afs_finished_fs_probe(struct afs_net
*net
, struct afs_server
*server
)
43 bool responded
= server
->probe
.responded
;
45 write_seqlock(&net
->fs_lock
);
47 list_add_tail(&server
->probe_link
, &net
->fs_probe_slow
);
49 server
->rtt
= UINT_MAX
;
50 clear_bit(AFS_SERVER_FL_RESPONDING
, &server
->flags
);
51 list_add_tail(&server
->probe_link
, &net
->fs_probe_fast
);
53 write_sequnlock(&net
->fs_lock
);
55 afs_schedule_fs_probe(net
, server
, !responded
);
59 * Handle the completion of a probe.
61 static void afs_done_one_fs_probe(struct afs_net
*net
, struct afs_server
*server
)
65 if (atomic_dec_and_test(&server
->probe_outstanding
))
66 afs_finished_fs_probe(net
, server
);
68 wake_up_all(&server
->probe_wq
);
72 * Handle inability to send a probe due to ENOMEM when trying to allocate a
75 static void afs_fs_probe_not_done(struct afs_net
*net
,
76 struct afs_server
*server
,
77 struct afs_addr_cursor
*ac
)
79 struct afs_addr_list
*alist
= ac
->alist
;
80 unsigned int index
= ac
->index
;
84 trace_afs_io_error(0, -ENOMEM
, afs_io_error_fs_probe_fail
);
85 spin_lock(&server
->probe_lock
);
87 server
->probe
.local_failure
= true;
88 if (server
->probe
.error
== 0)
89 server
->probe
.error
= -ENOMEM
;
91 set_bit(index
, &alist
->failed
);
93 spin_unlock(&server
->probe_lock
);
94 return afs_done_one_fs_probe(net
, server
);
/*
 * Process the result of probing a fileserver.  This is called after successful
 * or failed delivery of an FS.GetCapabilities operation.
 *
 * @call: The completed call; supplies the address list, server, address
 *	  index, error and abort code.
 *
 * Under ->probe_lock the call error is sorted into one of three outcomes:
 *  - the server answered (possibly with an abort): the address is marked
 *    responded, YFS/AFS service and 64-bit-file capability flags are updated
 *    and, if this address has the lowest smoothed RTT seen in this probe set,
 *    it becomes the preferred address;
 *  - a local failure: noted without marking the address as failed;
 *  - anything else: the address is marked failed and the error recorded if no
 *    more significant result is already stored.
 * The probe is then accounted as done.
 *
 * NOTE(review): of the switch only the "-ECONNRESET" label survived in the
 * damaged text.  The switch statement, the remaining case labels, the gotos,
 * the "responded"/"out" labels and the else branches were restored from the
 * line gaps; verify the case list against the pristine file.
 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_addr_list *alist = call->alist;
	struct afs_server *server = call->server;
	unsigned int index = call->addr_ix;
	unsigned int rtt_us = 0, cap0;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);

	spin_lock(&server->probe_lock);

	switch (ret) {
	case 0:
		server->probe.error = 0;
		goto responded;
	case -ECONNABORTED:
		/* An abort still proves the server is alive; only keep the
		 * abort details if nothing has responded properly yet.
		 */
		if (!server->probe.responded) {
			server->probe.abort_code = call->abort_code;
			server->probe.error = ret;
		}
		goto responded;
	case -ENOMEM:
	case -ENONET:
		/* Local problem: says nothing about the remote address. */
		clear_bit(index, &alist->responded);
		server->probe.local_failure = true;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		clear_bit(index, &alist->responded);
		set_bit(index, &alist->failed);
		/* Only replace "no error" or a timeout with this error. */
		if (!server->probe.responded &&
		    (server->probe.error == 0 ||
		     server->probe.error == -ETIMEDOUT ||
		     server->probe.error == -ETIME))
			server->probe.error = ret;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	}

responded:
	clear_bit(index, &alist->failed);

	if (call->service_id == YFS_FS_SERVICE) {
		server->probe.is_yfs = true;
		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
		alist->addrs[index].srx_service = call->service_id;
	} else {
		server->probe.not_yfs = true;
		/* Don't downgrade if another address already answered as YFS. */
		if (!server->probe.is_yfs) {
			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			alist->addrs[index].srx_service = call->service_id;
		}
		cap0 = ntohl(call->tmp);
		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
		else
			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
	}

	if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
	    rtt_us < server->probe.rtt) {
		server->probe.rtt = rtt_us;
		server->rtt = rtt_us;
		alist->preferred = index;
	}

	smp_wmb(); /* Set rtt before responded. */
	server->probe.responded = true;
	set_bit(index, &alist->responded);
	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
	spin_unlock(&server->probe_lock);

	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
	       &server->uuid, index, &alist->addrs[index].transport,
	       rtt_us, ret);

	return afs_done_one_fs_probe(call->net, server);
}
192 * Probe one or all of a fileserver's addresses to find out the best route and
193 * to query its capabilities.
195 void afs_fs_probe_fileserver(struct afs_net
*net
, struct afs_server
*server
,
196 struct key
*key
, bool all
)
198 struct afs_addr_cursor ac
= {
202 _enter("%pU", &server
->uuid
);
204 read_lock(&server
->fs_lock
);
205 ac
.alist
= rcu_dereference_protected(server
->addresses
,
206 lockdep_is_held(&server
->fs_lock
));
207 afs_get_addrlist(ac
.alist
);
208 read_unlock(&server
->fs_lock
);
210 server
->probed_at
= jiffies
;
211 atomic_set(&server
->probe_outstanding
, all
? ac
.alist
->nr_addrs
: 1);
212 memset(&server
->probe
, 0, sizeof(server
->probe
));
213 server
->probe
.rtt
= UINT_MAX
;
215 ac
.index
= ac
.alist
->preferred
;
216 if (ac
.index
< 0 || ac
.index
>= ac
.alist
->nr_addrs
)
220 for (ac
.index
= 0; ac
.index
< ac
.alist
->nr_addrs
; ac
.index
++)
221 if (!afs_fs_get_capabilities(net
, server
, &ac
, key
))
222 afs_fs_probe_not_done(net
, server
, &ac
);
224 if (!afs_fs_get_capabilities(net
, server
, &ac
, key
))
225 afs_fs_probe_not_done(net
, server
, &ac
);
228 afs_put_addrlist(ac
.alist
);
232 * Wait for the first as-yet untried fileserver to respond.
234 int afs_wait_for_fs_probes(struct afs_server_list
*slist
, unsigned long untried
)
236 struct wait_queue_entry
*waits
;
237 struct afs_server
*server
;
238 unsigned int rtt
= UINT_MAX
, rtt_s
;
239 bool have_responders
= false;
242 _enter("%u,%lx", slist
->nr_servers
, untried
);
244 /* Only wait for servers that have a probe outstanding. */
245 for (i
= 0; i
< slist
->nr_servers
; i
++) {
246 if (test_bit(i
, &untried
)) {
247 server
= slist
->servers
[i
].server
;
248 if (!atomic_read(&server
->probe_outstanding
))
249 __clear_bit(i
, &untried
);
250 if (server
->probe
.responded
)
251 have_responders
= true;
254 if (have_responders
|| !untried
)
257 waits
= kmalloc(array_size(slist
->nr_servers
, sizeof(*waits
)), GFP_KERNEL
);
261 for (i
= 0; i
< slist
->nr_servers
; i
++) {
262 if (test_bit(i
, &untried
)) {
263 server
= slist
->servers
[i
].server
;
264 init_waitqueue_entry(&waits
[i
], current
);
265 add_wait_queue(&server
->probe_wq
, &waits
[i
]);
270 bool still_probing
= false;
272 set_current_state(TASK_INTERRUPTIBLE
);
273 for (i
= 0; i
< slist
->nr_servers
; i
++) {
274 if (test_bit(i
, &untried
)) {
275 server
= slist
->servers
[i
].server
;
276 if (server
->probe
.responded
)
278 if (atomic_read(&server
->probe_outstanding
))
279 still_probing
= true;
283 if (!still_probing
|| signal_pending(current
))
289 set_current_state(TASK_RUNNING
);
291 for (i
= 0; i
< slist
->nr_servers
; i
++) {
292 if (test_bit(i
, &untried
)) {
293 server
= slist
->servers
[i
].server
;
294 rtt_s
= READ_ONCE(server
->rtt
);
295 if (test_bit(AFS_SERVER_FL_RESPONDING
, &server
->flags
) &&
301 remove_wait_queue(&server
->probe_wq
, &waits
[i
]);
307 if (pref
== -1 && signal_pending(current
))
311 slist
->preferred
= pref
;
316 * Probe timer. We have an increment on fs_outstanding that we need to pass
317 * along to the work item.
319 void afs_fs_probe_timer(struct timer_list
*timer
)
321 struct afs_net
*net
= container_of(timer
, struct afs_net
, fs_probe_timer
);
323 if (!net
->live
|| !queue_work(afs_wq
, &net
->fs_prober
))
324 afs_dec_servers_outstanding(net
);
328 * Dispatch a probe to a server.
330 static void afs_dispatch_fs_probe(struct afs_net
*net
, struct afs_server
*server
, bool all
)
331 __releases(&net
->fs_lock
)
333 struct key
*key
= NULL
;
335 /* We remove it from the queues here - it will be added back to
336 * one of the queues on the completion of the probe.
338 list_del_init(&server
->probe_link
);
340 afs_get_server(server
, afs_server_trace_get_probe
);
341 write_sequnlock(&net
->fs_lock
);
343 afs_fs_probe_fileserver(net
, server
, key
, all
);
344 afs_put_server(net
, server
, afs_server_trace_put_probe
);
348 * Probe a server immediately without waiting for its due time to come
349 * round. This is used when all of the addresses have been tried.
351 void afs_probe_fileserver(struct afs_net
*net
, struct afs_server
*server
)
353 write_seqlock(&net
->fs_lock
);
354 if (!list_empty(&server
->probe_link
))
355 return afs_dispatch_fs_probe(net
, server
, true);
356 write_sequnlock(&net
->fs_lock
);
360 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
362 void afs_fs_probe_dispatcher(struct work_struct
*work
)
364 struct afs_net
*net
= container_of(work
, struct afs_net
, fs_prober
);
365 struct afs_server
*fast
, *slow
, *server
;
366 unsigned long nowj
, timer_at
, poll_at
;
367 bool first_pass
= true, set_timer
= false;
374 if (list_empty(&net
->fs_probe_fast
) && list_empty(&net
->fs_probe_slow
)) {
380 write_seqlock(&net
->fs_lock
);
382 fast
= slow
= server
= NULL
;
384 timer_at
= nowj
+ MAX_JIFFY_OFFSET
;
386 if (!list_empty(&net
->fs_probe_fast
)) {
387 fast
= list_first_entry(&net
->fs_probe_fast
, struct afs_server
, probe_link
);
388 poll_at
= fast
->probed_at
+ afs_fs_probe_fast_poll_interval
;
389 if (time_before(nowj
, poll_at
)) {
396 if (!list_empty(&net
->fs_probe_slow
)) {
397 slow
= list_first_entry(&net
->fs_probe_slow
, struct afs_server
, probe_link
);
398 poll_at
= slow
->probed_at
+ afs_fs_probe_slow_poll_interval
;
399 if (time_before(nowj
, poll_at
)) {
400 if (time_before(poll_at
, timer_at
))
407 server
= fast
?: slow
;
409 _debug("probe %pU", &server
->uuid
);
411 if (server
&& (first_pass
|| !need_resched())) {
412 afs_dispatch_fs_probe(net
, server
, server
== fast
);
417 write_sequnlock(&net
->fs_lock
);
420 if (!queue_work(afs_wq
, &net
->fs_prober
))
421 afs_dec_servers_outstanding(net
);
422 _leave(" [requeue]");
423 } else if (set_timer
) {
424 if (timer_reduce(&net
->fs_probe_timer
, timer_at
))
425 afs_dec_servers_outstanding(net
);
428 afs_dec_servers_outstanding(net
);
429 _leave(" [quiesce]");
434 * Wait for a probe on a particular fileserver to complete for 2s.
436 int afs_wait_for_one_fs_probe(struct afs_server
*server
, bool is_intr
)
438 struct wait_queue_entry wait
;
439 unsigned long timo
= 2 * HZ
;
441 if (atomic_read(&server
->probe_outstanding
) == 0)
444 init_wait_entry(&wait
, 0);
446 prepare_to_wait_event(&server
->probe_wq
, &wait
,
447 is_intr
? TASK_INTERRUPTIBLE
: TASK_UNINTERRUPTIBLE
);
449 server
->probe
.responded
||
450 atomic_read(&server
->probe_outstanding
) == 0 ||
451 (is_intr
&& signal_pending(current
)))
453 timo
= schedule_timeout(timo
);
456 finish_wait(&server
->probe_wq
, &wait
);
459 if (server
->probe
.responded
)
461 if (is_intr
&& signal_pending(current
))
465 return -EDESTADDRREQ
;
469 * Clean up the probing when the namespace is killed off.
471 void afs_fs_probe_cleanup(struct afs_net
*net
)
473 if (del_timer_sync(&net
->fs_probe_timer
))
474 afs_dec_servers_outstanding(net
);