]>
Commit | Line | Data |
---|---|---|
1 | /* Management of Tx window, Tx resend, ACKs and out-of-sequence reception | |
2 | * | |
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | |
4 | * Written by David Howells (dhowells@redhat.com) | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
13 | ||
14 | #include <linux/module.h> | |
15 | #include <linux/circ_buf.h> | |
16 | #include <linux/net.h> | |
17 | #include <linux/skbuff.h> | |
18 | #include <linux/slab.h> | |
19 | #include <linux/udp.h> | |
20 | #include <net/sock.h> | |
21 | #include <net/af_rxrpc.h> | |
22 | #include "ar-internal.h" | |
23 | ||
24 | /* | |
25 | * Propose a PING ACK be sent. | |
26 | */ | |
27 | static void rxrpc_propose_ping(struct rxrpc_call *call, | |
28 | bool immediate, bool background) | |
29 | { | |
30 | if (immediate) { | |
31 | if (background && | |
32 | !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) | |
33 | rxrpc_queue_call(call); | |
34 | } else { | |
35 | unsigned long now = jiffies; | |
36 | unsigned long ping_at = now + rxrpc_idle_ack_delay; | |
37 | ||
38 | if (time_before(ping_at, call->ping_at)) { | |
39 | WRITE_ONCE(call->ping_at, ping_at); | |
40 | rxrpc_reduce_call_timer(call, ping_at, now, | |
41 | rxrpc_timer_set_for_ping); | |
42 | } | |
43 | } | |
44 | } | |
45 | ||
46 | /* | |
47 | * propose an ACK be sent | |
48 | */ | |
49 | static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, | |
50 | u16 skew, u32 serial, bool immediate, | |
51 | bool background, | |
52 | enum rxrpc_propose_ack_trace why) | |
53 | { | |
54 | enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; | |
55 | unsigned long expiry = rxrpc_soft_ack_delay; | |
56 | s8 prior = rxrpc_ack_priority[ack_reason]; | |
57 | ||
58 | /* Pings are handled specially because we don't want to accidentally | |
59 | * lose a ping response by subsuming it into a ping. | |
60 | */ | |
61 | if (ack_reason == RXRPC_ACK_PING) { | |
62 | rxrpc_propose_ping(call, immediate, background); | |
63 | goto trace; | |
64 | } | |
65 | ||
66 | /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial | |
67 | * numbers, but we don't alter the timeout. | |
68 | */ | |
69 | _debug("prior %u %u vs %u %u", | |
70 | ack_reason, prior, | |
71 | call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); | |
72 | if (ack_reason == call->ackr_reason) { | |
73 | if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { | |
74 | outcome = rxrpc_propose_ack_update; | |
75 | call->ackr_serial = serial; | |
76 | call->ackr_skew = skew; | |
77 | } | |
78 | if (!immediate) | |
79 | goto trace; | |
80 | } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { | |
81 | call->ackr_reason = ack_reason; | |
82 | call->ackr_serial = serial; | |
83 | call->ackr_skew = skew; | |
84 | } else { | |
85 | outcome = rxrpc_propose_ack_subsume; | |
86 | } | |
87 | ||
88 | switch (ack_reason) { | |
89 | case RXRPC_ACK_REQUESTED: | |
90 | if (rxrpc_requested_ack_delay < expiry) | |
91 | expiry = rxrpc_requested_ack_delay; | |
92 | if (serial == 1) | |
93 | immediate = false; | |
94 | break; | |
95 | ||
96 | case RXRPC_ACK_DELAY: | |
97 | if (rxrpc_soft_ack_delay < expiry) | |
98 | expiry = rxrpc_soft_ack_delay; | |
99 | break; | |
100 | ||
101 | case RXRPC_ACK_IDLE: | |
102 | if (rxrpc_idle_ack_delay < expiry) | |
103 | expiry = rxrpc_idle_ack_delay; | |
104 | break; | |
105 | ||
106 | default: | |
107 | immediate = true; | |
108 | break; | |
109 | } | |
110 | ||
111 | if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { | |
112 | _debug("already scheduled"); | |
113 | } else if (immediate || expiry == 0) { | |
114 | _debug("immediate ACK %lx", call->events); | |
115 | if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && | |
116 | background) | |
117 | rxrpc_queue_call(call); | |
118 | } else { | |
119 | unsigned long now = jiffies, ack_at; | |
120 | ||
121 | if (call->peer->rtt_usage > 0) | |
122 | ack_at = nsecs_to_jiffies(call->peer->rtt); | |
123 | else | |
124 | ack_at = expiry; | |
125 | ||
126 | ack_at += now; | |
127 | if (time_before(ack_at, call->ack_at)) { | |
128 | WRITE_ONCE(call->ack_at, ack_at); | |
129 | rxrpc_reduce_call_timer(call, ack_at, now, | |
130 | rxrpc_timer_set_for_ack); | |
131 | } | |
132 | } | |
133 | ||
134 | trace: | |
135 | trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, | |
136 | background, outcome); | |
137 | } | |
138 | ||
139 | /* | |
140 | * propose an ACK be sent, locking the call structure | |
141 | */ | |
142 | void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, | |
143 | u16 skew, u32 serial, bool immediate, bool background, | |
144 | enum rxrpc_propose_ack_trace why) | |
145 | { | |
146 | spin_lock_bh(&call->lock); | |
147 | __rxrpc_propose_ACK(call, ack_reason, skew, serial, | |
148 | immediate, background, why); | |
149 | spin_unlock_bh(&call->lock); | |
150 | } | |
151 | ||
152 | /* | |
153 | * Handle congestion being detected by the retransmit timeout. | |
154 | */ | |
155 | static void rxrpc_congestion_timeout(struct rxrpc_call *call) | |
156 | { | |
157 | set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); | |
158 | } | |
159 | ||
160 | /* | |
161 | * Perform retransmission of NAK'd and unack'd packets. | |
162 | */ | |
163 | static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) | |
164 | { | |
165 | struct rxrpc_skb_priv *sp; | |
166 | struct sk_buff *skb; | |
167 | unsigned long resend_at; | |
168 | rxrpc_seq_t cursor, seq, top; | |
169 | ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; | |
170 | int ix; | |
171 | u8 annotation, anno_type, retrans = 0, unacked = 0; | |
172 | ||
173 | _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); | |
174 | ||
175 | if (call->peer->rtt_usage > 1) | |
176 | timeout = ns_to_ktime(call->peer->rtt * 3 / 2); | |
177 | else | |
178 | timeout = ms_to_ktime(rxrpc_resend_timeout); | |
179 | min_timeo = ns_to_ktime((1000000000 / HZ) * 4); | |
180 | if (ktime_before(timeout, min_timeo)) | |
181 | timeout = min_timeo; | |
182 | ||
183 | now = ktime_get_real(); | |
184 | max_age = ktime_sub(now, timeout); | |
185 | ||
186 | spin_lock_bh(&call->lock); | |
187 | ||
188 | cursor = call->tx_hard_ack; | |
189 | top = call->tx_top; | |
190 | ASSERT(before_eq(cursor, top)); | |
191 | if (cursor == top) | |
192 | goto out_unlock; | |
193 | ||
194 | /* Scan the packet list without dropping the lock and decide which of | |
195 | * the packets in the Tx buffer we're going to resend and what the new | |
196 | * resend timeout will be. | |
197 | */ | |
198 | oldest = now; | |
199 | for (seq = cursor + 1; before_eq(seq, top); seq++) { | |
200 | ix = seq & RXRPC_RXTX_BUFF_MASK; | |
201 | annotation = call->rxtx_annotations[ix]; | |
202 | anno_type = annotation & RXRPC_TX_ANNO_MASK; | |
203 | annotation &= ~RXRPC_TX_ANNO_MASK; | |
204 | if (anno_type == RXRPC_TX_ANNO_ACK) | |
205 | continue; | |
206 | ||
207 | skb = call->rxtx_buffer[ix]; | |
208 | rxrpc_see_skb(skb, rxrpc_skb_tx_seen); | |
209 | sp = rxrpc_skb(skb); | |
210 | ||
211 | if (anno_type == RXRPC_TX_ANNO_UNACK) { | |
212 | if (ktime_after(skb->tstamp, max_age)) { | |
213 | if (ktime_before(skb->tstamp, oldest)) | |
214 | oldest = skb->tstamp; | |
215 | continue; | |
216 | } | |
217 | if (!(annotation & RXRPC_TX_ANNO_RESENT)) | |
218 | unacked++; | |
219 | } | |
220 | ||
221 | /* Okay, we need to retransmit a packet. */ | |
222 | call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; | |
223 | retrans++; | |
224 | trace_rxrpc_retransmit(call, seq, annotation | anno_type, | |
225 | ktime_to_ns(ktime_sub(skb->tstamp, max_age))); | |
226 | } | |
227 | ||
228 | resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now))); | |
229 | resend_at += jiffies + rxrpc_resend_timeout; | |
230 | WRITE_ONCE(call->resend_at, resend_at); | |
231 | ||
232 | if (unacked) | |
233 | rxrpc_congestion_timeout(call); | |
234 | ||
235 | /* If there was nothing that needed retransmission then it's likely | |
236 | * that an ACK got lost somewhere. Send a ping to find out instead of | |
237 | * retransmitting data. | |
238 | */ | |
239 | if (!retrans) { | |
240 | rxrpc_reduce_call_timer(call, resend_at, now, | |
241 | rxrpc_timer_set_for_resend); | |
242 | spin_unlock_bh(&call->lock); | |
243 | ack_ts = ktime_sub(now, call->acks_latest_ts); | |
244 | if (ktime_to_ns(ack_ts) < call->peer->rtt) | |
245 | goto out; | |
246 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, | |
247 | rxrpc_propose_ack_ping_for_lost_ack); | |
248 | rxrpc_send_ack_packet(call, true, NULL); | |
249 | goto out; | |
250 | } | |
251 | ||
252 | /* Now go through the Tx window and perform the retransmissions. We | |
253 | * have to drop the lock for each send. If an ACK comes in whilst the | |
254 | * lock is dropped, it may clear some of the retransmission markers for | |
255 | * packets that it soft-ACKs. | |
256 | */ | |
257 | for (seq = cursor + 1; before_eq(seq, top); seq++) { | |
258 | ix = seq & RXRPC_RXTX_BUFF_MASK; | |
259 | annotation = call->rxtx_annotations[ix]; | |
260 | anno_type = annotation & RXRPC_TX_ANNO_MASK; | |
261 | if (anno_type != RXRPC_TX_ANNO_RETRANS) | |
262 | continue; | |
263 | ||
264 | skb = call->rxtx_buffer[ix]; | |
265 | rxrpc_get_skb(skb, rxrpc_skb_tx_got); | |
266 | spin_unlock_bh(&call->lock); | |
267 | ||
268 | if (rxrpc_send_data_packet(call, skb, true) < 0) { | |
269 | rxrpc_free_skb(skb, rxrpc_skb_tx_freed); | |
270 | return; | |
271 | } | |
272 | ||
273 | if (rxrpc_is_client_call(call)) | |
274 | rxrpc_expose_client_call(call); | |
275 | ||
276 | rxrpc_free_skb(skb, rxrpc_skb_tx_freed); | |
277 | spin_lock_bh(&call->lock); | |
278 | ||
279 | /* We need to clear the retransmit state, but there are two | |
280 | * things we need to be aware of: A new ACK/NAK might have been | |
281 | * received and the packet might have been hard-ACK'd (in which | |
282 | * case it will no longer be in the buffer). | |
283 | */ | |
284 | if (after(seq, call->tx_hard_ack)) { | |
285 | annotation = call->rxtx_annotations[ix]; | |
286 | anno_type = annotation & RXRPC_TX_ANNO_MASK; | |
287 | if (anno_type == RXRPC_TX_ANNO_RETRANS || | |
288 | anno_type == RXRPC_TX_ANNO_NAK) { | |
289 | annotation &= ~RXRPC_TX_ANNO_MASK; | |
290 | annotation |= RXRPC_TX_ANNO_UNACK; | |
291 | } | |
292 | annotation |= RXRPC_TX_ANNO_RESENT; | |
293 | call->rxtx_annotations[ix] = annotation; | |
294 | } | |
295 | ||
296 | if (after(call->tx_hard_ack, seq)) | |
297 | seq = call->tx_hard_ack; | |
298 | } | |
299 | ||
300 | out_unlock: | |
301 | spin_unlock_bh(&call->lock); | |
302 | out: | |
303 | _leave(""); | |
304 | } | |
305 | ||
306 | /* | |
307 | * Handle retransmission and deferred ACK/abort generation. | |
308 | */ | |
309 | void rxrpc_process_call(struct work_struct *work) | |
310 | { | |
311 | struct rxrpc_call *call = | |
312 | container_of(work, struct rxrpc_call, processor); | |
313 | rxrpc_serial_t *send_ack; | |
314 | unsigned long now, next, t; | |
315 | ||
316 | rxrpc_see_call(call); | |
317 | ||
318 | //printk("\n--------------------\n"); | |
319 | _enter("{%d,%s,%lx}", | |
320 | call->debug_id, rxrpc_call_states[call->state], call->events); | |
321 | ||
322 | recheck_state: | |
323 | if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { | |
324 | rxrpc_send_abort_packet(call); | |
325 | goto recheck_state; | |
326 | } | |
327 | ||
328 | if (call->state == RXRPC_CALL_COMPLETE) { | |
329 | del_timer_sync(&call->timer); | |
330 | rxrpc_notify_socket(call); | |
331 | goto out_put; | |
332 | } | |
333 | ||
334 | /* Work out if any timeouts tripped */ | |
335 | now = jiffies; | |
336 | t = READ_ONCE(call->expect_rx_by); | |
337 | if (time_after_eq(now, t)) { | |
338 | trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); | |
339 | set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); | |
340 | } | |
341 | ||
342 | t = READ_ONCE(call->expect_req_by); | |
343 | if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && | |
344 | time_after_eq(now, t)) { | |
345 | trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); | |
346 | set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); | |
347 | } | |
348 | ||
349 | t = READ_ONCE(call->expect_term_by); | |
350 | if (time_after_eq(now, t)) { | |
351 | trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); | |
352 | set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); | |
353 | } | |
354 | ||
355 | t = READ_ONCE(call->ack_at); | |
356 | if (time_after_eq(now, t)) { | |
357 | trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); | |
358 | cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); | |
359 | set_bit(RXRPC_CALL_EV_ACK, &call->events); | |
360 | } | |
361 | ||
362 | t = READ_ONCE(call->ack_lost_at); | |
363 | if (time_after_eq(now, t)) { | |
364 | trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); | |
365 | cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); | |
366 | set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); | |
367 | } | |
368 | ||
369 | t = READ_ONCE(call->keepalive_at); | |
370 | if (time_after_eq(now, t)) { | |
371 | trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); | |
372 | cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); | |
373 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, | |
374 | rxrpc_propose_ack_ping_for_keepalive); | |
375 | set_bit(RXRPC_CALL_EV_PING, &call->events); | |
376 | } | |
377 | ||
378 | t = READ_ONCE(call->ping_at); | |
379 | if (time_after_eq(now, t)) { | |
380 | trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); | |
381 | cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); | |
382 | set_bit(RXRPC_CALL_EV_PING, &call->events); | |
383 | } | |
384 | ||
385 | t = READ_ONCE(call->resend_at); | |
386 | if (time_after_eq(now, t)) { | |
387 | trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); | |
388 | cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); | |
389 | set_bit(RXRPC_CALL_EV_RESEND, &call->events); | |
390 | } | |
391 | ||
392 | /* Process events */ | |
393 | if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { | |
394 | rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); | |
395 | set_bit(RXRPC_CALL_EV_ABORT, &call->events); | |
396 | goto recheck_state; | |
397 | } | |
398 | ||
399 | send_ack = NULL; | |
400 | if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { | |
401 | call->acks_lost_top = call->tx_top; | |
402 | rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, | |
403 | rxrpc_propose_ack_ping_for_lost_ack); | |
404 | send_ack = &call->acks_lost_ping; | |
405 | } | |
406 | ||
407 | if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || | |
408 | send_ack) { | |
409 | if (call->ackr_reason) { | |
410 | rxrpc_send_ack_packet(call, false, send_ack); | |
411 | goto recheck_state; | |
412 | } | |
413 | } | |
414 | ||
415 | if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { | |
416 | rxrpc_send_ack_packet(call, true, NULL); | |
417 | goto recheck_state; | |
418 | } | |
419 | ||
420 | if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { | |
421 | rxrpc_resend(call, now); | |
422 | goto recheck_state; | |
423 | } | |
424 | ||
425 | /* Make sure the timer is restarted */ | |
426 | next = call->expect_rx_by; | |
427 | ||
428 | #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } | |
429 | ||
430 | set(call->expect_req_by); | |
431 | set(call->expect_term_by); | |
432 | set(call->ack_at); | |
433 | set(call->ack_lost_at); | |
434 | set(call->resend_at); | |
435 | set(call->keepalive_at); | |
436 | set(call->ping_at); | |
437 | ||
438 | now = jiffies; | |
439 | if (time_after_eq(now, next)) | |
440 | goto recheck_state; | |
441 | ||
442 | rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); | |
443 | ||
444 | /* other events may have been raised since we started checking */ | |
445 | if (call->events && call->state < RXRPC_CALL_COMPLETE) { | |
446 | __rxrpc_queue_call(call); | |
447 | goto out; | |
448 | } | |
449 | ||
450 | out_put: | |
451 | rxrpc_put_call(call, rxrpc_call_put); | |
452 | out: | |
453 | _leave(""); | |
454 | } |