]>
Commit | Line | Data |
---|---|---|
2a91aa39 AB |
1 | /* |
2 | * net/dccp/ccids/ccid2.c | |
3 | * | |
4 | * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk> | |
5 | * | |
6 | * Changes to meet Linux coding standards, and DCCP infrastructure fixes. | |
7 | * | |
8 | * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License as published by | |
12 | * the Free Software Foundation; either version 2 of the License, or | |
13 | * (at your option) any later version. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | * GNU General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU General Public License | |
21 | * along with this program; if not, write to the Free Software | |
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
23 | */ | |
24 | ||
25 | /* | |
0e64e94e | 26 | * This implementation should follow RFC 4341 |
2a91aa39 | 27 | */ |
86349c8d | 28 | #include "../feat.h" |
2a91aa39 AB |
29 | #include "../ccid.h" |
30 | #include "../dccp.h" | |
31 | #include "ccid2.h" | |
32 | ||
2a91aa39 | 33 | |
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
/*
 * Run-time switch for CCID-2 debug output. It is registered further down in
 * this file via module_param(ccid2_debug, bool, ...), hence it is declared as
 * `bool' so that the variable type agrees with the parameter type.
 */
static bool ccid2_debug;
#define ccid2_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid2_debug, format, ##a)
#else
/* Debugging not configured: the macro expands to nothing (no evaluation). */
#define ccid2_pr_debug(format, a...)
#endif
40 | ||
cd1f7d34 | 41 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) |
07978aab AB |
42 | { |
43 | struct ccid2_seq *seqp; | |
44 | int i; | |
45 | ||
46 | /* check if we have space to preserve the pointer to the buffer */ | |
1fb87509 | 47 | if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *)) |
07978aab AB |
48 | return -ENOMEM; |
49 | ||
50 | /* allocate buffer and initialize linked list */ | |
cd1f7d34 | 51 | seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any()); |
07978aab AB |
52 | if (seqp == NULL) |
53 | return -ENOMEM; | |
54 | ||
cd1f7d34 | 55 | for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) { |
07978aab AB |
56 | seqp[i].ccid2s_next = &seqp[i + 1]; |
57 | seqp[i + 1].ccid2s_prev = &seqp[i]; | |
58 | } | |
cd1f7d34 GR |
59 | seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp; |
60 | seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; | |
07978aab AB |
61 | |
62 | /* This is the first allocation. Initiate the head and tail. */ | |
1fb87509 GR |
63 | if (hctx->seqbufc == 0) |
64 | hctx->seqh = hctx->seqt = seqp; | |
07978aab AB |
65 | else { |
66 | /* link the existing list with the one we just created */ | |
1fb87509 GR |
67 | hctx->seqh->ccid2s_next = seqp; |
68 | seqp->ccid2s_prev = hctx->seqh; | |
07978aab | 69 | |
1fb87509 GR |
70 | hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; |
71 | seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt; | |
07978aab AB |
72 | } |
73 | ||
74 | /* store the original pointer to the buffer so we can free it */ | |
1fb87509 GR |
75 | hctx->seqbuf[hctx->seqbufc] = seqp; |
76 | hctx->seqbufc++; | |
07978aab AB |
77 | |
78 | return 0; | |
79 | } | |
80 | ||
6b57c93d | 81 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
2a91aa39 | 82 | { |
83337dae GR |
83 | if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) |
84 | return CCID_PACKET_WILL_DEQUEUE_LATER; | |
85 | return CCID_PACKET_SEND_AT_ONCE; | |
2a91aa39 AB |
86 | } |
87 | ||
df054e1d | 88 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) |
2a91aa39 AB |
89 | { |
90 | struct dccp_sock *dp = dccp_sk(sk); | |
1fb87509 | 91 | u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2); |
d50ad163 | 92 | |
2a91aa39 | 93 | /* |
d50ad163 GR |
94 | * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from |
95 | * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always | |
96 | * acceptable since this causes starvation/deadlock whenever cwnd < 2. | |
97 | * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled). | |
2a91aa39 | 98 | */ |
d50ad163 GR |
99 | if (val == 0 || val > max_ratio) { |
100 | DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); | |
101 | val = max_ratio; | |
2a91aa39 | 102 | } |
86349c8d GR |
103 | if (val > DCCPF_ACK_RATIO_MAX) |
104 | val = DCCPF_ACK_RATIO_MAX; | |
2a91aa39 | 105 | |
d50ad163 GR |
106 | if (val == dp->dccps_l_ack_ratio) |
107 | return; | |
108 | ||
df054e1d | 109 | ccid2_pr_debug("changing local ack ratio to %u\n", val); |
2a91aa39 AB |
110 | dp->dccps_l_ack_ratio = val; |
111 | } | |
112 | ||
2a91aa39 AB |
113 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
114 | { | |
115 | struct sock *sk = (struct sock *)data; | |
116 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | |
83337dae | 117 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); |
2a91aa39 | 118 | |
2a91aa39 AB |
119 | bh_lock_sock(sk); |
120 | if (sock_owned_by_user(sk)) { | |
1fb87509 | 121 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5); |
2a91aa39 AB |
122 | goto out; |
123 | } | |
124 | ||
125 | ccid2_pr_debug("RTO_EXPIRE\n"); | |
126 | ||
2a91aa39 | 127 | /* back-off timer */ |
1fb87509 | 128 | hctx->rto <<= 1; |
1435562d GR |
129 | if (hctx->rto > DCCP_RTO_MAX) |
130 | hctx->rto = DCCP_RTO_MAX; | |
2a91aa39 | 131 | |
2a91aa39 | 132 | /* adjust pipe, cwnd etc */ |
1fb87509 GR |
133 | hctx->ssthresh = hctx->cwnd / 2; |
134 | if (hctx->ssthresh < 2) | |
135 | hctx->ssthresh = 2; | |
136 | hctx->cwnd = 1; | |
137 | hctx->pipe = 0; | |
2a91aa39 AB |
138 | |
139 | /* clear state about stuff we sent */ | |
1fb87509 GR |
140 | hctx->seqt = hctx->seqh; |
141 | hctx->packets_acked = 0; | |
2a91aa39 AB |
142 | |
143 | /* clear ack ratio state. */ | |
1fb87509 GR |
144 | hctx->rpseq = 0; |
145 | hctx->rpdupack = -1; | |
2a91aa39 | 146 | ccid2_change_l_ack_ratio(sk, 1); |
83337dae GR |
147 | |
148 | /* if we were blocked before, we may now send cwnd=1 packet */ | |
149 | if (sender_was_blocked) | |
150 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | |
20bbd0f7 GR |
151 | /* restart backed-off timer */ |
152 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); | |
2a91aa39 AB |
153 | out: |
154 | bh_unlock_sock(sk); | |
77ff72d5 | 155 | sock_put(sk); |
2a91aa39 AB |
156 | } |
157 | ||
c506d91d | 158 | static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
2a91aa39 AB |
159 | { |
160 | struct dccp_sock *dp = dccp_sk(sk); | |
161 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | |
07978aab | 162 | struct ccid2_seq *next; |
2a91aa39 | 163 | |
1fb87509 | 164 | hctx->pipe++; |
2a91aa39 | 165 | |
1fb87509 GR |
166 | hctx->seqh->ccid2s_seq = dp->dccps_gss; |
167 | hctx->seqh->ccid2s_acked = 0; | |
168 | hctx->seqh->ccid2s_sent = jiffies; | |
2a91aa39 | 169 | |
1fb87509 | 170 | next = hctx->seqh->ccid2s_next; |
07978aab | 171 | /* check if we need to alloc more space */ |
1fb87509 | 172 | if (next == hctx->seqt) { |
7d9e8931 GR |
173 | if (ccid2_hc_tx_alloc_seq(hctx)) { |
174 | DCCP_CRIT("packet history - out of memory!"); | |
175 | /* FIXME: find a more graceful way to bail out */ | |
176 | return; | |
177 | } | |
1fb87509 GR |
178 | next = hctx->seqh->ccid2s_next; |
179 | BUG_ON(next == hctx->seqt); | |
2a91aa39 | 180 | } |
1fb87509 | 181 | hctx->seqh = next; |
07978aab | 182 | |
1fb87509 | 183 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe); |
2a91aa39 | 184 | |
900bfed4 GR |
185 | /* |
186 | * FIXME: The code below is broken and the variables have been removed | |
187 | * from the socket struct. The `ackloss' variable was always set to 0, | |
188 | * and with arsent there are several problems: | |
189 | * (i) it doesn't just count the number of Acks, but all sent packets; | |
190 | * (ii) it is expressed in # of packets, not # of windows, so the | |
191 | * comparison below uses the wrong formula: Appendix A of RFC 4341 | |
192 | * comes up with the number K = cwnd / (R^2 - R) of consecutive windows | |
193 | * of data with no lost or marked Ack packets. If arsent were the # of | |
194 | * consecutive Acks received without loss, then Ack Ratio needs to be | |
195 | * decreased by 1 when | |
196 | * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2) | |
197 | * where cwnd / R is the number of Acks received per window of data | |
198 | * (cf. RFC 4341, App. A). The problems are that | |
199 | * - arsent counts other packets as well; | |
200 | * - the comparison uses a formula different from RFC 4341; | |
201 | * - computing a cubic/quadratic equation each time is too complicated. | |
202 | * Hence a different algorithm is needed. | |
203 | */ | |
204 | #if 0 | |
2a91aa39 | 205 | /* Ack Ratio. Need to maintain a concept of how many windows we sent */ |
1fb87509 | 206 | hctx->arsent++; |
2a91aa39 | 207 | /* We had an ack loss in this window... */ |
1fb87509 GR |
208 | if (hctx->ackloss) { |
209 | if (hctx->arsent >= hctx->cwnd) { | |
210 | hctx->arsent = 0; | |
211 | hctx->ackloss = 0; | |
2a91aa39 | 212 | } |
c0c736db ACM |
213 | } else { |
214 | /* No acks lost up to now... */ | |
2a91aa39 AB |
215 | /* decrease ack ratio if enough packets were sent */ |
216 | if (dp->dccps_l_ack_ratio > 1) { | |
217 | /* XXX don't calculate denominator each time */ | |
c0c736db ACM |
218 | int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - |
219 | dp->dccps_l_ack_ratio; | |
2a91aa39 | 220 | |
1fb87509 | 221 | denom = hctx->cwnd * hctx->cwnd / denom; |
2a91aa39 | 222 | |
1fb87509 | 223 | if (hctx->arsent >= denom) { |
2a91aa39 | 224 | ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); |
1fb87509 | 225 | hctx->arsent = 0; |
2a91aa39 | 226 | } |
c0c736db ACM |
227 | } else { |
228 | /* we can't increase ack ratio further [1] */ | |
1fb87509 | 229 | hctx->arsent = 0; /* or maybe set it to cwnd*/ |
2a91aa39 AB |
230 | } |
231 | } | |
900bfed4 | 232 | #endif |
2a91aa39 AB |
233 | |
234 | /* setup RTO timer */ | |
1fb87509 | 235 | if (!timer_pending(&hctx->rtotimer)) |
20bbd0f7 | 236 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); |
c0c736db | 237 | |
8d424f6c | 238 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
2a91aa39 | 239 | do { |
1fb87509 | 240 | struct ccid2_seq *seqp = hctx->seqt; |
2a91aa39 | 241 | |
1fb87509 | 242 | while (seqp != hctx->seqh) { |
2a91aa39 | 243 | ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", |
8109b02b | 244 | (unsigned long long)seqp->ccid2s_seq, |
234af484 | 245 | seqp->ccid2s_acked, seqp->ccid2s_sent); |
2a91aa39 AB |
246 | seqp = seqp->ccid2s_next; |
247 | } | |
c0c736db | 248 | } while (0); |
2a91aa39 | 249 | ccid2_pr_debug("=========\n"); |
2a91aa39 AB |
250 | #endif |
251 | } | |
252 | ||
1435562d GR |
253 | /** |
254 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm | |
255 | * This code is almost identical with TCP's tcp_rtt_estimator(), since | |
256 | * - it has a higher sampling frequency (recommended by RFC 1323), | |
257 | * - the RTO does not collapse into RTT due to RTTVAR going towards zero, | |
258 | * - it is simple (cf. more complex proposals such as Eifel timer or research | |
259 | * which suggests that the gain should be set according to window size), | |
260 | * - in tests it was found to work well with CCID2 [gerrit]. | |
261 | */ | |
262 | static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) | |
263 | { | |
264 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | |
265 | long m = mrtt ? : 1; | |
266 | ||
267 | if (hctx->srtt == 0) { | |
268 | /* First measurement m */ | |
269 | hctx->srtt = m << 3; | |
270 | hctx->mdev = m << 1; | |
271 | ||
272 | hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev); | |
273 | hctx->rttvar = hctx->mdev_max; | |
274 | hctx->rtt_seq = dccp_sk(sk)->dccps_gss; | |
275 | } else { | |
276 | /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ | |
277 | m -= (hctx->srtt >> 3); | |
278 | hctx->srtt += m; | |
279 | ||
280 | /* Similarly, update scaled mdev with regard to |m| */ | |
281 | if (m < 0) { | |
282 | m = -m; | |
283 | m -= (hctx->mdev >> 2); | |
284 | /* | |
285 | * This neutralises RTO increase when RTT < SRTT - mdev | |
286 | * (see P. Sarolahti, A. Kuznetsov,"Congestion Control | |
287 | * in Linux TCP", USENIX 2002, pp. 49-62). | |
288 | */ | |
289 | if (m > 0) | |
290 | m >>= 3; | |
291 | } else { | |
292 | m -= (hctx->mdev >> 2); | |
293 | } | |
294 | hctx->mdev += m; | |
295 | ||
296 | if (hctx->mdev > hctx->mdev_max) { | |
297 | hctx->mdev_max = hctx->mdev; | |
298 | if (hctx->mdev_max > hctx->rttvar) | |
299 | hctx->rttvar = hctx->mdev_max; | |
300 | } | |
301 | ||
302 | /* | |
303 | * Decay RTTVAR at most once per flight, exploiting that | |
304 | * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) | |
305 | * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) | |
306 | * GAR is a useful bound for FlightSize = pipe, AWL is probably | |
307 | * too low as it over-estimates pipe. | |
308 | */ | |
309 | if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) { | |
310 | if (hctx->mdev_max < hctx->rttvar) | |
311 | hctx->rttvar -= (hctx->rttvar - | |
312 | hctx->mdev_max) >> 2; | |
313 | hctx->rtt_seq = dccp_sk(sk)->dccps_gss; | |
314 | hctx->mdev_max = TCP_RTO_MIN; | |
315 | } | |
316 | } | |
317 | ||
318 | /* | |
319 | * Set RTO from SRTT and RTTVAR | |
320 | * Clock granularity is ignored since the minimum error for RTTVAR is | |
321 | * clamped to 50msec (corresponding to HZ=20). This leads to a minimum | |
322 | * RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP | |
323 | * does not retransmit data, DCCP does not require TCP's recommended | |
324 | * minimum timeout of one second". | |
325 | */ | |
326 | hctx->rto = (hctx->srtt >> 3) + hctx->rttvar; | |
327 | ||
328 | if (hctx->rto > DCCP_RTO_MAX) | |
329 | hctx->rto = DCCP_RTO_MAX; | |
330 | } | |
331 | ||
332 | static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, | |
333 | unsigned int *maxincr) | |
2a91aa39 AB |
334 | { |
335 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | |
336 | ||
1fb87509 GR |
337 | if (hctx->cwnd < hctx->ssthresh) { |
338 | if (*maxincr > 0 && ++hctx->packets_acked == 2) { | |
339 | hctx->cwnd += 1; | |
340 | *maxincr -= 1; | |
341 | hctx->packets_acked = 0; | |
2a91aa39 | 342 | } |
1fb87509 GR |
343 | } else if (++hctx->packets_acked >= hctx->cwnd) { |
344 | hctx->cwnd += 1; | |
345 | hctx->packets_acked = 0; | |
2a91aa39 | 346 | } |
1435562d GR |
347 | /* |
348 | * FIXME: RTT is sampled several times per acknowledgment (for each | |
349 | * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). | |
350 | * This causes the RTT to be over-estimated, since the older entries | |
351 | * in the Ack Vector have earlier sending times. | |
352 | * The cleanest solution is to not use the ccid2s_sent field at all | |
353 | * and instead use DCCP timestamps - need to be resolved at some time. | |
354 | */ | |
355 | ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent); | |
2a91aa39 AB |
356 | } |
357 | ||
d50ad163 | 358 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) |
374bcf32 | 359 | { |
d50ad163 GR |
360 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
361 | ||
1fb87509 | 362 | if (time_before(seqp->ccid2s_sent, hctx->last_cong)) { |
374bcf32 AB |
363 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); |
364 | return; | |
365 | } | |
366 | ||
1fb87509 | 367 | hctx->last_cong = jiffies; |
374bcf32 | 368 | |
1fb87509 GR |
369 | hctx->cwnd = hctx->cwnd / 2 ? : 1U; |
370 | hctx->ssthresh = max(hctx->cwnd, 2U); | |
d50ad163 GR |
371 | |
372 | /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ | |
1fb87509 GR |
373 | if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd) |
374 | ccid2_change_l_ack_ratio(sk, hctx->cwnd); | |
374bcf32 AB |
375 | } |
376 | ||
c8bf462b GR |
377 | static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, |
378 | u8 option, u8 *optval, u8 optlen) | |
379 | { | |
380 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | |
381 | ||
382 | switch (option) { | |
383 | case DCCPO_ACK_VECTOR_0: | |
384 | case DCCPO_ACK_VECTOR_1: | |
385 | return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen, | |
386 | option - DCCPO_ACK_VECTOR_0); | |
387 | } | |
388 | return 0; | |
389 | } | |
390 | ||
2a91aa39 AB |
391 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
392 | { | |
393 | struct dccp_sock *dp = dccp_sk(sk); | |
394 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | |
83337dae | 395 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); |
c8bf462b | 396 | struct dccp_ackvec_parsed *avp; |
2a91aa39 AB |
397 | u64 ackno, seqno; |
398 | struct ccid2_seq *seqp; | |
2a91aa39 | 399 | int done = 0; |
2a91aa39 AB |
400 | unsigned int maxincr = 0; |
401 | ||
2a91aa39 AB |
402 | /* check reverse path congestion */ |
403 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; | |
404 | ||
405 | /* XXX this whole "algorithm" is broken. Need to fix it to keep track | |
406 | * of the seqnos of the dupacks so that rpseq and rpdupack are correct | |
407 | * -sorbo. | |
408 | */ | |
409 | /* need to bootstrap */ | |
1fb87509 GR |
410 | if (hctx->rpdupack == -1) { |
411 | hctx->rpdupack = 0; | |
412 | hctx->rpseq = seqno; | |
c0c736db | 413 | } else { |
2a91aa39 | 414 | /* check if packet is consecutive */ |
1fb87509 GR |
415 | if (dccp_delta_seqno(hctx->rpseq, seqno) == 1) |
416 | hctx->rpseq = seqno; | |
2a91aa39 | 417 | /* it's a later packet */ |
1fb87509 GR |
418 | else if (after48(seqno, hctx->rpseq)) { |
419 | hctx->rpdupack++; | |
2a91aa39 AB |
420 | |
421 | /* check if we got enough dupacks */ | |
1fb87509 GR |
422 | if (hctx->rpdupack >= NUMDUPACK) { |
423 | hctx->rpdupack = -1; /* XXX lame */ | |
424 | hctx->rpseq = 0; | |
2a91aa39 | 425 | |
df054e1d | 426 | ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); |
2a91aa39 AB |
427 | } |
428 | } | |
429 | } | |
430 | ||
431 | /* check forward path congestion */ | |
c8bf462b | 432 | if (dccp_packet_without_ack(skb)) |
2a91aa39 AB |
433 | return; |
434 | ||
c8bf462b GR |
435 | /* still didn't send out new data packets */ |
436 | if (hctx->seqh == hctx->seqt) | |
437 | goto done; | |
2a91aa39 AB |
438 | |
439 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | |
1fb87509 GR |
440 | if (after48(ackno, hctx->high_ack)) |
441 | hctx->high_ack = ackno; | |
32aac18d | 442 | |
1fb87509 | 443 | seqp = hctx->seqt; |
32aac18d AB |
444 | while (before48(seqp->ccid2s_seq, ackno)) { |
445 | seqp = seqp->ccid2s_next; | |
1fb87509 GR |
446 | if (seqp == hctx->seqh) { |
447 | seqp = hctx->seqh->ccid2s_prev; | |
32aac18d AB |
448 | break; |
449 | } | |
450 | } | |
2a91aa39 | 451 | |
a3020025 GR |
452 | /* |
453 | * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2 | |
454 | * packets per acknowledgement. Rounding up avoids that cwnd is not | |
455 | * advanced when Ack Ratio is 1 and gives a slight edge otherwise. | |
2a91aa39 | 456 | */ |
1fb87509 | 457 | if (hctx->cwnd < hctx->ssthresh) |
a3020025 | 458 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); |
2a91aa39 AB |
459 | |
460 | /* go through all ack vectors */ | |
c8bf462b | 461 | list_for_each_entry(avp, &hctx->av_chunks, node) { |
2a91aa39 | 462 | /* go through this ack vector */ |
c8bf462b GR |
463 | for (; avp->len--; avp->vec++) { |
464 | u64 ackno_end_rl = SUB48(ackno, | |
465 | dccp_ackvec_runlen(avp->vec)); | |
2a91aa39 | 466 | |
c8bf462b | 467 | ccid2_pr_debug("ackvec %llu |%u,%u|\n", |
234af484 | 468 | (unsigned long long)ackno, |
c8bf462b GR |
469 | dccp_ackvec_state(avp->vec) >> 6, |
470 | dccp_ackvec_runlen(avp->vec)); | |
2a91aa39 AB |
471 | /* if the seqno we are analyzing is larger than the |
472 | * current ackno, then move towards the tail of our | |
473 | * seqnos. | |
474 | */ | |
475 | while (after48(seqp->ccid2s_seq, ackno)) { | |
1fb87509 | 476 | if (seqp == hctx->seqt) { |
2a91aa39 AB |
477 | done = 1; |
478 | break; | |
479 | } | |
480 | seqp = seqp->ccid2s_prev; | |
481 | } | |
482 | if (done) | |
483 | break; | |
484 | ||
485 | /* check all seqnos in the range of the vector | |
486 | * run length | |
487 | */ | |
488 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { | |
c8bf462b | 489 | const u8 state = dccp_ackvec_state(avp->vec); |
2a91aa39 AB |
490 | |
491 | /* new packet received or marked */ | |
ff49e270 | 492 | if (state != DCCPAV_NOT_RECEIVED && |
2a91aa39 | 493 | !seqp->ccid2s_acked) { |
ff49e270 | 494 | if (state == DCCPAV_ECN_MARKED) |
d50ad163 | 495 | ccid2_congestion_event(sk, |
374bcf32 | 496 | seqp); |
ff49e270 | 497 | else |
2a91aa39 AB |
498 | ccid2_new_ack(sk, seqp, |
499 | &maxincr); | |
2a91aa39 AB |
500 | |
501 | seqp->ccid2s_acked = 1; | |
502 | ccid2_pr_debug("Got ack for %llu\n", | |
234af484 | 503 | (unsigned long long)seqp->ccid2s_seq); |
e9803c01 | 504 | hctx->pipe--; |
2a91aa39 | 505 | } |
1fb87509 | 506 | if (seqp == hctx->seqt) { |
2a91aa39 AB |
507 | done = 1; |
508 | break; | |
509 | } | |
3de5489f | 510 | seqp = seqp->ccid2s_prev; |
2a91aa39 AB |
511 | } |
512 | if (done) | |
513 | break; | |
514 | ||
cfbbeabc | 515 | ackno = SUB48(ackno_end_rl, 1); |
2a91aa39 AB |
516 | } |
517 | if (done) | |
518 | break; | |
519 | } | |
520 | ||
521 | /* The state about what is acked should be correct now | |
522 | * Check for NUMDUPACK | |
523 | */ | |
1fb87509 GR |
524 | seqp = hctx->seqt; |
525 | while (before48(seqp->ccid2s_seq, hctx->high_ack)) { | |
32aac18d | 526 | seqp = seqp->ccid2s_next; |
1fb87509 GR |
527 | if (seqp == hctx->seqh) { |
528 | seqp = hctx->seqh->ccid2s_prev; | |
32aac18d AB |
529 | break; |
530 | } | |
531 | } | |
2a91aa39 AB |
532 | done = 0; |
533 | while (1) { | |
534 | if (seqp->ccid2s_acked) { | |
535 | done++; | |
63df18ad | 536 | if (done == NUMDUPACK) |
2a91aa39 | 537 | break; |
2a91aa39 | 538 | } |
1fb87509 | 539 | if (seqp == hctx->seqt) |
2a91aa39 | 540 | break; |
2a91aa39 AB |
541 | seqp = seqp->ccid2s_prev; |
542 | } | |
543 | ||
544 | /* If there are at least 3 acknowledgements, anything unacknowledged | |
545 | * below the last sequence number is considered lost | |
546 | */ | |
63df18ad | 547 | if (done == NUMDUPACK) { |
2a91aa39 AB |
548 | struct ccid2_seq *last_acked = seqp; |
549 | ||
550 | /* check for lost packets */ | |
551 | while (1) { | |
552 | if (!seqp->ccid2s_acked) { | |
374bcf32 | 553 | ccid2_pr_debug("Packet lost: %llu\n", |
234af484 | 554 | (unsigned long long)seqp->ccid2s_seq); |
374bcf32 AB |
555 | /* XXX need to traverse from tail -> head in |
556 | * order to detect multiple congestion events in | |
557 | * one ack vector. | |
558 | */ | |
d50ad163 | 559 | ccid2_congestion_event(sk, seqp); |
e9803c01 | 560 | hctx->pipe--; |
2a91aa39 | 561 | } |
1fb87509 | 562 | if (seqp == hctx->seqt) |
2a91aa39 AB |
563 | break; |
564 | seqp = seqp->ccid2s_prev; | |
565 | } | |
566 | ||
1fb87509 | 567 | hctx->seqt = last_acked; |
2a91aa39 AB |
568 | } |
569 | ||
570 | /* trim acked packets in tail */ | |
1fb87509 GR |
571 | while (hctx->seqt != hctx->seqh) { |
572 | if (!hctx->seqt->ccid2s_acked) | |
2a91aa39 AB |
573 | break; |
574 | ||
1fb87509 | 575 | hctx->seqt = hctx->seqt->ccid2s_next; |
2a91aa39 AB |
576 | } |
577 | ||
e9803c01 GR |
578 | /* restart RTO timer if not all outstanding data has been acked */ |
579 | if (hctx->pipe == 0) | |
580 | sk_stop_timer(sk, &hctx->rtotimer); | |
581 | else | |
1435562d | 582 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); |
c8bf462b | 583 | done: |
83337dae GR |
584 | /* check if incoming Acks allow pending packets to be sent */ |
585 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) | |
586 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | |
c8bf462b | 587 | dccp_ackvec_parsed_cleanup(&hctx->av_chunks); |
2a91aa39 AB |
588 | } |
589 | ||
91f0ebf7 | 590 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
2a91aa39 | 591 | { |
c9eaf173 | 592 | struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); |
b00d2bbc GR |
593 | struct dccp_sock *dp = dccp_sk(sk); |
594 | u32 max_ratio; | |
2a91aa39 | 595 | |
b00d2bbc | 596 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ |
1fb87509 | 597 | hctx->ssthresh = ~0U; |
2a91aa39 | 598 | |
6224877b GR |
599 | /* Use larger initial windows (RFC 3390, rfc2581bis) */ |
600 | hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); | |
b00d2bbc GR |
601 | |
602 | /* Make sure that Ack Ratio is enabled and within bounds. */ | |
1fb87509 | 603 | max_ratio = DIV_ROUND_UP(hctx->cwnd, 2); |
b00d2bbc GR |
604 | if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) |
605 | dp->dccps_l_ack_ratio = max_ratio; | |
606 | ||
2a91aa39 | 607 | /* XXX init ~ to window size... */ |
cd1f7d34 | 608 | if (ccid2_hc_tx_alloc_seq(hctx)) |
2a91aa39 | 609 | return -ENOMEM; |
91f0ebf7 | 610 | |
1435562d | 611 | hctx->rto = DCCP_TIMEOUT_INIT; |
1fb87509 GR |
612 | hctx->rpdupack = -1; |
613 | hctx->last_cong = jiffies; | |
614 | setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); | |
c8bf462b | 615 | INIT_LIST_HEAD(&hctx->av_chunks); |
2a91aa39 AB |
616 | return 0; |
617 | } | |
618 | ||
619 | static void ccid2_hc_tx_exit(struct sock *sk) | |
620 | { | |
c9eaf173 | 621 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
07978aab | 622 | int i; |
2a91aa39 | 623 | |
20bbd0f7 | 624 | sk_stop_timer(sk, &hctx->rtotimer); |
07978aab | 625 | |
1fb87509 GR |
626 | for (i = 0; i < hctx->seqbufc; i++) |
627 | kfree(hctx->seqbuf[i]); | |
628 | hctx->seqbufc = 0; | |
2a91aa39 AB |
629 | } |
630 | ||
631 | static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |
632 | { | |
633 | const struct dccp_sock *dp = dccp_sk(sk); | |
634 | struct ccid2_hc_rx_sock *hcrx = ccid2_hc_rx_sk(sk); | |
635 | ||
636 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | |
637 | case DCCP_PKT_DATA: | |
638 | case DCCP_PKT_DATAACK: | |
1fb87509 GR |
639 | hcrx->data++; |
640 | if (hcrx->data >= dp->dccps_r_ack_ratio) { | |
2a91aa39 | 641 | dccp_send_ack(sk); |
1fb87509 | 642 | hcrx->data = 0; |
2a91aa39 AB |
643 | } |
644 | break; | |
645 | } | |
646 | } | |
647 | ||
91f0ebf7 | 648 | static struct ccid_operations ccid2 = { |
c8bf462b GR |
649 | .ccid_id = DCCPC_CCID2, |
650 | .ccid_name = "TCP-like", | |
651 | .ccid_owner = THIS_MODULE, | |
652 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), | |
653 | .ccid_hc_tx_init = ccid2_hc_tx_init, | |
654 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, | |
655 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, | |
656 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, | |
657 | .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, | |
658 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, | |
659 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), | |
660 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | |
2a91aa39 AB |
661 | }; |
662 | ||
/* Register ccid2_debug as a module parameter (mode 0644); only present when CCID-2 debugging is configured. */
84116716 | 663 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
43264991 | 664 | module_param(ccid2_debug, bool, 0644);
2a91aa39 | 665 | MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
84116716 | 666 | #endif
2a91aa39 AB |
667 | |
668 | static __init int ccid2_module_init(void) | |
669 | { | |
670 | return ccid_register(&ccid2); | |
671 | } | |
672 | module_init(ccid2_module_init); | |
673 | ||
674 | static __exit void ccid2_module_exit(void) | |
675 | { | |
676 | ccid_unregister(&ccid2); | |
677 | } | |
678 | module_exit(ccid2_module_exit); | |
679 | ||
/* Module metadata: author, description, licence and the module alias string. */
680 | MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>");
c0c736db | 681 | MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID");
2a91aa39 AB |
682 | MODULE_LICENSE("GPL");
683 | MODULE_ALIAS("net-dccp-ccid-2");