]>
Commit | Line | Data |
---|---|---|
e9b1db4e FG |
1 | From d88270eef4b56bd7973841dd1fed387ccfa83709 Mon Sep 17 00:00:00 2001 |
2 | From: Neal Cardwell <ncardwell@google.com> | |
3 | Date: Mon, 25 Jan 2016 14:01:53 -0800 | |
4 | Subject: [PATCH] tcp: fix tcp_mark_head_lost to check skb len before | |
5 | fragmenting | |
6 | ||
7 | This commit fixes a corner case in tcp_mark_head_lost() which was | |
8 | causing the WARN_ON(len > skb->len) in tcp_fragment() to fire. | |
9 | ||
10 | tcp_mark_head_lost() was assuming that if a packet has | |
11 | tcp_skb_pcount(skb) of N, then it's safe to fragment off a prefix of | |
12 | M*mss bytes, for any M < N. But with the tricky way TCP pcounts are | |
13 | maintained, this is not always true. | |
14 | ||
15 | For example, suppose the sender sends 4 1-byte packets and has the | |
16 | last 3 packets sacked. It will merge the last 3 packets in the write | |
17 | queue into an skb with pcount = 3 and len = 3 bytes. If another | |
18 | recovery happens after a sack reneging event, tcp_mark_head_lost() | |
19 | may attempt to split the skb assuming it has more than 2*MSS bytes. | |
20 | ||
21 | This sounds very counterintuitive, but as the commit description for | |
22 | the related commit c0638c247f55 ("tcp: don't fragment SACKed skbs in | |
23 | tcp_mark_head_lost()") notes, this is because tcp_shifted_skb() | |
24 | coalesces adjacent regions of SACKed skbs, and when doing this it | |
25 | preserves the sum of their packet counts in order to reflect the | |
26 | real-world dynamics on the wire. The c0638c247f55 commit tried to | |
27 | avoid problems by not fragmenting SACKed skbs, since SACKed skbs are | |
28 | where the non-proportionality between pcount and skb->len/mss is known | |
29 | to be possible. However, that commit did not handle the case where | |
30 | during a reneging event one of these weird SACKed skbs becomes an | |
31 | un-SACKed skb, which tcp_mark_head_lost() can then try to fragment. | |
32 | ||
33 | The fix is to simply mark the entire skb lost when this happens. | |
34 | This makes the recovery slightly more aggressive in such corner | |
35 | cases before we detect reordering. But once we detect reordering | |
36 | this code path is bypassed because FACK is disabled. | |
37 | ||
38 | Signed-off-by: Neal Cardwell <ncardwell@google.com> | |
39 | Signed-off-by: Yuchung Cheng <ycheng@google.com> | |
40 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
41 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
42 | Cherry-picked-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
43 | --- | |
44 | net/ipv4/tcp_input.c | 10 +++++----- | |
45 | 1 file changed, 5 insertions(+), 5 deletions(-) | |
46 | ||
47 | diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c | |
48 | index 0003d40..d2ad433 100644 | |
49 | --- a/net/ipv4/tcp_input.c | |
50 | +++ b/net/ipv4/tcp_input.c | |
51 | @@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |
52 | { | |
53 | struct tcp_sock *tp = tcp_sk(sk); | |
54 | struct sk_buff *skb; | |
55 | - int cnt, oldcnt; | |
56 | - int err; | |
57 | + int cnt, oldcnt, lost; | |
58 | unsigned int mss; | |
59 | /* Use SACK to deduce losses of new sequences sent during recovery */ | |
60 | const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; | |
61 | @@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |
62 | break; | |
63 | ||
64 | mss = tcp_skb_mss(skb); | |
65 | - err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, | |
66 | - mss, GFP_ATOMIC); | |
67 | - if (err < 0) | |
68 | + /* If needed, chop off the prefix to mark as lost. */ | |
69 | + lost = (packets - oldcnt) * mss; | |
70 | + if (lost < skb->len && | |
71 | + tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) | |
72 | break; | |
73 | cnt = packets; | |
74 | } | |
75 | -- | |
76 | 2.1.4 | |
77 |