UBUNTU: SAUCE: tcp: tcp_fragment() should apply sane memory limits

[mirror_ubuntu-bionic-kernel.git] / net / ipv4 / tcp_output.c
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index a4d214c7b506df70e4eb980a488880243986d836..57d2735db97b90c543e8d5d970424b2269b972c5 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -160,8 +160,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
  }
  
  /* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+                                     u32 rcv_nxt)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (unlikely(rcv_nxt != tp->rcv_nxt))
+               return;  /* Special ACK sent by DCTCP to reflect ECN */
         tcp_dec_quickack_mode(sk, pkts);
         inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
  }
@@ -1031,8 +1036,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
   * We are working here with either a clone of the original
   * SKB, or a fresh unique copy made by the retransmit engine.
   */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
-                           gfp_t gfp_mask)
+static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
+                             int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct inet_sock *inet;
@@ -1108,7 +1113,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         th->source              = inet->inet_sport;
         th->dest                = inet->inet_dport;
         th->seq                 = htonl(tcb->seq);
-       th->ack_seq             = htonl(tp->rcv_nxt);
+       th->ack_seq             = htonl(rcv_nxt);
         *(((__be16 *)th) + 6)   = htons(((tcp_header_size >> 2) << 12) |
                                         tcb->tcp_flags);
  
@@ -1149,7 +1154,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         icsk->icsk_af_ops->send_check(sk, skb);
  
         if (likely(tcb->tcp_flags & TCPHDR_ACK))
-               tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+               tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
  
         if (skb->len != tcp_header_size) {
                 tcp_event_data_sent(tp, sk);
@@ -1186,6 +1191,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         return err;
  }
  
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+                           gfp_t gfp_mask)
+{
+       return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
+                                 tcp_sk(sk)->rcv_nxt); /* ack_seq: current rcv_nxt */
+}
+
  /* This routine just queues the buffer for sending.
   *
   * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -1309,6 +1321,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
         if (nsize < 0)
                 nsize = 0;
  
+       if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+               return -ENOMEM;
+       }
+
         if (skb_unclone(skb, gfp))
                 return -ENOMEM;
  
@@ -1479,8 +1496,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
         mss_now -= icsk->icsk_ext_hdr_len;
  
         /* Then reserve room for full set of TCP options and 8 bytes of data */
-       if (mss_now < 48)
-               mss_now = 48;
+       if (mss_now < TCP_MIN_SND_MSS)
+               mss_now = TCP_MIN_SND_MSS;
         return mss_now;
  }
  
@@ -1730,7 +1747,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
          */
         segs = max_t(u32, bytes / mss_now, min_tso_segs);
  
-       return min_t(u32, segs, sk->sk_gso_max_segs);
+       return segs;
  }
  EXPORT_SYMBOL(tcp_tso_autosize);
  
@@ -1742,9 +1759,10 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
         const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
         u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
  
-       return tso_segs ? :
-               tcp_tso_autosize(sk, mss_now,
-                                sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+       if (!tso_segs)
+               tso_segs = tcp_tso_autosize(sk, mss_now,
+                               sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+       return min_t(u32, tso_segs, sk->sk_gso_max_segs);
  }
  
  /* Returns the portion of skb which can be sent right away */
@@ -2026,6 +2044,24 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
         }
  }
  
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+       struct sk_buff *skb, *next;
+
+       skb = tcp_send_head(sk);        /* walk starts at the send head */
+       tcp_for_write_queue_from_safe(skb, next, sk) {
+               if (len <= skb->len)    /* this skb covers what is left of len */
+                       break;
+               /* skb lies wholly inside len; eor on it rules out coalescing */
+               if (unlikely(TCP_SKB_CB(skb)->eor))
+                       return false;
+               /* charge this skb against len and continue with the next one */
+               len -= skb->len;
+       }
+       /* no eor skb found before len was covered or the queue walk ended */
+       return true;
+}
+
  /* Create a new MTU probe if we are ready.
   * MTU probe is regularly attempting to increase the path MTU by
   * deliberately sending larger packets.  This discovers routing
@@ -2098,6 +2134,9 @@ static int tcp_mtu_probe(struct sock *sk)
                         return 0;
         }
  
+       if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
+               return -1;
+
         /* We're allowed to probe.  Build it now. */
         nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
         if (!nskb)
@@ -2133,6 +2172,10 @@ static int tcp_mtu_probe(struct sock *sk)
                         /* We've eaten all the data from this skb.
                          * Throw it away. */
                         TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+                       /* If this is the last SKB we copy and eor is set
+                        * we need to propagate it to the new skb.
+                        */
+                       TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
                         tcp_unlink_write_queue(skb, sk);
                         sk_wmem_free_skb(sk, skb);
                 } else {
@@ -2414,15 +2457,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
  
         early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
         /* Schedule a loss probe in 2*RTT for SACK capable connections
-        * in Open state, that are either limited by cwnd or application.
+        * not in loss recovery, that are either limited by cwnd or application.
          */
         if ((early_retrans != 3 && early_retrans != 4) ||
             !tp->packets_out || !tcp_is_sack(tp) ||
-           icsk->icsk_ca_state != TCP_CA_Open)
-               return false;
-
-       if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-            !tcp_write_queue_empty(sk))
+           (icsk->icsk_ca_state != TCP_CA_Open &&
+            icsk->icsk_ca_state != TCP_CA_CWR))
                 return false;
  
         /* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2718,7 +2758,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
                 if (next_skb_size <= skb_availroom(skb))
                         skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
                                       next_skb_size);
-               else if (!skb_shift(skb, next_skb, next_skb_size))
+               else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
                         return false;
         }
         tcp_highest_sack_replace(sk, next_skb, skb);
@@ -2836,8 +2876,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
                 return -EBUSY;
  
         if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
-               if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-                       BUG();
+               if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+                       WARN_ON_ONCE(1);
+                       return -EINVAL;
+               }
                 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
                         return -ENOMEM;
         }
@@ -3341,6 +3383,7 @@ static void tcp_connect_init(struct sock *sk)
         sock_reset_flag(sk, SOCK_DONE);
         tp->snd_wnd = 0;
         tcp_init_wl(tp, 0);
+       tcp_write_queue_purge(sk);
         tp->snd_una = tp->write_seq;
         tp->snd_sml = tp->write_seq;
         tp->snd_up = tp->write_seq;
@@ -3579,7 +3622,7 @@ void tcp_send_delayed_ack(struct sock *sk)
  }
  
  /* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
  {
         struct sk_buff *buff;
  
@@ -3614,9 +3657,14 @@ void tcp_send_ack(struct sock *sk)
         skb_set_tcp_pure_ack(buff);
  
         /* Send it off, this clears delayed acks for us. */
-       tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
+       __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
+}
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
+
+void tcp_send_ack(struct sock *sk)
+{
+       __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);        /* ACK the current rcv_nxt */
  }
-EXPORT_SYMBOL_GPL(tcp_send_ack);
  
  /* This routine sends a packet with an out of date sequence
   * number. It assumes the other end will try to ack it.