git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - net/ipv4/tcp.c
route: Set lwtstate for local traffic and cached input dsts
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / tcp.c
index 814af89c1bd3418130e0ad90def6623ca13a6d16..1149b48700a125b03359b4b65fbb3f1d8494e0c4 100644 (file)
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_min_tso_segs __read_mostly = 2;
@@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk)
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = TCP_MSS_DEFAULT;
-       u64_stats_init(&tp->syncp);
 
        tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
        tcp_enable_early_retrans(tp);
@@ -998,8 +996,11 @@ do_error:
                goto out;
 out_err:
        /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+                    err == -EAGAIN)) {
                sk->sk_write_space(sk);
+               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+       }
        return sk_stream_error(sk, flags, err);
 }
 
@@ -1333,8 +1334,11 @@ do_error:
 out_err:
        err = sk_stream_error(sk, flags, err);
        /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+                    err == -EAGAIN)) {
                sk->sk_write_space(sk);
+               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+       }
        release_sock(sk);
        return err;
 }
@@ -2302,7 +2306,7 @@ EXPORT_SYMBOL(tcp_disconnect);
 static inline bool tcp_can_repair_sock(const struct sock *sk)
 {
        return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
-               ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+               (sk->sk_state != TCP_LISTEN);
 }
 
 static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
@@ -2704,15 +2708,33 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
+static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
+                                     struct tcp_info *info)
+{      /* Fills info's tcpi_busy_time, tcpi_rwnd_limited and tcpi_sndbuf_limited from tp's chrono counters. */
+       u64 stats[__TCP_CHRONO_MAX], total = 0;
+       enum tcp_chrono i;
+
+       for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
+               stats[i] = tp->chrono_stat[i - 1];      /* chrono_stat[] is read at the enum value minus one */
+               if (i == tp->chrono_type)       /* the chrono matching tp->chrono_type also gets (now - chrono_start) */
+                       stats[i] += tcp_time_stamp - tp->chrono_start;
+               stats[i] *= USEC_PER_SEC / HZ;  /* presumably jiffies -> microseconds; confirm tcp_time_stamp units */
+               total += stats[i];
+       }
+
+       info->tcpi_busy_time = total;   /* sum over every chrono type visited by the loop */
+       info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
+       info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
+}
+
 /* Return information about state of tcp endpoint in API format. */
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
        const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp, intv;
-       unsigned int start;
-       int notsent_bytes;
        u64 rate64;
+       bool slow;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
@@ -2721,6 +2743,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
        info->tcpi_state = sk_state_load(sk);
 
+       /* Report meaningful fields for all TCP states, including listeners */
+       rate = READ_ONCE(sk->sk_pacing_rate);
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       info->tcpi_pacing_rate = rate64;
+
+       rate = READ_ONCE(sk->sk_max_pacing_rate);
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       info->tcpi_max_pacing_rate = rate64;
+
+       info->tcpi_reordering = tp->reordering;
+       info->tcpi_snd_cwnd = tp->snd_cwnd;
+
+       if (info->tcpi_state == TCP_LISTEN) {
+               /* listeners aliased fields :
+                * tcpi_unacked -> Number of children ready for accept()
+                * tcpi_sacked  -> max backlog
+                */
+               info->tcpi_unacked = sk->sk_ack_backlog;
+               info->tcpi_sacked = sk->sk_max_ack_backlog;
+               return;
+       }
        info->tcpi_ca_state = icsk->icsk_ca_state;
        info->tcpi_retransmits = icsk->icsk_retransmits;
        info->tcpi_probes = icsk->icsk_probes_out;
@@ -2748,13 +2791,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_snd_mss = tp->mss_cache;
        info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-       if (info->tcpi_state == TCP_LISTEN) {
-               info->tcpi_unacked = sk->sk_ack_backlog;
-               info->tcpi_sacked = sk->sk_max_ack_backlog;
-       } else {
-               info->tcpi_unacked = tp->packets_out;
-               info->tcpi_sacked = tp->sacked_out;
-       }
+       info->tcpi_unacked = tp->packets_out;
+       info->tcpi_sacked = tp->sacked_out;
+
        info->tcpi_lost = tp->lost_out;
        info->tcpi_retrans = tp->retrans_out;
        info->tcpi_fackets = tp->fackets_out;
@@ -2768,34 +2807,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_rtt = tp->srtt_us >> 3;
        info->tcpi_rttvar = tp->mdev_us >> 2;
        info->tcpi_snd_ssthresh = tp->snd_ssthresh;
-       info->tcpi_snd_cwnd = tp->snd_cwnd;
        info->tcpi_advmss = tp->advmss;
-       info->tcpi_reordering = tp->reordering;
 
        info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
        info->tcpi_rcv_space = tp->rcvq_space.space;
 
        info->tcpi_total_retrans = tp->total_retrans;
 
-       rate = READ_ONCE(sk->sk_pacing_rate);
-       rate64 = rate != ~0U ? rate : ~0ULL;
-       put_unaligned(rate64, &info->tcpi_pacing_rate);
+       slow = lock_sock_fast(sk);
 
-       rate = READ_ONCE(sk->sk_max_pacing_rate);
-       rate64 = rate != ~0U ? rate : ~0ULL;
-       put_unaligned(rate64, &info->tcpi_max_pacing_rate);
+       info->tcpi_bytes_acked = tp->bytes_acked;
+       info->tcpi_bytes_received = tp->bytes_received;
+       info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
+       tcp_get_info_chrono_stats(tp, info);
+
+       unlock_sock_fast(sk, slow);
 
-       do {
-               start = u64_stats_fetch_begin_irq(&tp->syncp);
-               put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
-               put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
-       } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
 
-       notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
-       info->tcpi_notsent_bytes = max(0, notsent_bytes);
-
        info->tcpi_min_rtt = tcp_min_rtt(tp);
        info->tcpi_data_segs_in = tp->data_segs_in;
        info->tcpi_data_segs_out = tp->data_segs_out;
@@ -2806,11 +2836,31 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        if (rate && intv) {
                rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
                do_div(rate64, intv);
-               put_unaligned(rate64, &info->tcpi_delivery_rate);
+               info->tcpi_delivery_rate = rate64;
        }
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+{      /* Builds an skb holding three u64 netlink attributes with sk's chrono stats; returns NULL if allocation fails. */
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *stats;
+       struct tcp_info info;   /* not zeroed; only the three fields written by tcp_get_info_chrono_stats() are read */
+
+       stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);   /* room for the three attributes below */
+       if (!stats)
+               return NULL;
+
+       tcp_get_info_chrono_stats(tp, &info);
+       nla_put_u64_64bit(stats, TCP_NLA_BUSY,
+                         info.tcpi_busy_time, TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
+                         info.tcpi_rwnd_limited, TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
+                         info.tcpi_sndbuf_limited, TCP_NLA_PAD);       /* NOTE(review): the nla_put_u64_64bit() results are ignored; presumably safe given the sizing above - confirm */
+       return stats;
+}
+
 static int do_tcp_getsockopt(struct sock *sk, int level,
                int optname, char __user *optval, int __user *optlen)
 {