Merge tag 'cifs-bug-fixes-for-4.13' of git://git.samba.org/sfrench/cifs-2.6

[mirror_ubuntu-bionic-kernel.git] / include / net / tcp.h
diff --git a/include/net/tcp.h b/include/net/tcp.h

index be6223c586fa05b3ef1dbcb96e73cf7b5dae292d..70483296157f87acdf5acd5e96eaa910119ba220 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
  #include <linux/seq_file.h>
  #include <linux/memcontrol.h>
  
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf-cgroup.h>
+
  extern struct inet_hashinfo tcp_hashinfo;
  
  extern struct percpu_counter tcp_orphan_count;
@@ -237,9 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  
  
  /* sysctl variables for tcp */
-extern int sysctl_tcp_timestamps;
-extern int sysctl_tcp_window_scaling;
-extern int sysctl_tcp_sack;
  extern int sysctl_tcp_fastopen;
  extern int sysctl_tcp_retrans_collapse;
  extern int sysctl_tcp_stdurg;
@@ -279,7 +280,7 @@ extern int sysctl_tcp_pacing_ca_ratio;
  
  extern atomic_long_t tcp_memory_allocated;
  extern struct percpu_counter tcp_sockets_allocated;
-extern int tcp_memory_pressure;
+extern unsigned long tcp_memory_pressure;
  
  /* optimized version of sk_under_memory_pressure() for TCP sockets */
  static inline bool tcp_under_memory_pressure(const struct sock *sk)
@@ -353,6 +354,8 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
  int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
  int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
                  int flags);
+ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
+                size_t size, int flags);
  void tcp_release_cb(struct sock *sk);
  void tcp_wfree(struct sk_buff *skb);
  void tcp_write_timer_handler(struct sock *sk);
@@ -427,7 +430,7 @@ void tcp_set_keepalive(struct sock *sk, int val);
  void tcp_syn_ack_timeout(const struct request_sock *req);
  int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                 int flags, int *addr_len);
-void tcp_parse_options(const struct sk_buff *skb,
+void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
                        struct tcp_options_received *opt_rx,
                        int estab, struct tcp_fastopen_cookie *foc);
  const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
@@ -519,8 +522,9 @@ static inline u32 tcp_cookie_time(void)
  u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
                               u16 *mssp);
  __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-__u32 cookie_init_timestamp(struct request_sock *req);
-bool cookie_timestamp_decode(struct tcp_options_received *opt);
+u64 cookie_init_timestamp(struct request_sock *req);
+bool cookie_timestamp_decode(const struct net *net,
+                            struct tcp_options_received *opt);
  bool cookie_ecn_ok(const struct tcp_options_received *opt,
                    const struct net *net, const struct dst_entry *dst);
  
@@ -574,6 +578,7 @@ void tcp_fin(struct sock *sk);
  void tcp_init_xmit_timers(struct sock *);
  static inline void tcp_clear_xmit_timers(struct sock *sk)
  {
+       hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
         inet_csk_clear_xmit_timers(sk);
  }
  
@@ -699,17 +704,61 @@ u32 __tcp_select_window(struct sock *sk);
  
  void tcp_send_window_probe(struct sock *sk);
  
-/* TCP timestamps are only 32-bits, this causes a slight
- * complication on 64-bit systems since we store a snapshot
- * of jiffies in the buffer control blocks below.  We decided
- * to use only the low 32-bits of jiffies and hide the ugly
- * casts with the following macro.
+/* TCP uses 32bit jiffies to save some space.
+ * Note that this is different from tcp_time_stamp, which
+ * historically has been the same until linux-4.13.
+ */
+#define tcp_jiffies32 ((u32)jiffies)
+
+/*
+ * Deliver a 32bit value for TCP timestamp option (RFC 7323)
+ * It is no longer tied to jiffies, but to 1 ms clock.
+ * Note: double check if you want to use tcp_jiffies32 instead of this.
+ */
+#define TCP_TS_HZ      1000
+
+static inline u64 tcp_clock_ns(void)
+{
+       return local_clock();
+}
+
+static inline u64 tcp_clock_us(void)
+{
+       return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+}
+
+/* This should only be used in contexts where tp->tcp_mstamp is up to date */
+static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
+{
+       return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+}
+
+/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
+static inline u32 tcp_time_stamp_raw(void)
+{
+       return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+
+/* Refresh 1us clock of a TCP socket,
+ * ensuring monotically increasing values.
   */
-#define tcp_time_stamp         ((__u32)(jiffies))
+static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+       u64 val = tcp_clock_us();
+
+       if (val > tp->tcp_mstamp)
+               tp->tcp_mstamp = val;
+}
+
+static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
+{
+       return max_t(s64, t1 - t0, 0);
+}
  
  static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
  {
-       return skb->skb_mstamp.stamp_jiffies;
+       return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
  }
  
  
@@ -774,9 +823,9 @@ struct tcp_skb_cb {
                         /* pkts S/ACKed so far upon tx of skb, incl retrans: */
                         __u32 delivered;
                         /* start of send pipeline phase */
-                       struct skb_mstamp first_tx_mstamp;
+                       u64 first_tx_mstamp;
                         /* when we reached the "delivered" count */
-                       struct skb_mstamp delivered_mstamp;
+                       u64 delivered_mstamp;
                 } tx;   /* only used for outgoing skbs */
                 union {
                         struct inet_skb_parm    h4;
@@ -892,7 +941,7 @@ struct ack_sample {
   * A sample is invalid if "delivered" or "interval_us" is negative.
   */
  struct rate_sample {
-       struct  skb_mstamp prior_mstamp; /* starting timestamp for interval */
+       u64  prior_mstamp; /* starting timestamp for interval */
         u32  prior_delivered;   /* tp->delivered at "prior_mstamp" */
         s32  delivered;         /* number of packets delivered over interval */
         long interval_us;       /* time for tp->delivered to incr "delivered" */
@@ -955,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
  void tcp_get_available_congestion_control(char *buf, size_t len);
  void tcp_get_allowed_congestion_control(char *buf, size_t len);
  int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
+void tcp_reinit_congestion_control(struct sock *sk,
+                                  const struct tcp_congestion_ops *ca);
  u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
  void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
  
@@ -1241,7 +1292,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
         if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
             ca_ops->cong_control)
                 return;
-       delta = tcp_time_stamp - tp->lsndtime;
+       delta = tcp_jiffies32 - tp->lsndtime;
         if (delta > inet_csk(sk)->icsk_rto)
                 tcp_cwnd_restart(sk, delta);
  }
@@ -1277,6 +1328,7 @@ extern void tcp_openreq_init_rwin(struct request_sock *req,
                                   const struct dst_entry *dst);
  
  void tcp_enter_memory_pressure(struct sock *sk);
+void tcp_leave_memory_pressure(struct sock *sk);
  
  static inline int keepalive_intvl_when(const struct tcp_sock *tp)
  {
@@ -1303,8 +1355,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
  {
         const struct inet_connection_sock *icsk = &tp->inet_conn;
  
-       return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
-                         tcp_time_stamp - tp->rcv_tstamp);
+       return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime,
+                         tcp_jiffies32 - tp->rcv_tstamp);
  }
  
  static inline int tcp_fin_time(const struct sock *sk)
@@ -1395,6 +1447,7 @@ struct tcp_md5sig_key {
         u8                      keylen;
         u8                      family; /* AF_INET or AF_INET6 */
         union tcp_md5_addr      addr;
+       u8                      prefixlen;
         u8                      key[TCP_MD5SIG_MAXKEYLEN];
         struct rcu_head         rcu;
  };
@@ -1438,9 +1491,10 @@ struct tcp_md5sig_pool {
  int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
                         const struct sock *sk, const struct sk_buff *skb);
  int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
-                  int family, const u8 *newkey, u8 newkeylen, gfp_t gfp);
+                  int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
+                  gfp_t gfp);
  int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
-                  int family);
+                  int family, u8 prefixlen);
  struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
                                          const struct sock *addr_sk);
  
@@ -1800,6 +1854,7 @@ struct tcp_sock_af_ops {
                                          const struct sock *sk,
                                          const struct sk_buff *skb);
         int             (*md5_parse)(struct sock *sk,
+                                    int optname,
                                      char __user *optval,
                                      int optlen);
  #endif
@@ -1825,7 +1880,7 @@ struct tcp_request_sock_ops {
         struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
                                        const struct request_sock *req);
         u32 (*init_seq)(const struct sk_buff *skb);
-       u32 (*init_ts_off)(const struct sk_buff *skb);
+       u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
         int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
                            struct flowi *fl, struct request_sock *req,
                            struct tcp_fastopen_cookie *foc,
@@ -1858,7 +1913,7 @@ void tcp_init(void);
  /* tcp_recovery.c */
  extern void tcp_rack_mark_lost(struct sock *sk);
  extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
-                            const struct skb_mstamp *xmit_time);
+                            u64 xmit_time);
  extern void tcp_rack_reo_timeout(struct sock *sk);
  
  /*
@@ -1945,4 +2000,89 @@ static inline void tcp_listendrop(const struct sock *sk)
         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
  }
  
+enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
+
+/*
+ * Interface for adding Upper Level Protocols over TCP
+ */
+
+#define TCP_ULP_NAME_MAX       16
+#define TCP_ULP_MAX            128
+#define TCP_ULP_BUF_MAX                (TCP_ULP_NAME_MAX*TCP_ULP_MAX)
+
+struct tcp_ulp_ops {
+       struct list_head        list;
+
+       /* initialize ulp */
+       int (*init)(struct sock *sk);
+       /* cleanup ulp */
+       void (*release)(struct sock *sk);
+
+       char            name[TCP_ULP_NAME_MAX];
+       struct module   *owner;
+};
+int tcp_register_ulp(struct tcp_ulp_ops *type);
+void tcp_unregister_ulp(struct tcp_ulp_ops *type);
+int tcp_set_ulp(struct sock *sk, const char *name);
+void tcp_get_available_ulp(char *buf, size_t len);
+void tcp_cleanup_ulp(struct sock *sk);
+
+/* Call BPF_SOCK_OPS program that returns an int. If the return value
+ * is < 0, then the BPF op failed (for example if the loaded BPF
+ * program does not support the chosen operation or there is no BPF
+ * program loaded).
+ */
+#ifdef CONFIG_BPF
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+       struct bpf_sock_ops_kern sock_ops;
+       int ret;
+
+       if (sk_fullsock(sk))
+               sock_owned_by_me(sk);
+
+       memset(&sock_ops, 0, sizeof(sock_ops));
+       sock_ops.sk = sk;
+       sock_ops.op = op;
+
+       ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+       if (ret == 0)
+               ret = sock_ops.reply;
+       else
+               ret = -1;
+       return ret;
+}
+#else
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+       return -EPERM;
+}
+#endif
+
+static inline u32 tcp_timeout_init(struct sock *sk)
+{
+       int timeout;
+
+       timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+
+       if (timeout <= 0)
+               timeout = TCP_TIMEOUT_INIT;
+       return timeout;
+}
+
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
+{
+       int rwnd;
+
+       rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+
+       if (rwnd < 0)
+               rwnd = 0;
+       return rwnd;
+}
+
+static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
+{
+       return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+}
  #endif /* _TCP_H */