git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
tcp: fix tcp_set_congestion_control() use from bpf hook
author: Eric Dumazet <edumazet@google.com>
Fri, 19 Jul 2019 02:28:14 +0000 (19:28 -0700)
committer: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Wed, 14 Aug 2019 09:18:49 +0000 (11:18 +0200)
BugLink: https://bugs.launchpad.net/bugs/1839036
[ Upstream commit 8d650cdedaabb33e85e9b7c517c0c71fcecc1de9 ]

Neal reported incorrect use of ns_capable() from bpf hook.

bpf_setsockopt(...TCP_CONGESTION...)
  -> tcp_set_congestion_control()
   -> ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)
    -> ns_capable_common()
     -> current_cred()
      -> rcu_dereference_protected(current->cred, 1)

Accessing 'current' in bpf context makes no sense, since packets
are processed from softirq context.

As Neal stated: The capability check in tcp_set_congestion_control()
was written assuming a system call context, and then was reused from
a BPF call site.

The fix is to add a new parameter to tcp_set_congestion_control(),
so that the ns_capable() call is only performed under the right
context.

Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
include/net/tcp.h
net/core/filter.c
net/ipv4/tcp.c
net/ipv4/tcp_cong.c

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 09bcfe8cc27afa3a4ff94356d99b5573634a67c4..8885d401d613b714eb8980858f65d4496632be7c 100644 (file)
@@ -1013,7 +1013,8 @@ void tcp_get_default_congestion_control(struct net *net, char *name);
 void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
+                              bool reinit, bool cap_net_admin);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index f6aabe5b9d12ad90ded7297fc69686ee7cee7893..8f5c412df76491c2af37a092b776235752fed740 100644 (file)
@@ -3247,7 +3247,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
                        strncpy(name, optval, min_t(long, optlen,
                                                    TCP_CA_NAME_MAX-1));
                        name[TCP_CA_NAME_MAX-1] = 0;
-                       ret = tcp_set_congestion_control(sk, name, false, reinit);
+                       ret = tcp_set_congestion_control(sk, name, false,
+                                                        reinit, true);
                } else {
                        struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cb6a09e44010f8a14d0ff48261e43a6b47ea06c6..3077c8f4908583d3b77b83fd7400c7cb21a40444 100644 (file)
@@ -2555,7 +2555,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                name[val] = 0;
 
                lock_sock(sk);
-               err = tcp_set_congestion_control(sk, name, true, true);
+               err = tcp_set_congestion_control(sk, name, true, true,
+                                                ns_capable(sock_net(sk)->user_ns,
+                                                           CAP_NET_ADMIN));
                release_sock(sk);
                return err;
        }
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bc6c02f1624383043147101cd4ad2157b3bc9289..48f79db446a02a57db9afc5a9e01fa1cd069316f 100644 (file)
@@ -332,7 +332,8 @@ out:
  * tcp_reinit_congestion_control (if the current congestion control was
  * already initialized.
  */
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
+                              bool reinit, bool cap_net_admin)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_congestion_ops *ca;
@@ -368,8 +369,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
                } else {
                        err = -EBUSY;
                }
-       } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
-                    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
+       } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
                err = -EPERM;
        } else if (!try_module_get(ca->owner)) {
                err = -EBUSY;