]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
mptcp: avoid flipping mp_capable field in syn_recv_sock()
authorPaolo Abeni <pabeni@redhat.com>
Mon, 20 Apr 2020 14:25:05 +0000 (16:25 +0200)
committerDavid S. Miller <davem@davemloft.net>
Mon, 20 Apr 2020 19:59:32 +0000 (12:59 -0700)
If multiple CPUs races on the same req_sock in syn_recv_sock(),
flipping such field can cause inconsistent child socket status.

When racing, the CPU losing the req ownership may still change
the mptcp request socket mp_capable flag while the CPU owning
the request is cloning the socket, leaving the child socket with
'is_mptcp' set but no 'mp_capable' flag.

Such socket will stay with 'conn' field cleared, heading to oops
in later mptcp callback.

Address the issue tracking the fallback status in a local variable.

Fixes: 58b09919626b ("mptcp: create msk early")
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/mptcp/subflow.c

index bc46b5091b9d510e21b3226a6fa5738d59915dc2..4fa19036850710cc9e06657bb7d3a0660a068a18 100644 (file)
@@ -376,6 +376,17 @@ static void mptcp_force_close(struct sock *sk)
        sk_common_release(sk);
 }
 
+static void subflow_ulp_fallback(struct sock *sk,
+                                struct mptcp_subflow_context *old_ctx)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       mptcp_subflow_tcp_fallback(sk, old_ctx);
+       icsk->icsk_ulp_ops = NULL;
+       rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+       tcp_sk(sk)->is_mptcp = 0;
+}
+
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct request_sock *req,
@@ -388,6 +399,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
        struct tcp_options_received opt_rx;
        bool fallback_is_fatal = false;
        struct sock *new_msk = NULL;
+       bool fallback = false;
        struct sock *child;
 
        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -412,14 +424,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                        subflow_req->remote_key = opt_rx.mptcp.sndr_key;
                        subflow_req->remote_key_valid = 1;
                } else {
-                       subflow_req->mp_capable = 0;
+                       fallback = true;
                        goto create_child;
                }
 
 create_msk:
                new_msk = mptcp_sk_clone(listener->conn, req);
                if (!new_msk)
-                       subflow_req->mp_capable = 0;
+                       fallback = true;
        } else if (subflow_req->mp_join) {
                fallback_is_fatal = true;
                opt_rx.mptcp.mp_join = 0;
@@ -438,12 +450,18 @@ create_child:
        if (child && *own_req) {
                struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
 
-               /* we have null ctx on TCP fallback, which is fatal on
-                * MPJ handshake
+               /* we need to fallback on ctx allocation failure and on pre-reqs
+                * checking above. In the latter scenario we additionally need
+                * to reset the context to non MPTCP status.
                 */
-               if (!ctx) {
+               if (!ctx || fallback) {
                        if (fallback_is_fatal)
                                goto close_child;
+
+                       if (ctx) {
+                               subflow_ulp_fallback(child, ctx);
+                               kfree_rcu(ctx, rcu);
+                       }
                        goto out;
                }
 
@@ -474,6 +492,13 @@ out:
        /* dispose of the left over mptcp master, if any */
        if (unlikely(new_msk))
                mptcp_force_close(new_msk);
+
+       /* check for expected invariant - should never trigger, just help
+        * catching eariler subtle bugs
+        */
+       WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
+                    (!mptcp_subflow_ctx(child) ||
+                     !mptcp_subflow_ctx(child)->conn));
        return child;
 
 close_child:
@@ -1076,17 +1101,6 @@ static void subflow_ulp_release(struct sock *sk)
        kfree_rcu(ctx, rcu);
 }
 
-static void subflow_ulp_fallback(struct sock *sk,
-                                struct mptcp_subflow_context *old_ctx)
-{
-       struct inet_connection_sock *icsk = inet_csk(sk);
-
-       mptcp_subflow_tcp_fallback(sk, old_ctx);
-       icsk->icsk_ulp_ops = NULL;
-       rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
-       tcp_sk(sk)->is_mptcp = 0;
-}
-
 static void subflow_ulp_clone(const struct request_sock *req,
                              struct sock *newsk,
                              const gfp_t priority)