tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.
author    Sebastian Andrzej Siewior <bigeasy@linutronix.de>
          Wed, 9 Feb 2022 18:56:57 +0000 (19:56 +0100)
committer Stefan Bader <stefan.bader@canonical.com>
          Fri, 20 May 2022 12:40:41 +0000 (14:40 +0200)
BugLink: https://bugs.launchpad.net/bugs/1969107
[ Upstream commit 4f9bf2a2f5aacf988e6d5e56b961ba45c5a25248 ]

Commit
   9652dc2eb9e40 ("tcp: relax listening_hash operations")

removed the need to disable bottom halves while acquiring
listening_hash.lock. Two callers are still left which disable bottom
halves before the lock is acquired.

On PREEMPT_RT the softirqs are preemptible and local_bh_disable() acts
as a per-CPU lock to ensure that resources protected by disabling
bottom halves remain protected.
This leads to a circular locking dependency when a lock that is taken
with bottom halves disabled on one path is, on another path, taken
first and only then followed by disabling bottom halves: the two paths
acquire the same pair of locks in reverse order.
It has been observed with inet_listen_hashbucket::lock:

local_bh_disable() + spin_lock(&ilb->lock):
  inet_listen()
    inet_csk_listen_start()
      sk->sk_prot->hash() := inet_hash()
        local_bh_disable()
        __inet_hash()
          spin_lock(&ilb->lock);
            acquire(&ilb->lock);

Reverse order: spin_lock(&ilb2->lock) + local_bh_disable():
  tcp_seq_next()
    listening_get_next()
      spin_lock(&ilb2->lock);
        acquire(&ilb2->lock);

  tcp4_seq_show()
    get_tcp4_sock()
      sock_i_ino()
        read_lock_bh(&sk->sk_callback_lock);
          acquire(softirq_ctrl) // <---- whoops
          acquire(&sk->sk_callback_lock)
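
To make the inversion concrete, here is a minimal userspace analogue,
not kernel code: two pthread mutexes stand in for the per-CPU
softirq_ctrl lock that local_bh_disable() takes on PREEMPT_RT and for
the listen hash bucket lock, and each thread models one of the call
chains above. Under the right interleaving the two threads deadlock
on each other, which is the cycle shown above.

/* Minimal userspace analogue of the inversion (illustration only;
 * the names mirror the kernel ones but this is not kernel code). */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t softirq_ctrl = PTHREAD_MUTEX_INITIALIZER; /* local_bh_disable() on RT */
static pthread_mutex_t ilb_lock = PTHREAD_MUTEX_INITIALIZER;     /* inet_listen_hashbucket::lock */

/* Path 1 (inet_hash -> __inet_hash): BH disabled first, bucket lock second. */
static void *hash_path(void *unused)
{
	pthread_mutex_lock(&softirq_ctrl);	/* local_bh_disable() */
	pthread_mutex_lock(&ilb_lock);		/* spin_lock(&ilb->lock) */
	pthread_mutex_unlock(&ilb_lock);
	pthread_mutex_unlock(&softirq_ctrl);
	return NULL;
}

/* Path 2 (tcp_seq_next -> sock_i_ino): bucket lock first, BH disabled second. */
static void *seq_path(void *unused)
{
	pthread_mutex_lock(&ilb_lock);		/* spin_lock(&ilb2->lock) */
	pthread_mutex_lock(&softirq_ctrl);	/* read_lock_bh() disables BH */
	pthread_mutex_unlock(&softirq_ctrl);
	pthread_mutex_unlock(&ilb_lock);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, hash_path, NULL);
	pthread_create(&t2, NULL, seq_path, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("no deadlock this run; the reverse order can still wedge\n");
	return 0;
}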

Drop local_bh_disable() around __inet_hash(), which acquires
listening_hash->lock, and disable bottom halves only around
inet_ehash_nolisten(). Split inet_unhash() so that the
listen_hashbucket lock is acquired without disabling bottom halves,
while the inet_ehash lock, which is also taken from softirq context,
is still acquired with bottom halves disabled.
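
In the same illustrative model, the patched ordering can be sketched
as follows: the listen-bucket path no longer holds the softirq lock
while taking the bucket lock, so only one lock order remains and no
cycle can form. This models the effect of the fix; the actual kernel
change is the diff below.

/* Same model after the fix: each path holds at most one consistent
 * lock order (ilb_lock before softirq_ctrl), so no AB-BA cycle. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t softirq_ctrl = PTHREAD_MUTEX_INITIALIZER; /* local_bh_disable() on RT */
static pthread_mutex_t ilb_lock = PTHREAD_MUTEX_INITIALIZER;     /* listen bucket lock */

/* Listen-hash path after the fix: the bucket lock is taken with BH
 * enabled, so it never nests inside the softirq lock. */
static void *hash_path_fixed(void *unused)
{
	pthread_mutex_lock(&ilb_lock);		/* spin_lock(&ilb->lock) */
	pthread_mutex_unlock(&ilb_lock);
	return NULL;
}

/* The /proc reader path is unchanged: bucket lock first, then the
 * BH-disabling read_lock_bh() inside it. */
static void *seq_path(void *unused)
{
	pthread_mutex_lock(&ilb_lock);		/* spin_lock(&ilb2->lock) */
	pthread_mutex_lock(&softirq_ctrl);	/* read_lock_bh() */
	pthread_mutex_unlock(&softirq_ctrl);
	pthread_mutex_unlock(&ilb_lock);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, hash_path_fixed, NULL);
	pthread_create(&t2, NULL, seq_path, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	puts("single lock order (ilb -> softirq_ctrl): no cycle");
	return 0;
}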

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de
Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net
Link: https://lore.kernel.org/r/YgQOebeZ10eNx1W6@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit aa9c9fd0ef9d83bdca5203fa72ebeadedd2a8400)
Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
net/ipv4/inet_hashtables.c
net/ipv6/inet6_hashtables.c

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 75737267746f85a5be82fbe04bbfb429914334c1..7bd1e10086f0a498728b68dee0e0701029ab5071 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
        int err = 0;
 
        if (sk->sk_state != TCP_LISTEN) {
+               local_bh_disable();
                inet_ehash_nolisten(sk, osk, NULL);
+               local_bh_enable();
                return 0;
        }
        WARN_ON(!sk_unhashed(sk));
@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
 {
        int err = 0;
 
-       if (sk->sk_state != TCP_CLOSE) {
-               local_bh_disable();
+       if (sk->sk_state != TCP_CLOSE)
                err = __inet_hash(sk, NULL);
-               local_bh_enable();
-       }
 
        return err;
 }
 EXPORT_SYMBOL_GPL(inet_hash);
 
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
 {
-       struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-       struct inet_listen_hashbucket *ilb = NULL;
-       spinlock_t *lock;
-
        if (sk_unhashed(sk))
                return;
 
-       if (sk->sk_state == TCP_LISTEN) {
-               ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-               lock = &ilb->lock;
-       } else {
-               lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-       }
-       spin_lock_bh(lock);
-       if (sk_unhashed(sk))
-               goto unlock;
-
        if (rcu_access_pointer(sk->sk_reuseport_cb))
                reuseport_stop_listen_sock(sk);
        if (ilb) {
+               struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
                inet_unhash2(hashinfo, sk);
                ilb->count--;
        }
        __sk_nulls_del_node_init_rcu(sk);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
-       spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+       struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+       if (sk_unhashed(sk))
+               return;
+
+       if (sk->sk_state == TCP_LISTEN) {
+               struct inet_listen_hashbucket *ilb;
+
+               ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+               /* Don't disable bottom halves while acquiring the lock to
+                * avoid circular locking dependency on PREEMPT_RT.
+                */
+               spin_lock(&ilb->lock);
+               __inet_unhash(sk, ilb);
+               spin_unlock(&ilb->lock);
+       } else {
+               spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+               spin_lock_bh(lock);
+               __inet_unhash(sk, NULL);
+               spin_unlock_bh(lock);
+       }
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 67c9114835c84864a3353c6f1c16853ea62a21c5..0a2e7f228391104d66e821f3bc09b134d62ea740 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
 {
        int err = 0;
 
-       if (sk->sk_state != TCP_CLOSE) {
-               local_bh_disable();
+       if (sk->sk_state != TCP_CLOSE)
                err = __inet_hash(sk, NULL);
-               local_bh_enable();
-       }
 
        return err;
 }