]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
io-wq: serialize hash clear with wakeup
authorJens Axboe <axboe@kernel.dk>
Fri, 12 Nov 2021 00:32:53 +0000 (17:32 -0700)
committerAndrea Righi <andrea.righi@canonical.com>
Tue, 7 Dec 2021 06:32:39 +0000 (07:32 +0100)
BugLink: https://bugs.launchpad.net/bugs/1951822
commit d3e3c102d107bb84251455a298cf475f24bab995 upstream.

We need to ensure that we serialize the stalled and hash bits with the
wait_queue wait handler, or we could be racing with someone modifying
the hashed state after we find it busy, but before we then give up and
wait for it to be cleared. This can cause random delays or stalls when
handling buffered writes for many files, where some of these files cause
hash collisions between the worker threads.

Cc: stable@vger.kernel.org
Reported-by: Daniel Black <daniel@mariadb.org>
Fixes: e941894eae31 ("io-wq: make buffered file write hashed work map per-ctx")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
fs/io-wq.c

index 8fcac49b7b1366dc2438502aa0e0a31a1b4f3ec5..203c0e2a5dfae761b1036cd67f7b5bf2f3e2a406 100644 (file)
@@ -421,9 +421,10 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work)
        return work->flags >> IO_WQ_HASH_SHIFT;
 }
 
-static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 {
        struct io_wq *wq = wqe->wq;
+       bool ret = false;
 
        spin_lock_irq(&wq->hash->wait.lock);
        if (list_empty(&wqe->wait.entry)) {
@@ -431,9 +432,11 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
                if (!test_bit(hash, &wq->hash->map)) {
                        __set_current_state(TASK_RUNNING);
                        list_del_init(&wqe->wait.entry);
+                       ret = true;
                }
        }
        spin_unlock_irq(&wq->hash->wait.lock);
+       return ret;
 }
 
 static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
@@ -473,14 +476,21 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
        }
 
        if (stall_hash != -1U) {
+               bool unstalled;
+
                /*
                 * Set this before dropping the lock to avoid racing with new
                 * work being added and clearing the stalled bit.
                 */
                set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
                raw_spin_unlock(&wqe->lock);
-               io_wait_on_hash(wqe, stall_hash);
+               unstalled = io_wait_on_hash(wqe, stall_hash);
                raw_spin_lock(&wqe->lock);
+               if (unstalled) {
+                       clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                       if (wq_has_sleeper(&wqe->wq->hash->wait))
+                               wake_up(&wqe->wq->hash->wait);
+               }
        }
 
        return NULL;
@@ -562,8 +572,11 @@ get_next:
                                io_wqe_enqueue(wqe, linked);
 
                        if (hash != -1U && !next_hashed) {
+                               /* serialize hash clear with wake_up() */
+                               spin_lock_irq(&wq->hash->wait.lock);
                                clear_bit(hash, &wq->hash->map);
                                clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                               spin_unlock_irq(&wq->hash->wait.lock);
                                if (wq_has_sleeper(&wq->hash->wait))
                                        wake_up(&wq->hash->wait);
                                raw_spin_lock(&wqe->lock);