rebase patches on top of Ubuntu-5.13.0-23.23

author Thomas Lamprecht <t.lamprecht@proxmox.com>

Mon, 29 Nov 2021 11:07:43 +0000 (12:07 +0100)

committer Thomas Lamprecht <t.lamprecht@proxmox.com>

Mon, 29 Nov 2021 17:09:38 +0000 (18:09 +0100)
author Thomas Lamprecht <t.lamprecht@proxmox.com>
Mon, 29 Nov 2021 11:07:43 +0000 (12:07 +0100)
committer Thomas Lamprecht <t.lamprecht@proxmox.com>
Mon, 29 Nov 2021 17:09:38 +0000 (18:09 +0100)
diff --git a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch

index fd6190f0aa082fa1ed9741dcdd38653e2e082b99..00c6bba9a3d8c3fd749b60752c6d0c49c7f1035e 100644 (file)
--- a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch
+++ b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
   1 file changed, 1 insertion(+), 1 deletion(-)
  
  diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 1dcc66060a19..c0ca4f494a02 100644
+index 3b316cb42554..71aba634a6a8 100644
  --- a/virt/kvm/kvm_main.c
  +++ b/virt/kvm/kvm_main.c
  @@ -78,7 +78,7 @@ module_param(halt_poll_ns, uint, 0644);
diff --git a/patches/kernel/0008-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch b/patches/kernel/0008-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch

new file mode 100644 (file)

index 0000000..03d3b25
--- /dev/null
+++ b/patches/kernel/0008-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch
@@ -0,0 +1,40 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Date: Tue, 9 Nov 2021 12:01:12 +0100
+Subject: [PATCH] Revert "block: remove the -ERESTARTSYS handling in
+ blkdev_get_by_dev"
+
+This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
+
+It breaks ZFS sometimes:
+https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ fs/block_dev.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/fs/block_dev.c b/fs/block_dev.c
+index e4a80bd4ddf1..4a14d28dd923 100644
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -1436,6 +1436,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+       if (ret)
+               return ERR_PTR(ret);
+ 
++      /*
++       * If we lost a race with 'disk' being deleted, try again.  See md.c.
++       */
++retry:
+       bdev = blkdev_get_no_open(dev);
+       if (!bdev)
+               return ERR_PTR(-ENXIO);
+@@ -1482,6 +1486,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+       disk_unblock_events(disk);
+ put_blkdev:
+       blkdev_put_no_open(bdev);
++      if (ret == -ERESTARTSYS)
++              goto retry;
+       return ERR_PTR(ret);
+ }
+ EXPORT_SYMBOL(blkdev_get_by_dev);
diff --git a/patches/kernel/0009-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch b/patches/kernel/0009-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch

deleted file mode 100644 (file)

index 03d3b25..0000000
--- a/patches/kernel/0009-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Lamprecht <t.lamprecht@proxmox.com>
-Date: Tue, 9 Nov 2021 12:01:12 +0100
-Subject: [PATCH] Revert "block: remove the -ERESTARTSYS handling in
- blkdev_get_by_dev"
-
-This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
-
-It breaks ZFS sometimes:
-https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- fs/block_dev.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/fs/block_dev.c b/fs/block_dev.c
-index e4a80bd4ddf1..4a14d28dd923 100644
---- a/fs/block_dev.c
-+++ b/fs/block_dev.c
-@@ -1436,6 +1436,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-       if (ret)
-               return ERR_PTR(ret);
- 
-+      /*
-+       * If we lost a race with 'disk' being deleted, try again.  See md.c.
-+       */
-+retry:
-       bdev = blkdev_get_no_open(dev);
-       if (!bdev)
-               return ERR_PTR(-ENXIO);
-@@ -1482,6 +1486,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-       disk_unblock_events(disk);
- put_blkdev:
-       blkdev_put_no_open(bdev);
-+      if (ret == -ERESTARTSYS)
-+              goto retry;
-       return ERR_PTR(ret);
- }
- EXPORT_SYMBOL(blkdev_get_by_dev);
diff --git a/patches/kernel/0009-io-wq-fix-queue-stalling-race.patch b/patches/kernel/0009-io-wq-fix-queue-stalling-race.patch

new file mode 100644 (file)

index 0000000..05fb6bc
--- /dev/null
+++ b/patches/kernel/0009-io-wq-fix-queue-stalling-race.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 31 Aug 2021 13:53:00 -0600
+Subject: [PATCH] io-wq: fix queue stalling race
+
+We need to set the stalled bit early, before we drop the lock for adding
+us to the stall hash queue. If not, then we can race with new work being
+queued between adding us to the stall hash and io_worker_handle_work()
+marking us stalled.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[backport]
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+---
+ fs/io-wq.c | 15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+diff --git a/fs/io-wq.c b/fs/io-wq.c
+index 6612d0aa497e..33678185f3bc 100644
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -437,8 +437,7 @@ static bool io_worker_can_run_work(struct io_worker *worker,
+ }
+ 
+ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+-                                         struct io_worker *worker,
+-                                         bool *stalled)
++                                         struct io_worker *worker)
+       __must_hold(wqe->lock)
+ {
+       struct io_wq_work_node *node, *prev;
+@@ -476,10 +475,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+       }
+ 
+       if (stall_hash != -1U) {
++              /*
++               * Set this before dropping the lock to avoid racing with new
++               * work being added and clearing the stalled bit.
++               */
++              wqe->flags |= IO_WQE_FLAG_STALLED;
+               raw_spin_unlock(&wqe->lock);
+               io_wait_on_hash(wqe, stall_hash);
+               raw_spin_lock(&wqe->lock);
+-              *stalled = true;
+       }
+ 
+       return NULL;
+@@ -519,7 +522,6 @@ static void io_worker_handle_work(struct io_worker *worker)
+ 
+       do {
+               struct io_wq_work *work;
+-              bool stalled;
+ get_next:
+               /*
+                * If we got some work, mark us as busy. If we didn't, but
+@@ -528,12 +530,9 @@ static void io_worker_handle_work(struct io_worker *worker)
+                * can't make progress, any work completion or insertion will
+                * clear the stalled flag.
+                */
+-              stalled = false;
+-              work = io_get_next_work(wqe, worker, &stalled);
++              work = io_get_next_work(wqe, worker);
+               if (work)
+                       __io_worker_busy(wqe, worker, work);
+-              else if (stalled)
+-                      wqe->flags |= IO_WQE_FLAG_STALLED;
+ 
+               raw_spin_unlock_irq(&wqe->lock);
+               if (!work)
diff --git a/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch b/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch

deleted file mode 100644 (file)

index 5ef160d..0000000
--- a/patches/kernel/0010-io-wq-fix-queue-stalling-race.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@kernel.dk>
-Date: Tue, 31 Aug 2021 13:53:00 -0600
-Subject: [PATCH] io-wq: fix queue stalling race
-
-We need to set the stalled bit early, before we drop the lock for adding
-us to the stall hash queue. If not, then we can race with new work being
-queued between adding us to the stall hash and io_worker_handle_work()
-marking us stalled.
-
-Signed-off-by: Jens Axboe <axboe@kernel.dk>
-[backport]
-Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
----
- fs/io-wq.c | 15 +++++++--------
- 1 file changed, 7 insertions(+), 8 deletions(-)
-
-diff --git a/fs/io-wq.c b/fs/io-wq.c
-index 6612d0aa497e..33678185f3bc 100644
---- a/fs/io-wq.c
-+++ b/fs/io-wq.c
-@@ -437,8 +437,7 @@ static bool io_worker_can_run_work(struct io_worker *worker,
- }
- 
- static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
--                                         struct io_worker *worker,
--                                         bool *stalled)
-+                                         struct io_worker *worker)
-       __must_hold(wqe->lock)
- {
-       struct io_wq_work_node *node, *prev;
-@@ -476,10 +475,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-       }
- 
-       if (stall_hash != -1U) {
-+              /*
-+               * Set this before dropping the lock to avoid racing with new
-+               * work being added and clearing the stalled bit.
-+               */
-+              wqe->flags |= IO_WQE_FLAG_STALLED;
-               raw_spin_unlock(&wqe->lock);
-               io_wait_on_hash(wqe, stall_hash);
-               raw_spin_lock(&wqe->lock);
--              *stalled = true;
-       }
- 
-       return NULL;
-@@ -519,7 +522,6 @@ static void io_worker_handle_work(struct io_worker *worker)
- 
-       do {
-               struct io_wq_work *work;
--              bool stalled;
- get_next:
-               /*
-                * If we got some work, mark us as busy. If we didn't, but
-@@ -528,12 +530,9 @@ static void io_worker_handle_work(struct io_worker *worker)
-                * can't make progress, any work completion or insertion will
-                * clear the stalled flag.
-                */
--              stalled = false;
--              work = io_get_next_work(wqe, worker, &stalled);
-+              work = io_get_next_work(wqe, worker);
-               if (work)
-                       __io_worker_busy(wqe, worker, work);
--              else if (stalled)
--                      wqe->flags |= IO_WQE_FLAG_STALLED;
- 
-               raw_spin_unlock_irq(&wqe->lock);
-               if (!work)
--- 
-2.30.2
-
diff --git a/patches/kernel/0010-io-wq-split-bounded-and-unbounded-work-into-separate.patch b/patches/kernel/0010-io-wq-split-bounded-and-unbounded-work-into-separate.patch

new file mode 100644 (file)

index 0000000..789b199
--- /dev/null
+++ b/patches/kernel/0010-io-wq-split-bounded-and-unbounded-work-into-separate.patch
@@ -0,0 +1,412 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 31 Aug 2021 13:57:32 -0600
+Subject: [PATCH] io-wq: split bounded and unbounded work into separate lists
+
+We've got a few issues that all boil down to the fact that we have one
+list of pending work items, yet two different types of workers to
+serve them. This causes some oddities around workers switching type and
+even hashed work vs regular work on the same bounded list.
+
+Just separate them out cleanly, similarly to how we already do
+accounting of what is running. That provides a clean separation and
+removes some corner cases that can cause stalls when handling IO
+that is punted to io-wq.
+
+Fixes: ecc53c48c13d ("io-wq: check max_worker limits if a worker transitions bound state")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[backport]
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+---
+ fs/io-wq.c | 156 +++++++++++++++++++++++------------------------------
+ 1 file changed, 68 insertions(+), 88 deletions(-)
+
+diff --git a/fs/io-wq.c b/fs/io-wq.c
+index 33678185f3bc..2496d8781ea1 100644
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -34,7 +34,7 @@ enum {
+ };
+ 
+ enum {
+-      IO_WQE_FLAG_STALLED     = 1,    /* stalled on hash */
++      IO_ACCT_STALLED_BIT     = 0,    /* stalled on hash */
+ };
+ 
+ /*
+@@ -73,25 +73,24 @@ struct io_wqe_acct {
+       unsigned max_workers;
+       int index;
+       atomic_t nr_running;
++      struct io_wq_work_list work_list;
++      unsigned long flags;
+ };
+ 
+ enum {
+       IO_WQ_ACCT_BOUND,
+       IO_WQ_ACCT_UNBOUND,
++      IO_WQ_ACCT_NR,
+ };
+ 
+ /*
+  * Per-node worker thread pool
+  */
+ struct io_wqe {
+-      struct {
+-              raw_spinlock_t lock;
+-              struct io_wq_work_list work_list;
+-              unsigned flags;
+-      } ____cacheline_aligned_in_smp;
++      raw_spinlock_t lock;
++      struct io_wqe_acct acct[2];
+ 
+       int node;
+-      struct io_wqe_acct acct[2];
+ 
+       struct hlist_nulls_head free_list;
+       struct list_head all_list;
+@@ -196,11 +195,10 @@ static void io_worker_exit(struct io_worker *worker)
+       do_exit(0);
+ }
+ 
+-static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+-      __must_hold(wqe->lock)
++static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
+ {
+-      if (!wq_list_empty(&wqe->work_list) &&
+-          !(wqe->flags & IO_WQE_FLAG_STALLED))
++      if (!wq_list_empty(&acct->work_list) &&
++          !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
+               return true;
+       return false;
+ }
+@@ -209,7 +207,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+  * Check head of free list for an available worker. If one isn't available,
+  * caller must create one.
+  */
+-static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
++static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
++                                      struct io_wqe_acct *acct)
+       __must_hold(RCU)
+ {
+       struct hlist_nulls_node *n;
+@@ -223,6 +222,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
+       hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+               if (!io_worker_get(worker))
+                       continue;
++              if (io_wqe_get_acct(worker) != acct) {
++                      io_worker_release(worker);
++                      continue;
++              }
+               if (wake_up_process(worker->task)) {
+                       io_worker_release(worker);
+                       return true;
+@@ -341,7 +344,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
+       if (!(worker->flags & IO_WORKER_F_UP))
+               return;
+ 
+-      if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
++      if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
+               atomic_inc(&acct->nr_running);
+               atomic_inc(&wqe->wq->worker_refs);
+               io_queue_worker_create(wqe, worker, acct);
+@@ -356,29 +359,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
+                            struct io_wq_work *work)
+       __must_hold(wqe->lock)
+ {
+-      bool worker_bound, work_bound;
+-
+-      BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
+-
+       if (worker->flags & IO_WORKER_F_FREE) {
+               worker->flags &= ~IO_WORKER_F_FREE;
+               hlist_nulls_del_init_rcu(&worker->nulls_node);
+       }
+-
+-      /*
+-       * If worker is moving from bound to unbound (or vice versa), then
+-       * ensure we update the running accounting.
+-       */
+-      worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
+-      work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
+-      if (worker_bound != work_bound) {
+-              int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
+-              io_wqe_dec_running(worker);
+-              worker->flags ^= IO_WORKER_F_BOUND;
+-              wqe->acct[index].nr_workers--;
+-              wqe->acct[index ^ 1].nr_workers++;
+-              io_wqe_inc_running(worker);
+-       }
+ }
+ 
+ /*
+@@ -417,44 +401,23 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+       spin_unlock(&wq->hash->wait.lock);
+ }
+ 
+-/*
+- * We can always run the work if the worker is currently the same type as
+- * the work (eg both are bound, or both are unbound). If they are not the
+- * same, only allow it if incrementing the worker count would be allowed.
+- */
+-static bool io_worker_can_run_work(struct io_worker *worker,
+-                                 struct io_wq_work *work)
+-{
+-      struct io_wqe_acct *acct;
+-
+-      if (!(worker->flags & IO_WORKER_F_BOUND) !=
+-          !(work->flags & IO_WQ_WORK_UNBOUND))
+-              return true;
+-
+-      /* not the same type, check if we'd go over the limit */
+-      acct = io_work_get_acct(worker->wqe, work);
+-      return acct->nr_workers < acct->max_workers;
+-}
+-
+-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
++static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+                                          struct io_worker *worker)
+       __must_hold(wqe->lock)
+ {
+       struct io_wq_work_node *node, *prev;
+       struct io_wq_work *work, *tail;
+       unsigned int stall_hash = -1U;
++      struct io_wqe *wqe = worker->wqe;
+ 
+-      wq_list_for_each(node, prev, &wqe->work_list) {
++      wq_list_for_each(node, prev, &acct->work_list) {
+               unsigned int hash;
+ 
+               work = container_of(node, struct io_wq_work, list);
+ 
+-              if (!io_worker_can_run_work(worker, work))
+-                      break;
+-
+               /* not hashed, can run anytime */
+               if (!io_wq_is_hashed(work)) {
+-                      wq_list_del(&wqe->work_list, node, prev);
++                      wq_list_del(&acct->work_list, node, prev);
+                       return work;
+               }
+ 
+@@ -465,7 +428,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+               /* hashed, can run if not already running */
+               if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
+                       wqe->hash_tail[hash] = NULL;
+-                      wq_list_cut(&wqe->work_list, &tail->list, prev);
++                      wq_list_cut(&acct->work_list, &tail->list, prev);
+                       return work;
+               }
+               if (stall_hash == -1U)
+@@ -479,7 +442,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+                * Set this before dropping the lock to avoid racing with new
+                * work being added and clearing the stalled bit.
+                */
+-              wqe->flags |= IO_WQE_FLAG_STALLED;
++              set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+               raw_spin_unlock(&wqe->lock);
+               io_wait_on_hash(wqe, stall_hash);
+               raw_spin_lock(&wqe->lock);
+@@ -516,6 +479,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+ static void io_worker_handle_work(struct io_worker *worker)
+       __releases(wqe->lock)
+ {
++      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+       struct io_wqe *wqe = worker->wqe;
+       struct io_wq *wq = wqe->wq;
+       bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
+@@ -530,7 +494,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+                * can't make progress, any work completion or insertion will
+                * clear the stalled flag.
+                */
+-              work = io_get_next_work(wqe, worker);
++              work = io_get_next_work(acct, worker);
+               if (work)
+                       __io_worker_busy(wqe, worker, work);
+ 
+@@ -564,10 +528,10 @@ static void io_worker_handle_work(struct io_worker *worker)
+ 
+                       if (hash != -1U && !next_hashed) {
+                               clear_bit(hash, &wq->hash->map);
++                              clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                               if (wq_has_sleeper(&wq->hash->wait))
+                                       wake_up(&wq->hash->wait);
+                               raw_spin_lock_irq(&wqe->lock);
+-                              wqe->flags &= ~IO_WQE_FLAG_STALLED;
+                               /* skip unnecessary unlock-lock wqe->lock */
+                               if (!work)
+                                       goto get_next;
+@@ -582,6 +546,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+ static int io_wqe_worker(void *data)
+ {
+       struct io_worker *worker = data;
++      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+       struct io_wqe *wqe = worker->wqe;
+       struct io_wq *wq = wqe->wq;
+       char buf[TASK_COMM_LEN];
+@@ -597,7 +562,7 @@ static int io_wqe_worker(void *data)
+               set_current_state(TASK_INTERRUPTIBLE);
+ loop:
+               raw_spin_lock_irq(&wqe->lock);
+-              if (io_wqe_run_queue(wqe)) {
++              if (io_acct_run_queue(acct)) {
+                       io_worker_handle_work(worker);
+                       goto loop;
+               }
+@@ -623,7 +588,7 @@ static int io_wqe_worker(void *data)
+ 
+       if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+               raw_spin_lock_irq(&wqe->lock);
+-              if (!wq_list_empty(&wqe->work_list))
++              if (!wq_list_empty(&acct->work_list))
+                       io_worker_handle_work(worker);
+               else
+                       raw_spin_unlock_irq(&wqe->lock);
+@@ -769,12 +734,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
+ 
+ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+ {
++      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+       unsigned int hash;
+       struct io_wq_work *tail;
+ 
+       if (!io_wq_is_hashed(work)) {
+ append:
+-              wq_list_add_tail(&work->list, &wqe->work_list);
++              wq_list_add_tail(&work->list, &acct->work_list);
+               return;
+       }
+ 
+@@ -784,7 +750,7 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+       if (!tail)
+               goto append;
+ 
+-      wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
++      wq_list_add_after(&work->list, &tail->list, &acct->work_list);
+ }
+ 
+ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+@@ -806,10 +772,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+ 
+       raw_spin_lock_irqsave(&wqe->lock, flags);
+       io_wqe_insert_work(wqe, work);
+-      wqe->flags &= ~IO_WQE_FLAG_STALLED;
++      clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+ 
+       rcu_read_lock();
+-      do_create = !io_wqe_activate_free_worker(wqe);
++      do_create = !io_wqe_activate_free_worker(wqe, acct);
+       rcu_read_unlock();
+ 
+       raw_spin_unlock_irqrestore(&wqe->lock, flags);
+@@ -862,6 +828,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+                                        struct io_wq_work *work,
+                                        struct io_wq_work_node *prev)
+ {
++      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+       unsigned int hash = io_get_work_hash(work);
+       struct io_wq_work *prev_work = NULL;
+ 
+@@ -873,7 +840,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+               else
+                       wqe->hash_tail[hash] = NULL;
+       }
+-      wq_list_del(&wqe->work_list, &work->list, prev);
++      wq_list_del(&acct->work_list, &work->list, prev);
+ }
+ 
+ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+@@ -882,22 +849,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+       struct io_wq_work_node *node, *prev;
+       struct io_wq_work *work;
+       unsigned long flags;
++      int i;
+ 
+ retry:
+       raw_spin_lock_irqsave(&wqe->lock, flags);
+-      wq_list_for_each(node, prev, &wqe->work_list) {
+-              work = container_of(node, struct io_wq_work, list);
+-              if (!match->fn(work, match->data))
+-                      continue;
+-              io_wqe_remove_pending(wqe, work, prev);
+-              raw_spin_unlock_irqrestore(&wqe->lock, flags);
+-              io_run_cancel(work, wqe);
+-              match->nr_pending++;
+-              if (!match->cancel_all)
+-                      return;
++      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++              struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
+ 
+-              /* not safe to continue after unlock */
+-              goto retry;
++              wq_list_for_each(node, prev, &acct->work_list) {
++                      work = container_of(node, struct io_wq_work, list);
++                      if (!match->fn(work, match->data))
++                              continue;
++                      io_wqe_remove_pending(wqe, work, prev);
++                      raw_spin_unlock_irqrestore(&wqe->lock, flags);
++                      io_run_cancel(work, wqe);
++                      match->nr_pending++;
++                      if (!match->cancel_all)
++                              return;
++
++                      /* not safe to continue after unlock */
++                      goto retry;
++              }
+       }
+       raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ }
+@@ -958,18 +930,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+                           int sync, void *key)
+ {
+       struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
++      int i;
+ 
+       list_del_init(&wait->entry);
+ 
+       rcu_read_lock();
+-      io_wqe_activate_free_worker(wqe);
++      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++              struct io_wqe_acct *acct = &wqe->acct[i];
++
++              if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
++                      io_wqe_activate_free_worker(wqe, acct);
++      }
+       rcu_read_unlock();
+       return 1;
+ }
+ 
+ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+ {
+-      int ret = -ENOMEM, node;
++      int ret, node, i;
+       struct io_wq *wq;
+ 
+       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
+@@ -1006,18 +984,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+                       goto err;
+               wq->wqes[node] = wqe;
+               wqe->node = alloc_node;
+-              wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
+-              wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
+               wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
+-              atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
+               wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+                                       task_rlimit(current, RLIMIT_NPROC);
+-              atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+-              wqe->wait.func = io_wqe_hash_wake;
+               INIT_LIST_HEAD(&wqe->wait.entry);
++              wqe->wait.func = io_wqe_hash_wake;
++              for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++                      struct io_wqe_acct *acct = &wqe->acct[i];
++
++                      acct->index = i;
++                      atomic_set(&acct->nr_running, 0);
++                      INIT_WQ_LIST(&acct->work_list);
++              }
+               wqe->wq = wq;
+               raw_spin_lock_init(&wqe->lock);
+-              INIT_WQ_LIST(&wqe->work_list);
+               INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
+               INIT_LIST_HEAD(&wqe->all_list);
+       }
diff --git a/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch b/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch

deleted file mode 100644 (file)

index 47df331..0000000
--- a/patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch
+++ /dev/null
@@ -1,415 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@kernel.dk>
-Date: Tue, 31 Aug 2021 13:57:32 -0600
-Subject: [PATCH] io-wq: split bounded and unbounded work into separate lists
-
-We've got a few issues that all boil down to the fact that we have one
-list of pending work items, yet two different types of workers to
-serve them. This causes some oddities around workers switching type and
-even hashed work vs regular work on the same bounded list.
-
-Just separate them out cleanly, similarly to how we already do
-accounting of what is running. That provides a clean separation and
-removes some corner cases that can cause stalls when handling IO
-that is punted to io-wq.
-
-Fixes: ecc53c48c13d ("io-wq: check max_worker limits if a worker transitions bound state")
-Signed-off-by: Jens Axboe <axboe@kernel.dk>
-[backport]
-Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
----
- fs/io-wq.c | 156 +++++++++++++++++++++++------------------------------
- 1 file changed, 68 insertions(+), 88 deletions(-)
-
-diff --git a/fs/io-wq.c b/fs/io-wq.c
-index 33678185f3bc..2496d8781ea1 100644
---- a/fs/io-wq.c
-+++ b/fs/io-wq.c
-@@ -34,7 +34,7 @@ enum {
- };
- 
- enum {
--      IO_WQE_FLAG_STALLED     = 1,    /* stalled on hash */
-+      IO_ACCT_STALLED_BIT     = 0,    /* stalled on hash */
- };
- 
- /*
-@@ -73,25 +73,24 @@ struct io_wqe_acct {
-       unsigned max_workers;
-       int index;
-       atomic_t nr_running;
-+      struct io_wq_work_list work_list;
-+      unsigned long flags;
- };
- 
- enum {
-       IO_WQ_ACCT_BOUND,
-       IO_WQ_ACCT_UNBOUND,
-+      IO_WQ_ACCT_NR,
- };
- 
- /*
-  * Per-node worker thread pool
-  */
- struct io_wqe {
--      struct {
--              raw_spinlock_t lock;
--              struct io_wq_work_list work_list;
--              unsigned flags;
--      } ____cacheline_aligned_in_smp;
-+      raw_spinlock_t lock;
-+      struct io_wqe_acct acct[2];
- 
-       int node;
--      struct io_wqe_acct acct[2];
- 
-       struct hlist_nulls_head free_list;
-       struct list_head all_list;
-@@ -196,11 +195,10 @@ static void io_worker_exit(struct io_worker *worker)
-       do_exit(0);
- }
- 
--static inline bool io_wqe_run_queue(struct io_wqe *wqe)
--      __must_hold(wqe->lock)
-+static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
- {
--      if (!wq_list_empty(&wqe->work_list) &&
--          !(wqe->flags & IO_WQE_FLAG_STALLED))
-+      if (!wq_list_empty(&acct->work_list) &&
-+          !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
-               return true;
-       return false;
- }
-@@ -209,7 +207,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
-  * Check head of free list for an available worker. If one isn't available,
-  * caller must create one.
-  */
--static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
-+static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
-+                                      struct io_wqe_acct *acct)
-       __must_hold(RCU)
- {
-       struct hlist_nulls_node *n;
-@@ -223,6 +222,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
-       hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
-               if (!io_worker_get(worker))
-                       continue;
-+              if (io_wqe_get_acct(worker) != acct) {
-+                      io_worker_release(worker);
-+                      continue;
-+              }
-               if (wake_up_process(worker->task)) {
-                       io_worker_release(worker);
-                       return true;
-@@ -341,7 +344,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
-       if (!(worker->flags & IO_WORKER_F_UP))
-               return;
- 
--      if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
-+      if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
-               atomic_inc(&acct->nr_running);
-               atomic_inc(&wqe->wq->worker_refs);
-               io_queue_worker_create(wqe, worker, acct);
-@@ -356,29 +359,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
-                            struct io_wq_work *work)
-       __must_hold(wqe->lock)
- {
--      bool worker_bound, work_bound;
--
--      BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
--
-       if (worker->flags & IO_WORKER_F_FREE) {
-               worker->flags &= ~IO_WORKER_F_FREE;
-               hlist_nulls_del_init_rcu(&worker->nulls_node);
-       }
--
--      /*
--       * If worker is moving from bound to unbound (or vice versa), then
--       * ensure we update the running accounting.
--       */
--      worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
--      work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
--      if (worker_bound != work_bound) {
--              int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
--              io_wqe_dec_running(worker);
--              worker->flags ^= IO_WORKER_F_BOUND;
--              wqe->acct[index].nr_workers--;
--              wqe->acct[index ^ 1].nr_workers++;
--              io_wqe_inc_running(worker);
--       }
- }
- 
- /*
-@@ -417,44 +401,23 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
-       spin_unlock(&wq->hash->wait.lock);
- }
- 
--/*
-- * We can always run the work if the worker is currently the same type as
-- * the work (eg both are bound, or both are unbound). If they are not the
-- * same, only allow it if incrementing the worker count would be allowed.
-- */
--static bool io_worker_can_run_work(struct io_worker *worker,
--                                 struct io_wq_work *work)
--{
--      struct io_wqe_acct *acct;
--
--      if (!(worker->flags & IO_WORKER_F_BOUND) !=
--          !(work->flags & IO_WQ_WORK_UNBOUND))
--              return true;
--
--      /* not the same type, check if we'd go over the limit */
--      acct = io_work_get_acct(worker->wqe, work);
--      return acct->nr_workers < acct->max_workers;
--}
--
--static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-+static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
-                                          struct io_worker *worker)
-       __must_hold(wqe->lock)
- {
-       struct io_wq_work_node *node, *prev;
-       struct io_wq_work *work, *tail;
-       unsigned int stall_hash = -1U;
-+      struct io_wqe *wqe = worker->wqe;
- 
--      wq_list_for_each(node, prev, &wqe->work_list) {
-+      wq_list_for_each(node, prev, &acct->work_list) {
-               unsigned int hash;
- 
-               work = container_of(node, struct io_wq_work, list);
- 
--              if (!io_worker_can_run_work(worker, work))
--                      break;
--
-               /* not hashed, can run anytime */
-               if (!io_wq_is_hashed(work)) {
--                      wq_list_del(&wqe->work_list, node, prev);
-+                      wq_list_del(&acct->work_list, node, prev);
-                       return work;
-               }
- 
-@@ -465,7 +428,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-               /* hashed, can run if not already running */
-               if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
-                       wqe->hash_tail[hash] = NULL;
--                      wq_list_cut(&wqe->work_list, &tail->list, prev);
-+                      wq_list_cut(&acct->work_list, &tail->list, prev);
-                       return work;
-               }
-               if (stall_hash == -1U)
-@@ -479,7 +442,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-                * Set this before dropping the lock to avoid racing with new
-                * work being added and clearing the stalled bit.
-                */
--              wqe->flags |= IO_WQE_FLAG_STALLED;
-+              set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-               raw_spin_unlock(&wqe->lock);
-               io_wait_on_hash(wqe, stall_hash);
-               raw_spin_lock(&wqe->lock);
-@@ -516,6 +479,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
- static void io_worker_handle_work(struct io_worker *worker)
-       __releases(wqe->lock)
- {
-+      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-       struct io_wqe *wqe = worker->wqe;
-       struct io_wq *wq = wqe->wq;
-       bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
-@@ -530,7 +494,7 @@ static void io_worker_handle_work(struct io_worker *worker)
-                * can't make progress, any work completion or insertion will
-                * clear the stalled flag.
-                */
--              work = io_get_next_work(wqe, worker);
-+              work = io_get_next_work(acct, worker);
-               if (work)
-                       __io_worker_busy(wqe, worker, work);
- 
-@@ -564,10 +528,10 @@ static void io_worker_handle_work(struct io_worker *worker)
- 
-                       if (hash != -1U && !next_hashed) {
-                               clear_bit(hash, &wq->hash->map);
-+                              clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-                               if (wq_has_sleeper(&wq->hash->wait))
-                                       wake_up(&wq->hash->wait);
-                               raw_spin_lock_irq(&wqe->lock);
--                              wqe->flags &= ~IO_WQE_FLAG_STALLED;
-                               /* skip unnecessary unlock-lock wqe->lock */
-                               if (!work)
-                                       goto get_next;
-@@ -582,6 +546,7 @@ static void io_worker_handle_work(struct io_worker *worker)
- static int io_wqe_worker(void *data)
- {
-       struct io_worker *worker = data;
-+      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-       struct io_wqe *wqe = worker->wqe;
-       struct io_wq *wq = wqe->wq;
-       char buf[TASK_COMM_LEN];
-@@ -597,7 +562,7 @@ static int io_wqe_worker(void *data)
-               set_current_state(TASK_INTERRUPTIBLE);
- loop:
-               raw_spin_lock_irq(&wqe->lock);
--              if (io_wqe_run_queue(wqe)) {
-+              if (io_acct_run_queue(acct)) {
-                       io_worker_handle_work(worker);
-                       goto loop;
-               }
-@@ -623,7 +588,7 @@ static int io_wqe_worker(void *data)
- 
-       if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-               raw_spin_lock_irq(&wqe->lock);
--              if (!wq_list_empty(&wqe->work_list))
-+              if (!wq_list_empty(&acct->work_list))
-                       io_worker_handle_work(worker);
-               else
-                       raw_spin_unlock_irq(&wqe->lock);
-@@ -769,12 +734,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
- 
- static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
- {
-+      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-       unsigned int hash;
-       struct io_wq_work *tail;
- 
-       if (!io_wq_is_hashed(work)) {
- append:
--              wq_list_add_tail(&work->list, &wqe->work_list);
-+              wq_list_add_tail(&work->list, &acct->work_list);
-               return;
-       }
- 
-@@ -784,7 +750,7 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
-       if (!tail)
-               goto append;
- 
--      wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
-+      wq_list_add_after(&work->list, &tail->list, &acct->work_list);
- }
- 
- static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
-@@ -806,10 +772,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
- 
-       raw_spin_lock_irqsave(&wqe->lock, flags);
-       io_wqe_insert_work(wqe, work);
--      wqe->flags &= ~IO_WQE_FLAG_STALLED;
-+      clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
- 
-       rcu_read_lock();
--      do_create = !io_wqe_activate_free_worker(wqe);
-+      do_create = !io_wqe_activate_free_worker(wqe, acct);
-       rcu_read_unlock();
- 
-       raw_spin_unlock_irqrestore(&wqe->lock, flags);
-@@ -862,6 +828,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
-                                        struct io_wq_work *work,
-                                        struct io_wq_work_node *prev)
- {
-+      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-       unsigned int hash = io_get_work_hash(work);
-       struct io_wq_work *prev_work = NULL;
- 
-@@ -873,7 +840,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
-               else
-                       wqe->hash_tail[hash] = NULL;
-       }
--      wq_list_del(&wqe->work_list, &work->list, prev);
-+      wq_list_del(&acct->work_list, &work->list, prev);
- }
- 
- static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-@@ -882,22 +849,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-       struct io_wq_work_node *node, *prev;
-       struct io_wq_work *work;
-       unsigned long flags;
-+      int i;
- 
- retry:
-       raw_spin_lock_irqsave(&wqe->lock, flags);
--      wq_list_for_each(node, prev, &wqe->work_list) {
--              work = container_of(node, struct io_wq_work, list);
--              if (!match->fn(work, match->data))
--                      continue;
--              io_wqe_remove_pending(wqe, work, prev);
--              raw_spin_unlock_irqrestore(&wqe->lock, flags);
--              io_run_cancel(work, wqe);
--              match->nr_pending++;
--              if (!match->cancel_all)
--                      return;
-+      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-+              struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
- 
--              /* not safe to continue after unlock */
--              goto retry;
-+              wq_list_for_each(node, prev, &acct->work_list) {
-+                      work = container_of(node, struct io_wq_work, list);
-+                      if (!match->fn(work, match->data))
-+                              continue;
-+                      io_wqe_remove_pending(wqe, work, prev);
-+                      raw_spin_unlock_irqrestore(&wqe->lock, flags);
-+                      io_run_cancel(work, wqe);
-+                      match->nr_pending++;
-+                      if (!match->cancel_all)
-+                              return;
-+
-+                      /* not safe to continue after unlock */
-+                      goto retry;
-+              }
-       }
-       raw_spin_unlock_irqrestore(&wqe->lock, flags);
- }
-@@ -958,18 +930,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
-                           int sync, void *key)
- {
-       struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
-+      int i;
- 
-       list_del_init(&wait->entry);
- 
-       rcu_read_lock();
--      io_wqe_activate_free_worker(wqe);
-+      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-+              struct io_wqe_acct *acct = &wqe->acct[i];
-+
-+              if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
-+                      io_wqe_activate_free_worker(wqe, acct);
-+      }
-       rcu_read_unlock();
-       return 1;
- }
- 
- struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
- {
--      int ret = -ENOMEM, node;
-+      int ret, node, i;
-       struct io_wq *wq;
- 
-       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
-@@ -1006,18 +984,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
-                       goto err;
-               wq->wqes[node] = wqe;
-               wqe->node = alloc_node;
--              wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
--              wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
-               wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
--              atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
-               wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
-                                       task_rlimit(current, RLIMIT_NPROC);
--              atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
--              wqe->wait.func = io_wqe_hash_wake;
-               INIT_LIST_HEAD(&wqe->wait.entry);
-+              wqe->wait.func = io_wqe_hash_wake;
-+              for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-+                      struct io_wqe_acct *acct = &wqe->acct[i];
-+
-+                      acct->index = i;
-+                      atomic_set(&acct->nr_running, 0);
-+                      INIT_WQ_LIST(&acct->work_list);
-+              }
-               wqe->wq = wq;
-               raw_spin_lock_init(&wqe->lock);
--              INIT_WQ_LIST(&wqe->work_list);
-               INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
-               INIT_LIST_HEAD(&wqe->all_list);
-       }
--- 
-2.30.2
-
author	Thomas Lamprecht <t.lamprecht@proxmox.com>
	Mon, 29 Nov 2021 11:07:43 +0000 (12:07 +0100)
committer	Thomas Lamprecht <t.lamprecht@proxmox.com>
	Mon, 29 Nov 2021 17:09:38 +0000 (18:09 +0100)
patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch		patch \| blob \| blame \| history
patches/kernel/0008-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch	[new file with mode: 0644]	patch \| blob
patches/kernel/0009-Revert-block-remove-the-ERESTARTSYS-handling-in-blkd.patch	[deleted file]	patch \| blob \| blame \| history
patches/kernel/0009-io-wq-fix-queue-stalling-race.patch	[new file with mode: 0644]	patch \| blob
patches/kernel/0010-io-wq-fix-queue-stalling-race.patch	[deleted file]	patch \| blob \| blame \| history
patches/kernel/0010-io-wq-split-bounded-and-unbounded-work-into-separate.patch	[new file with mode: 0644]	patch \| blob
patches/kernel/0011-io-wq-split-bounded-and-unbounded-work-into-separate.patch	[deleted file]	patch \| blob \| blame \| history