update to Ubuntu-5.13.0-29.32

author Thomas Lamprecht <t.lamprecht@proxmox.com>

Mon, 31 Jan 2022 09:08:41 +0000 (10:08 +0100)

committer Thomas Lamprecht <t.lamprecht@proxmox.com>

Mon, 31 Jan 2022 09:08:41 +0000 (10:08 +0100)
author Thomas Lamprecht <t.lamprecht@proxmox.com>
Mon, 31 Jan 2022 09:08:41 +0000 (10:08 +0100)
committer Thomas Lamprecht <t.lamprecht@proxmox.com>
Mon, 31 Jan 2022 09:08:41 +0000 (10:08 +0100)
diff --git a/patches/kernel/0008-io-wq-split-bounded-and-unbounded-work-into-separate.patch b/patches/kernel/0008-io-wq-split-bounded-and-unbounded-work-into-separate.patch

new file mode 100644 (file)

index 0000000..2671f62
--- /dev/null
+++ b/patches/kernel/0008-io-wq-split-bounded-and-unbounded-work-into-separate.patch
@@ -0,0 +1,418 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 31 Aug 2021 13:57:32 -0600
+Subject: [PATCH] io-wq: split bounded and unbounded work into separate lists
+
+We've got a few issues that all boil down to the fact that we have one
+list of pending work items, yet two different types of workers to
+serve them. This causes some oddities around workers switching type and
+even hashed work vs regular work on the same bounded list.
+
+Just separate them out cleanly, similarly to how we already do
+accounting of what is running. That provides a clean separation and
+removes some corner cases that can cause stalls when handling IO
+that is punted to io-wq.
+
+Fixes: ecc53c48c13d ("io-wq: check max_worker limits if a worker transitions bound state")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[backport]
+Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ fs/io-wq.c | 160 +++++++++++++++++++++++------------------------------
+ 1 file changed, 70 insertions(+), 90 deletions(-)
+
+diff --git a/fs/io-wq.c b/fs/io-wq.c
+index ba7aaf2b95d0..6710da3d4445 100644
+--- a/fs/io-wq.c
++++ b/fs/io-wq.c
+@@ -34,7 +34,7 @@ enum {
+ };
+ 
+ enum {
+-      IO_WQE_FLAG_STALLED     = 1,    /* stalled on hash */
++      IO_ACCT_STALLED_BIT     = 0,    /* stalled on hash */
+ };
+ 
+ /*
+@@ -73,26 +73,25 @@ struct io_wqe_acct {
+       unsigned max_workers;
+       int index;
+       atomic_t nr_running;
++      struct io_wq_work_list work_list;
++      unsigned long flags;
+ };
+ 
+ enum {
+       IO_WQ_ACCT_BOUND,
+       IO_WQ_ACCT_UNBOUND,
++      IO_WQ_ACCT_NR,
+ };
+ 
+ /*
+  * Per-node worker thread pool
+  */
+ struct io_wqe {
+-      struct {
+-              raw_spinlock_t lock;
+-              struct io_wq_work_list work_list;
+-              unsigned flags;
+-      } ____cacheline_aligned_in_smp;
+-
+-      int node;
++      raw_spinlock_t lock;
+       struct io_wqe_acct acct[2];
+ 
++      int node;
++
+       struct hlist_nulls_head free_list;
+       struct list_head all_list;
+ 
+@@ -196,11 +195,10 @@ static void io_worker_exit(struct io_worker *worker)
+       do_exit(0);
+ }
+ 
+-static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+-      __must_hold(wqe->lock)
++static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
+ {
+-      if (!wq_list_empty(&wqe->work_list) &&
+-          !(wqe->flags & IO_WQE_FLAG_STALLED))
++      if (!wq_list_empty(&acct->work_list) &&
++          !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
+               return true;
+       return false;
+ }
+@@ -209,7 +207,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+  * Check head of free list for an available worker. If one isn't available,
+  * caller must create one.
+  */
+-static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
++static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
++                                      struct io_wqe_acct *acct)
+       __must_hold(RCU)
+ {
+       struct hlist_nulls_node *n;
+@@ -223,6 +222,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
+       hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+               if (!io_worker_get(worker))
+                       continue;
++              if (io_wqe_get_acct(worker) != acct) {
++                      io_worker_release(worker);
++                      continue;
++              }
+               if (wake_up_process(worker->task)) {
+                       io_worker_release(worker);
+                       return true;
+@@ -341,7 +344,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
+       if (!(worker->flags & IO_WORKER_F_UP))
+               return;
+ 
+-      if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
++      if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
+               atomic_inc(&acct->nr_running);
+               atomic_inc(&wqe->wq->worker_refs);
+               io_queue_worker_create(wqe, worker, acct);
+@@ -356,29 +359,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
+                            struct io_wq_work *work)
+       __must_hold(wqe->lock)
+ {
+-      bool worker_bound, work_bound;
+-
+-      BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
+-
+       if (worker->flags & IO_WORKER_F_FREE) {
+               worker->flags &= ~IO_WORKER_F_FREE;
+               hlist_nulls_del_init_rcu(&worker->nulls_node);
+       }
+-
+-      /*
+-       * If worker is moving from bound to unbound (or vice versa), then
+-       * ensure we update the running accounting.
+-       */
+-      worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
+-      work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
+-      if (worker_bound != work_bound) {
+-              int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
+-              io_wqe_dec_running(worker);
+-              worker->flags ^= IO_WORKER_F_BOUND;
+-              wqe->acct[index].nr_workers--;
+-              wqe->acct[index ^ 1].nr_workers++;
+-              io_wqe_inc_running(worker);
+-       }
+ }
+ 
+ /*
+@@ -420,44 +404,23 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+       return ret;
+ }
+ 
+-/*
+- * We can always run the work if the worker is currently the same type as
+- * the work (eg both are bound, or both are unbound). If they are not the
+- * same, only allow it if incrementing the worker count would be allowed.
+- */
+-static bool io_worker_can_run_work(struct io_worker *worker,
+-                                 struct io_wq_work *work)
+-{
+-      struct io_wqe_acct *acct;
+-
+-      if (!(worker->flags & IO_WORKER_F_BOUND) !=
+-          !(work->flags & IO_WQ_WORK_UNBOUND))
+-              return true;
+-
+-      /* not the same type, check if we'd go over the limit */
+-      acct = io_work_get_acct(worker->wqe, work);
+-      return acct->nr_workers < acct->max_workers;
+-}
+-
+-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
++static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+                                          struct io_worker *worker)
+       __must_hold(wqe->lock)
+ {
+       struct io_wq_work_node *node, *prev;
+       struct io_wq_work *work, *tail;
+       unsigned int stall_hash = -1U;
++      struct io_wqe *wqe = worker->wqe;
+ 
+-      wq_list_for_each(node, prev, &wqe->work_list) {
++      wq_list_for_each(node, prev, &acct->work_list) {
+               unsigned int hash;
+ 
+               work = container_of(node, struct io_wq_work, list);
+ 
+-              if (!io_worker_can_run_work(worker, work))
+-                      break;
+-
+               /* not hashed, can run anytime */
+               if (!io_wq_is_hashed(work)) {
+-                      wq_list_del(&wqe->work_list, node, prev);
++                      wq_list_del(&acct->work_list, node, prev);
+                       return work;
+               }
+ 
+@@ -468,7 +431,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+               /* hashed, can run if not already running */
+               if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
+                       wqe->hash_tail[hash] = NULL;
+-                      wq_list_cut(&wqe->work_list, &tail->list, prev);
++                      wq_list_cut(&acct->work_list, &tail->list, prev);
+                       return work;
+               }
+               if (stall_hash == -1U)
+@@ -484,12 +447,12 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+                * Set this before dropping the lock to avoid racing with new
+                * work being added and clearing the stalled bit.
+                */
+-              wqe->flags |= IO_WQE_FLAG_STALLED;
++              set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+               raw_spin_unlock(&wqe->lock);
+               unstalled = io_wait_on_hash(wqe, stall_hash);
+               raw_spin_lock(&wqe->lock);
+               if (unstalled) {
+-                      wqe->flags &= ~IO_WQE_FLAG_STALLED;
++                      clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                       if (wq_has_sleeper(&wqe->wq->hash->wait))
+                               wake_up(&wqe->wq->hash->wait);
+               }
+@@ -526,6 +489,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+ static void io_worker_handle_work(struct io_worker *worker)
+       __releases(wqe->lock)
+ {
++      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+       struct io_wqe *wqe = worker->wqe;
+       struct io_wq *wq = wqe->wq;
+       bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
+@@ -540,7 +504,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+                * can't make progress, any work completion or insertion will
+                * clear the stalled flag.
+                */
+-              work = io_get_next_work(wqe, worker);
++              work = io_get_next_work(acct, worker);
+               if (work)
+                       __io_worker_busy(wqe, worker, work);
+ 
+@@ -576,7 +540,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+                               /* serialize hash clear with wake_up() */
+                               spin_lock_irq(&wq->hash->wait.lock);
+                               clear_bit(hash, &wq->hash->map);
+-                              wqe->flags &= ~IO_WQE_FLAG_STALLED;
++                              clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                               spin_unlock_irq(&wq->hash->wait.lock);
+                               if (wq_has_sleeper(&wq->hash->wait))
+                                       wake_up(&wq->hash->wait);
+@@ -595,6 +559,7 @@ static void io_worker_handle_work(struct io_worker *worker)
+ static int io_wqe_worker(void *data)
+ {
+       struct io_worker *worker = data;
++      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+       struct io_wqe *wqe = worker->wqe;
+       struct io_wq *wq = wqe->wq;
+       char buf[TASK_COMM_LEN];
+@@ -610,7 +575,7 @@ static int io_wqe_worker(void *data)
+               set_current_state(TASK_INTERRUPTIBLE);
+ loop:
+               raw_spin_lock_irq(&wqe->lock);
+-              if (io_wqe_run_queue(wqe)) {
++              if (io_acct_run_queue(acct)) {
+                       io_worker_handle_work(worker);
+                       goto loop;
+               }
+@@ -636,7 +601,7 @@ static int io_wqe_worker(void *data)
+ 
+       if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+               raw_spin_lock_irq(&wqe->lock);
+-              if (!wq_list_empty(&wqe->work_list))
++              if (!wq_list_empty(&acct->work_list))
+                       io_worker_handle_work(worker);
+               else
+                       raw_spin_unlock_irq(&wqe->lock);
+@@ -782,12 +747,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
+ 
+ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+ {
++      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+       unsigned int hash;
+       struct io_wq_work *tail;
+ 
+       if (!io_wq_is_hashed(work)) {
+ append:
+-              wq_list_add_tail(&work->list, &wqe->work_list);
++              wq_list_add_tail(&work->list, &acct->work_list);
+               return;
+       }
+ 
+@@ -797,7 +763,7 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+       if (!tail)
+               goto append;
+ 
+-      wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
++      wq_list_add_after(&work->list, &tail->list, &acct->work_list);
+ }
+ 
+ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+@@ -819,10 +785,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+ 
+       raw_spin_lock_irqsave(&wqe->lock, flags);
+       io_wqe_insert_work(wqe, work);
+-      wqe->flags &= ~IO_WQE_FLAG_STALLED;
++      clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+ 
+       rcu_read_lock();
+-      do_create = !io_wqe_activate_free_worker(wqe);
++      do_create = !io_wqe_activate_free_worker(wqe, acct);
+       rcu_read_unlock();
+ 
+       raw_spin_unlock_irqrestore(&wqe->lock, flags);
+@@ -875,6 +841,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+                                        struct io_wq_work *work,
+                                        struct io_wq_work_node *prev)
+ {
++      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+       unsigned int hash = io_get_work_hash(work);
+       struct io_wq_work *prev_work = NULL;
+ 
+@@ -886,7 +853,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+               else
+                       wqe->hash_tail[hash] = NULL;
+       }
+-      wq_list_del(&wqe->work_list, &work->list, prev);
++      wq_list_del(&acct->work_list, &work->list, prev);
+ }
+ 
+ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+@@ -895,22 +862,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+       struct io_wq_work_node *node, *prev;
+       struct io_wq_work *work;
+       unsigned long flags;
++      int i;
+ 
+ retry:
+       raw_spin_lock_irqsave(&wqe->lock, flags);
+-      wq_list_for_each(node, prev, &wqe->work_list) {
+-              work = container_of(node, struct io_wq_work, list);
+-              if (!match->fn(work, match->data))
+-                      continue;
+-              io_wqe_remove_pending(wqe, work, prev);
+-              raw_spin_unlock_irqrestore(&wqe->lock, flags);
+-              io_run_cancel(work, wqe);
+-              match->nr_pending++;
+-              if (!match->cancel_all)
+-                      return;
++      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++              struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
+ 
+-              /* not safe to continue after unlock */
+-              goto retry;
++              wq_list_for_each(node, prev, &acct->work_list) {
++                      work = container_of(node, struct io_wq_work, list);
++                      if (!match->fn(work, match->data))
++                              continue;
++                      io_wqe_remove_pending(wqe, work, prev);
++                      raw_spin_unlock_irqrestore(&wqe->lock, flags);
++                      io_run_cancel(work, wqe);
++                      match->nr_pending++;
++                      if (!match->cancel_all)
++                              return;
++
++                      /* not safe to continue after unlock */
++                      goto retry;
++              }
+       }
+       raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ }
+@@ -971,18 +943,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+                           int sync, void *key)
+ {
+       struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
++      int i;
+ 
+       list_del_init(&wait->entry);
+ 
+       rcu_read_lock();
+-      io_wqe_activate_free_worker(wqe);
++      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++              struct io_wqe_acct *acct = &wqe->acct[i];
++
++              if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
++                      io_wqe_activate_free_worker(wqe, acct);
++      }
+       rcu_read_unlock();
+       return 1;
+ }
+ 
+ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+ {
+-      int ret = -ENOMEM, node;
++      int ret, node, i;
+       struct io_wq *wq;
+ 
+       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
+@@ -1019,18 +997,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+                       goto err;
+               wq->wqes[node] = wqe;
+               wqe->node = alloc_node;
+-              wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
+-              wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
+               wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
+-              atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
+               wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+                                       task_rlimit(current, RLIMIT_NPROC);
+-              atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+-              wqe->wait.func = io_wqe_hash_wake;
+               INIT_LIST_HEAD(&wqe->wait.entry);
++              wqe->wait.func = io_wqe_hash_wake;
++              for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++                      struct io_wqe_acct *acct = &wqe->acct[i];
++
++                      acct->index = i;
++                      atomic_set(&acct->nr_running, 0);
++                      INIT_WQ_LIST(&acct->work_list);
++              }
+               wqe->wq = wq;
+               raw_spin_lock_init(&wqe->lock);
+-              INIT_WQ_LIST(&wqe->work_list);
+               INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
+               INIT_LIST_HEAD(&wqe->all_list);
+       }
diff --git a/patches/kernel/0009-do-not-generate-split-BTF-type-info-per-default.patch b/patches/kernel/0009-do-not-generate-split-BTF-type-info-per-default.patch

new file mode 100644 (file)

index 0000000..3f848c1
--- /dev/null
+++ b/patches/kernel/0009-do-not-generate-split-BTF-type-info-per-default.patch
@@ -0,0 +1,28 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
+Date: Thu, 14 Sep 2017 11:09:58 +0200
+Subject: [PATCH] do not generate split BTF type info per default
+
+This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
+
+It breaks ZFS sometimes:
+https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ lib/Kconfig.debug | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
+index 25dc20175bba..7ff51a3f65e6 100644
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -314,7 +314,7 @@ config PAHOLE_HAS_SPLIT_BTF
+       def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
+ 
+ config DEBUG_INFO_BTF_MODULES
+-      def_bool y
++      def_bool n
+       depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
+       help
+         Generate compact split BTF type information for kernel modules.
diff --git a/patches/kernel/0009-io-wq-split-bounded-and-unbounded-work-into-separate.patch b/patches/kernel/0009-io-wq-split-bounded-and-unbounded-work-into-separate.patch

deleted file mode 100644 (file)

index 2671f62..0000000
--- a/patches/kernel/0009-io-wq-split-bounded-and-unbounded-work-into-separate.patch
+++ /dev/null
@@ -1,418 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jens Axboe <axboe@kernel.dk>
-Date: Tue, 31 Aug 2021 13:57:32 -0600
-Subject: [PATCH] io-wq: split bounded and unbounded work into separate lists
-
-We've got a few issues that all boil down to the fact that we have one
-list of pending work items, yet two different types of workers to
-serve them. This causes some oddities around workers switching type and
-even hashed work vs regular work on the same bounded list.
-
-Just separate them out cleanly, similarly to how we already do
-accounting of what is running. That provides a clean separation and
-removes some corner cases that can cause stalls when handling IO
-that is punted to io-wq.
-
-Fixes: ecc53c48c13d ("io-wq: check max_worker limits if a worker transitions bound state")
-Signed-off-by: Jens Axboe <axboe@kernel.dk>
-[backport]
-Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- fs/io-wq.c | 160 +++++++++++++++++++++++------------------------------
- 1 file changed, 70 insertions(+), 90 deletions(-)
-
-diff --git a/fs/io-wq.c b/fs/io-wq.c
-index ba7aaf2b95d0..6710da3d4445 100644
---- a/fs/io-wq.c
-+++ b/fs/io-wq.c
-@@ -34,7 +34,7 @@ enum {
- };
- 
- enum {
--      IO_WQE_FLAG_STALLED     = 1,    /* stalled on hash */
-+      IO_ACCT_STALLED_BIT     = 0,    /* stalled on hash */
- };
- 
- /*
-@@ -73,26 +73,25 @@ struct io_wqe_acct {
-       unsigned max_workers;
-       int index;
-       atomic_t nr_running;
-+      struct io_wq_work_list work_list;
-+      unsigned long flags;
- };
- 
- enum {
-       IO_WQ_ACCT_BOUND,
-       IO_WQ_ACCT_UNBOUND,
-+      IO_WQ_ACCT_NR,
- };
- 
- /*
-  * Per-node worker thread pool
-  */
- struct io_wqe {
--      struct {
--              raw_spinlock_t lock;
--              struct io_wq_work_list work_list;
--              unsigned flags;
--      } ____cacheline_aligned_in_smp;
--
--      int node;
-+      raw_spinlock_t lock;
-       struct io_wqe_acct acct[2];
- 
-+      int node;
-+
-       struct hlist_nulls_head free_list;
-       struct list_head all_list;
- 
-@@ -196,11 +195,10 @@ static void io_worker_exit(struct io_worker *worker)
-       do_exit(0);
- }
- 
--static inline bool io_wqe_run_queue(struct io_wqe *wqe)
--      __must_hold(wqe->lock)
-+static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
- {
--      if (!wq_list_empty(&wqe->work_list) &&
--          !(wqe->flags & IO_WQE_FLAG_STALLED))
-+      if (!wq_list_empty(&acct->work_list) &&
-+          !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
-               return true;
-       return false;
- }
-@@ -209,7 +207,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
-  * Check head of free list for an available worker. If one isn't available,
-  * caller must create one.
-  */
--static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
-+static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
-+                                      struct io_wqe_acct *acct)
-       __must_hold(RCU)
- {
-       struct hlist_nulls_node *n;
-@@ -223,6 +222,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
-       hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
-               if (!io_worker_get(worker))
-                       continue;
-+              if (io_wqe_get_acct(worker) != acct) {
-+                      io_worker_release(worker);
-+                      continue;
-+              }
-               if (wake_up_process(worker->task)) {
-                       io_worker_release(worker);
-                       return true;
-@@ -341,7 +344,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
-       if (!(worker->flags & IO_WORKER_F_UP))
-               return;
- 
--      if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
-+      if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
-               atomic_inc(&acct->nr_running);
-               atomic_inc(&wqe->wq->worker_refs);
-               io_queue_worker_create(wqe, worker, acct);
-@@ -356,29 +359,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
-                            struct io_wq_work *work)
-       __must_hold(wqe->lock)
- {
--      bool worker_bound, work_bound;
--
--      BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
--
-       if (worker->flags & IO_WORKER_F_FREE) {
-               worker->flags &= ~IO_WORKER_F_FREE;
-               hlist_nulls_del_init_rcu(&worker->nulls_node);
-       }
--
--      /*
--       * If worker is moving from bound to unbound (or vice versa), then
--       * ensure we update the running accounting.
--       */
--      worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
--      work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
--      if (worker_bound != work_bound) {
--              int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
--              io_wqe_dec_running(worker);
--              worker->flags ^= IO_WORKER_F_BOUND;
--              wqe->acct[index].nr_workers--;
--              wqe->acct[index ^ 1].nr_workers++;
--              io_wqe_inc_running(worker);
--       }
- }
- 
- /*
-@@ -420,44 +404,23 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
-       return ret;
- }
- 
--/*
-- * We can always run the work if the worker is currently the same type as
-- * the work (eg both are bound, or both are unbound). If they are not the
-- * same, only allow it if incrementing the worker count would be allowed.
-- */
--static bool io_worker_can_run_work(struct io_worker *worker,
--                                 struct io_wq_work *work)
--{
--      struct io_wqe_acct *acct;
--
--      if (!(worker->flags & IO_WORKER_F_BOUND) !=
--          !(work->flags & IO_WQ_WORK_UNBOUND))
--              return true;
--
--      /* not the same type, check if we'd go over the limit */
--      acct = io_work_get_acct(worker->wqe, work);
--      return acct->nr_workers < acct->max_workers;
--}
--
--static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-+static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
-                                          struct io_worker *worker)
-       __must_hold(wqe->lock)
- {
-       struct io_wq_work_node *node, *prev;
-       struct io_wq_work *work, *tail;
-       unsigned int stall_hash = -1U;
-+      struct io_wqe *wqe = worker->wqe;
- 
--      wq_list_for_each(node, prev, &wqe->work_list) {
-+      wq_list_for_each(node, prev, &acct->work_list) {
-               unsigned int hash;
- 
-               work = container_of(node, struct io_wq_work, list);
- 
--              if (!io_worker_can_run_work(worker, work))
--                      break;
--
-               /* not hashed, can run anytime */
-               if (!io_wq_is_hashed(work)) {
--                      wq_list_del(&wqe->work_list, node, prev);
-+                      wq_list_del(&acct->work_list, node, prev);
-                       return work;
-               }
- 
-@@ -468,7 +431,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-               /* hashed, can run if not already running */
-               if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
-                       wqe->hash_tail[hash] = NULL;
--                      wq_list_cut(&wqe->work_list, &tail->list, prev);
-+                      wq_list_cut(&acct->work_list, &tail->list, prev);
-                       return work;
-               }
-               if (stall_hash == -1U)
-@@ -484,12 +447,12 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-                * Set this before dropping the lock to avoid racing with new
-                * work being added and clearing the stalled bit.
-                */
--              wqe->flags |= IO_WQE_FLAG_STALLED;
-+              set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-               raw_spin_unlock(&wqe->lock);
-               unstalled = io_wait_on_hash(wqe, stall_hash);
-               raw_spin_lock(&wqe->lock);
-               if (unstalled) {
--                      wqe->flags &= ~IO_WQE_FLAG_STALLED;
-+                      clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-                       if (wq_has_sleeper(&wqe->wq->hash->wait))
-                               wake_up(&wqe->wq->hash->wait);
-               }
-@@ -526,6 +489,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
- static void io_worker_handle_work(struct io_worker *worker)
-       __releases(wqe->lock)
- {
-+      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-       struct io_wqe *wqe = worker->wqe;
-       struct io_wq *wq = wqe->wq;
-       bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
-@@ -540,7 +504,7 @@ static void io_worker_handle_work(struct io_worker *worker)
-                * can't make progress, any work completion or insertion will
-                * clear the stalled flag.
-                */
--              work = io_get_next_work(wqe, worker);
-+              work = io_get_next_work(acct, worker);
-               if (work)
-                       __io_worker_busy(wqe, worker, work);
- 
-@@ -576,7 +540,7 @@ static void io_worker_handle_work(struct io_worker *worker)
-                               /* serialize hash clear with wake_up() */
-                               spin_lock_irq(&wq->hash->wait.lock);
-                               clear_bit(hash, &wq->hash->map);
--                              wqe->flags &= ~IO_WQE_FLAG_STALLED;
-+                              clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-                               spin_unlock_irq(&wq->hash->wait.lock);
-                               if (wq_has_sleeper(&wq->hash->wait))
-                                       wake_up(&wq->hash->wait);
-@@ -595,6 +559,7 @@ static void io_worker_handle_work(struct io_worker *worker)
- static int io_wqe_worker(void *data)
- {
-       struct io_worker *worker = data;
-+      struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-       struct io_wqe *wqe = worker->wqe;
-       struct io_wq *wq = wqe->wq;
-       char buf[TASK_COMM_LEN];
-@@ -610,7 +575,7 @@ static int io_wqe_worker(void *data)
-               set_current_state(TASK_INTERRUPTIBLE);
- loop:
-               raw_spin_lock_irq(&wqe->lock);
--              if (io_wqe_run_queue(wqe)) {
-+              if (io_acct_run_queue(acct)) {
-                       io_worker_handle_work(worker);
-                       goto loop;
-               }
-@@ -636,7 +601,7 @@ static int io_wqe_worker(void *data)
- 
-       if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-               raw_spin_lock_irq(&wqe->lock);
--              if (!wq_list_empty(&wqe->work_list))
-+              if (!wq_list_empty(&acct->work_list))
-                       io_worker_handle_work(worker);
-               else
-                       raw_spin_unlock_irq(&wqe->lock);
-@@ -782,12 +747,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
- 
- static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
- {
-+      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-       unsigned int hash;
-       struct io_wq_work *tail;
- 
-       if (!io_wq_is_hashed(work)) {
- append:
--              wq_list_add_tail(&work->list, &wqe->work_list);
-+              wq_list_add_tail(&work->list, &acct->work_list);
-               return;
-       }
- 
-@@ -797,7 +763,7 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
-       if (!tail)
-               goto append;
- 
--      wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
-+      wq_list_add_after(&work->list, &tail->list, &acct->work_list);
- }
- 
- static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
-@@ -819,10 +785,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
- 
-       raw_spin_lock_irqsave(&wqe->lock, flags);
-       io_wqe_insert_work(wqe, work);
--      wqe->flags &= ~IO_WQE_FLAG_STALLED;
-+      clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
- 
-       rcu_read_lock();
--      do_create = !io_wqe_activate_free_worker(wqe);
-+      do_create = !io_wqe_activate_free_worker(wqe, acct);
-       rcu_read_unlock();
- 
-       raw_spin_unlock_irqrestore(&wqe->lock, flags);
-@@ -875,6 +841,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
-                                        struct io_wq_work *work,
-                                        struct io_wq_work_node *prev)
- {
-+      struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-       unsigned int hash = io_get_work_hash(work);
-       struct io_wq_work *prev_work = NULL;
- 
-@@ -886,7 +853,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
-               else
-                       wqe->hash_tail[hash] = NULL;
-       }
--      wq_list_del(&wqe->work_list, &work->list, prev);
-+      wq_list_del(&acct->work_list, &work->list, prev);
- }
- 
- static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-@@ -895,22 +862,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-       struct io_wq_work_node *node, *prev;
-       struct io_wq_work *work;
-       unsigned long flags;
-+      int i;
- 
- retry:
-       raw_spin_lock_irqsave(&wqe->lock, flags);
--      wq_list_for_each(node, prev, &wqe->work_list) {
--              work = container_of(node, struct io_wq_work, list);
--              if (!match->fn(work, match->data))
--                      continue;
--              io_wqe_remove_pending(wqe, work, prev);
--              raw_spin_unlock_irqrestore(&wqe->lock, flags);
--              io_run_cancel(work, wqe);
--              match->nr_pending++;
--              if (!match->cancel_all)
--                      return;
-+      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-+              struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
- 
--              /* not safe to continue after unlock */
--              goto retry;
-+              wq_list_for_each(node, prev, &acct->work_list) {
-+                      work = container_of(node, struct io_wq_work, list);
-+                      if (!match->fn(work, match->data))
-+                              continue;
-+                      io_wqe_remove_pending(wqe, work, prev);
-+                      raw_spin_unlock_irqrestore(&wqe->lock, flags);
-+                      io_run_cancel(work, wqe);
-+                      match->nr_pending++;
-+                      if (!match->cancel_all)
-+                              return;
-+
-+                      /* not safe to continue after unlock */
-+                      goto retry;
-+              }
-       }
-       raw_spin_unlock_irqrestore(&wqe->lock, flags);
- }
-@@ -971,18 +943,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
-                           int sync, void *key)
- {
-       struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
-+      int i;
- 
-       list_del_init(&wait->entry);
- 
-       rcu_read_lock();
--      io_wqe_activate_free_worker(wqe);
-+      for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-+              struct io_wqe_acct *acct = &wqe->acct[i];
-+
-+              if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
-+                      io_wqe_activate_free_worker(wqe, acct);
-+      }
-       rcu_read_unlock();
-       return 1;
- }
- 
- struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
- {
--      int ret = -ENOMEM, node;
-+      int ret, node, i;
-       struct io_wq *wq;
- 
-       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
-@@ -1019,18 +997,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
-                       goto err;
-               wq->wqes[node] = wqe;
-               wqe->node = alloc_node;
--              wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
--              wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
-               wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
--              atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
-               wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
-                                       task_rlimit(current, RLIMIT_NPROC);
--              atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
--              wqe->wait.func = io_wqe_hash_wake;
-               INIT_LIST_HEAD(&wqe->wait.entry);
-+              wqe->wait.func = io_wqe_hash_wake;
-+              for (i = 0; i < IO_WQ_ACCT_NR; i++) {
-+                      struct io_wqe_acct *acct = &wqe->acct[i];
-+
-+                      acct->index = i;
-+                      atomic_set(&acct->nr_running, 0);
-+                      INIT_WQ_LIST(&acct->work_list);
-+              }
-               wqe->wq = wq;
-               raw_spin_lock_init(&wqe->lock);
--              INIT_WQ_LIST(&wqe->work_list);
-               INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
-               INIT_LIST_HEAD(&wqe->all_list);
-       }
diff --git a/patches/kernel/0011-disable-split-btf.patch b/patches/kernel/0011-disable-split-btf.patch

deleted file mode 100644 (file)

index 6b1d55c..0000000
--- a/patches/kernel/0011-disable-split-btf.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Thu, 14 Sep 2017 11:09:58 +0200
-Subject: do not generate split BTF type info per default
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
-
-It breaks ZFS sometimes:
-https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index 25dc20175bba..7ff51a3f65e6 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -314,7 +314,7 @@ config PAHOLE_HAS_SPLIT_BTF
-       def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
- 
- config DEBUG_INFO_BTF_MODULES
--      def_bool y
-+      def_bool n
-       depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
-       help
-         Generate compact split BTF type information for kernel modules.
diff --git a/patches/kernel/0011-vfs-fix.patch b/patches/kernel/0011-vfs-fix.patch

deleted file mode 100644 (file)

index dc95794..0000000
--- a/patches/kernel/0011-vfs-fix.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jamie Hill-Daniel <jamie@hill-daniel.co.uk>
-Date: Tue, 18 Jan 2022 08:06:04 +0100
-Subject: vfs: fs_context: fix up param length parsing in legacy_parse_param
-
-commit 722d94847de29310e8aa03fcbdb41fc92c521756 upstream.
-
-The "PAGE_SIZE - 2 - size" calculation in legacy_parse_param() is an
-unsigned type so a large value of "size" results in a high positive
-value instead of a negative value as expected.  Fix this by getting rid
-of the subtraction.
-
-Signed-off-by: Jamie Hill-Daniel <jamie@hill-daniel.co.uk>
-Signed-off-by: William Liu <willsroot@protonmail.com>
-Tested-by: Salvatore Bonaccorso <carnil@debian.org>
-Tested-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
-Acked-by: Dan Carpenter <dan.carpenter@oracle.com>
-Acked-by: Al Viro <viro@zeniv.linux.org.uk>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- fs/fs_context.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/fs/fs_context.c b/fs/fs_context.c
-index b7e43a780a625..24ce12f0db32e 100644
---- a/fs/fs_context.c
-+++ b/fs/fs_context.c
-@@ -548,7 +548,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
-                             param->key);
-       }
- 
--      if (len > PAGE_SIZE - 2 - size)
-+      if (size + len + 2 > PAGE_SIZE)
-               return invalf(fc, "VFS: Legacy: Cumulative options too large");
-       if (strchr(param->key, ',') ||
-           (param->type == fs_value_is_string &&
diff --git a/submodules/ubuntu-impish b/submodules/ubuntu-impish

index f71c3af206e5a967bf6058be2b4e6cc8df42bc29..ea234db8599f60b16db002b6b739e835c4204511 160000 (submodule)
--- a/submodules/ubuntu-impish
+++ b/submodules/ubuntu-impish
@@ -1 +1 @@
-Subproject commit f71c3af206e5a967bf6058be2b4e6cc8df42bc29
+Subproject commit ea234db8599f60b16db002b6b739e835c4204511
author	Thomas Lamprecht <t.lamprecht@proxmox.com>
	Mon, 31 Jan 2022 09:08:41 +0000 (10:08 +0100)
committer	Thomas Lamprecht <t.lamprecht@proxmox.com>
	Mon, 31 Jan 2022 09:08:41 +0000 (10:08 +0100)
patches/kernel/0008-io-wq-split-bounded-and-unbounded-work-into-separate.patch	[new file with mode: 0644]	patch \| blob
patches/kernel/0009-do-not-generate-split-BTF-type-info-per-default.patch	[new file with mode: 0644]	patch \| blob
patches/kernel/0009-io-wq-split-bounded-and-unbounded-work-into-separate.patch	[deleted file]	patch \| blob \| blame \| history
patches/kernel/0011-disable-split-btf.patch	[deleted file]	patch \| blob \| blame \| history
patches/kernel/0011-vfs-fix.patch	[deleted file]	patch \| blob \| blame \| history
submodules/ubuntu-impish		patch \| blob \| blame \| history