cifs: fix confusing unneeded warning message on smb2.1 and earlier

[mirror_ubuntu-jammy-kernel.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index e68d27829bb21f4c4f1b8358fa2fd4e3bd2928fb..156c54ebb62b780a81ff4518a6f91951f5854fc3 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -456,6 +456,8 @@ struct io_ring_ctx {
                 struct work_struct              exit_work;
                 struct list_head                tctx_list;
                 struct completion               ref_comp;
+               u32                             iowq_limits[2];
+               bool                            iowq_limits_set;
         };
  };
  
@@ -1202,6 +1204,7 @@ static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
  
  static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
                           bool cancel_all)
+       __must_hold(&req->ctx->timeout_lock)
  {
         struct io_kiocb *req;
  
@@ -1217,6 +1220,44 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
         return false;
  }
  
+static bool io_match_linked(struct io_kiocb *head)
+{      /* true if any request io_for_each_link() visits has REQ_F_INFLIGHT */
+       struct io_kiocb *req;
+
+       io_for_each_link(req, head) {
+               if (req->flags & REQ_F_INFLIGHT)
+                       return true;    /* one flagged request is enough */
+       }
+       return false;
+}
+
+/*
+ * As io_match_task() but safe against racing with linked timeouts: it takes
+ * ctx->timeout_lock itself when needed, so the caller must not hold it.
+ */
+static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
+                              bool cancel_all)
+{
+       bool matched;
+
+       if (task && head->task != task) /* a NULL @task skips this filter */
+               return false;
+       if (cancel_all)                 /* no need to walk the links */
+               return true;
+
+       if (head->flags & REQ_F_LINK_TIMEOUT) {
+               struct io_ring_ctx *ctx = head->ctx;
+
+               /* walk the links under timeout_lock, with IRQs disabled */
+               spin_lock_irq(&ctx->timeout_lock);
+               matched = io_match_linked(head);
+               spin_unlock_irq(&ctx->timeout_lock);
+       } else {        /* REQ_F_LINK_TIMEOUT clear: walk without the lock */
+               matched = io_match_linked(head);
+       }
+       return matched;
+}
+
  static inline void req_set_fail(struct io_kiocb *req)
  {
         req->flags |= REQ_F_FAIL;
@@ -1368,11 +1409,6 @@ static void io_req_track_inflight(struct io_kiocb *req)
         }
  }
  
-static inline void io_unprep_linked_timeout(struct io_kiocb *req)
-{
-       req->flags &= ~REQ_F_LINK_TIMEOUT;
-}
-
  static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
  {
         if (WARN_ON_ONCE(!req->link))
@@ -1433,10 +1469,10 @@ static void io_prep_async_link(struct io_kiocb *req)
         if (req->flags & REQ_F_LINK_TIMEOUT) {
                 struct io_ring_ctx *ctx = req->ctx;
  
-               spin_lock(&ctx->completion_lock);
+               spin_lock_irq(&ctx->timeout_lock);
                 io_for_each_link(cur, req)
                         io_prep_async_work(cur);
-               spin_unlock(&ctx->completion_lock);
+               spin_unlock_irq(&ctx->timeout_lock);
         } else {
                 io_for_each_link(cur, req)
                         io_prep_async_work(cur);
@@ -1724,6 +1760,18 @@ static inline void io_get_task_refs(int nr)
                 io_task_refs_refill(tctx);
  }
  
+static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
+{      /* give back the references cached in @task->io_uring, if any */
+       struct io_uring_task *tctx = task->io_uring;
+       unsigned int refs = tctx->cached_refs;
+
+       if (refs) {     /* nothing to do when the cache is already empty */
+               tctx->cached_refs = 0;
+               percpu_counter_sub(&tctx->inflight, refs); /* un-account */
+               put_task_struct_many(task, refs);
+       }
+}
+
  static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
                                      long res, unsigned int cflags)
  {
@@ -2164,6 +2212,10 @@ static void tctx_task_work(struct callback_head *cb)
         }
  
         ctx_flush_and_put(ctx, &locked);
+
+       /* relaxed read is enough as only the task itself sets ->in_idle */
+       if (unlikely(atomic_read(&tctx->in_idle)))
+               io_uring_drop_tctx_refs(current);
  }
  
  static void io_req_task_work_add(struct io_kiocb *req)
@@ -2843,9 +2895,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                 req->flags |= REQ_F_ISREG;
  
         kiocb->ki_pos = READ_ONCE(sqe->off);
-       if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) {
-               req->flags |= REQ_F_CUR_POS;
-               kiocb->ki_pos = file->f_pos;
+       if (kiocb->ki_pos == -1) {
+               if (!(file->f_mode & FMODE_STREAM)) {
+                       req->flags |= REQ_F_CUR_POS;
+                       kiocb->ki_pos = file->f_pos;
+               } else {
+                       kiocb->ki_pos = 0;
+               }
         }
         kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
         kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -4307,6 +4363,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
                 kfree(nxt);
                 if (++i == nbufs)
                         return i;
+               cond_resched();
         }
         i++;
         kfree(buf);
@@ -4397,6 +4454,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
                 } else {
                         list_add_tail(&buf->list, &(*head)->list);
                 }
+               cond_resched();
         }
  
         return i ? i : -ENOMEM;
@@ -5705,7 +5763,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
  
                 list = &ctx->cancel_hash[i];
                 hlist_for_each_entry_safe(req, tmp, list, hash_node) {
-                       if (io_match_task(req, tsk, cancel_all))
+                       if (io_match_task_safe(req, tsk, cancel_all))
                                 posted += io_poll_remove_one(req);
                 }
         }
@@ -5884,6 +5942,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
          * update those. For multishot, if we're racing with completion, just
          * let completion re-add it.
          */
+       io_poll_remove_double(preq);
         completing = !__io_poll_remove_one(preq, &preq->poll, false);
         if (completing && (preq->poll.events & EPOLLONESHOT)) {
                 ret = -EALREADY;
@@ -6514,11 +6573,14 @@ static bool io_drain_req(struct io_kiocb *req)
         }
  
         /* Still need defer if there is pending req in defer list. */
+       spin_lock(&ctx->completion_lock);
         if (likely(list_empty_careful(&ctx->defer_list) &&
                 !(req->flags & REQ_F_IO_DRAIN))) {
+               spin_unlock(&ctx->completion_lock);
                 ctx->drain_active = false;
                 return false;
         }
+       spin_unlock(&ctx->completion_lock);
  
         seq = io_get_sequence(req);
         /* Still a chance to pass the sequence check */
@@ -6887,10 +6949,11 @@ static inline struct file *io_file_get(struct io_ring_ctx *ctx,
  static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
  {
         struct io_kiocb *prev = req->timeout.prev;
-       int ret;
+       int ret = -ENOENT;
  
         if (prev) {
-               ret = io_try_cancel_userdata(req, prev->user_data);
+               if (!(req->task->flags & PF_EXITING))
+                       ret = io_try_cancel_userdata(req, prev->user_data);
                 io_req_complete_post(req, ret ?: -ETIME, 0);
                 io_put_req(prev);
         } else {
@@ -6983,7 +7046,7 @@ issue_sqe:
                 switch (io_arm_poll_handler(req)) {
                 case IO_APOLL_READY:
                         if (linked_timeout)
-                               io_unprep_linked_timeout(req);
+                               io_queue_linked_timeout(linked_timeout);
                         goto issue_sqe;
                 case IO_APOLL_ABORTED:
                         /*
@@ -7531,7 +7594,7 @@ static int io_run_task_work_sig(void)
  /* when returns >0, the caller should retry */
  static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                                           struct io_wait_queue *iowq,
-                                         signed long *timeout)
+                                         ktime_t timeout)
  {
         int ret;
  
@@ -7543,8 +7606,9 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
         if (test_bit(0, &ctx->check_cq_overflow))
                 return 1;
  
-       *timeout = schedule_timeout(*timeout);
-       return !*timeout ? -ETIME : 1;
+       if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
+               return -ETIME;
+       return 1;
  }
  
  /*
@@ -7557,7 +7621,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
  {
         struct io_wait_queue iowq;
         struct io_rings *rings = ctx->rings;
-       signed long timeout = MAX_SCHEDULE_TIMEOUT;
+       ktime_t timeout = KTIME_MAX;
         int ret;
  
         do {
@@ -7573,7 +7637,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
  
                 if (get_timespec64(&ts, uts))
                         return -EFAULT;
-               timeout = timespec64_to_jiffies(&ts);
+               timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
         }
  
         if (sig) {
@@ -7605,7 +7669,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                 }
                 prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
                                                 TASK_INTERRUPTIBLE);
-               ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+               ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
                 finish_wait(&ctx->cq_wait, &iowq.wq);
                 cond_resched();
         } while (ret > 0);
@@ -7659,10 +7723,15 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
         struct io_ring_ctx *ctx = node->rsrc_data->ctx;
         unsigned long flags;
         bool first_add = false;
+       unsigned long delay = HZ;
  
         spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
         node->done = true;
  
+       /* if we are mid-quiesce then do not delay */
+       if (node->rsrc_data->quiesce)
+               delay = 0;
+
         while (!list_empty(&ctx->rsrc_ref_list)) {
                 node = list_first_entry(&ctx->rsrc_ref_list,
                                             struct io_rsrc_node, node);
@@ -7675,7 +7744,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
         spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
  
         if (first_add)
-               mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+               mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
  }
  
  static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
@@ -7753,7 +7822,15 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
                 ret = wait_for_completion_interruptible(&data->done);
                 if (!ret) {
                         mutex_lock(&ctx->uring_lock);
-                       break;
+                       if (atomic_read(&data->refs) > 0) {
+                               /*
+                                * it has been revived by another thread while
+                                * we were unlocked
+                                */
+                               mutex_unlock(&ctx->uring_lock);
+                       } else {
+                               break;
+                       }
                 }
  
                 atomic_inc(&data->refs);
@@ -8756,10 +8833,9 @@ static void io_mem_free(void *ptr)
  
  static void *io_mem_alloc(size_t size)
  {
-       gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
-                               __GFP_NORETRY | __GFP_ACCOUNT;
+       gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
  
-       return (void *) __get_free_pages(gfp_flags, get_order(size));
+       return (void *) __get_free_pages(gfp, get_order(size));
  }
  
  static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
@@ -9212,10 +9288,8 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
         struct io_buffer *buf;
         unsigned long index;
  
-       xa_for_each(&ctx->io_buffers, index, buf) {
+       xa_for_each(&ctx->io_buffers, index, buf)
                 __io_remove_buffers(ctx, buf, index, -1U);
-               cond_resched();
-       }
  }
  
  static void io_req_cache_free(struct list_head *list)
@@ -9520,19 +9594,8 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
  {
         struct io_kiocb *req = container_of(work, struct io_kiocb, work);
         struct io_task_cancel *cancel = data;
-       bool ret;
-
-       if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
-               struct io_ring_ctx *ctx = req->ctx;
  
-               /* protect against races with linked timeouts */
-               spin_lock(&ctx->completion_lock);
-               ret = io_match_task(req, cancel->task, cancel->all);
-               spin_unlock(&ctx->completion_lock);
-       } else {
-               ret = io_match_task(req, cancel->task, cancel->all);
-       }
-       return ret;
+       return io_match_task_safe(req, cancel->task, cancel->all);
  }
  
  static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
@@ -9543,7 +9606,7 @@ static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
  
         spin_lock(&ctx->completion_lock);
         list_for_each_entry_reverse(de, &ctx->defer_list, list) {
-               if (io_match_task(de->req, task, cancel_all)) {
+               if (io_match_task_safe(de->req, task, cancel_all)) {
                         list_cut_position(&list, &ctx->defer_list, &de->list);
                         break;
                 }
@@ -9638,7 +9701,16 @@ static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
                 ret = io_uring_alloc_task_context(current, ctx);
                 if (unlikely(ret))
                         return ret;
+
                 tctx = current->io_uring;
+               if (ctx->iowq_limits_set) {
+                       unsigned int limits[2] = { ctx->iowq_limits[0],
+                                                  ctx->iowq_limits[1], };
+
+                       ret = io_wq_max_workers(tctx->io_wq, limits);
+                       if (ret)
+                               return ret;
+               }
         }
         if (!xa_load(&tctx->xa, (unsigned long)ctx)) {
                 node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -9727,21 +9799,9 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
         return percpu_counter_sum(&tctx->inflight);
  }
  
-static void io_uring_drop_tctx_refs(struct task_struct *task)
-{
-       struct io_uring_task *tctx = task->io_uring;
-       unsigned int refs = tctx->cached_refs;
-
-       if (refs) {
-               tctx->cached_refs = 0;
-               percpu_counter_sub(&tctx->inflight, refs);
-               put_task_struct_many(task, refs);
-       }
-}
-
  /*
   * Find any io_uring ctx that this task has registered or done IO on, and cancel
- * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation.
+ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
   */
  static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
  {
@@ -9782,8 +9842,10 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
                                                              cancel_all);
                 }
  
-               prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
+               io_run_task_work();
                 io_uring_drop_tctx_refs(current);
+
                 /*
                  * If we've seen completions, retry without waiting. This
                  * avoids a race where a completion comes in before we did
@@ -9793,10 +9855,14 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
                         schedule();
                 finish_wait(&tctx->wait, &wait);
         } while (1);
-       atomic_dec(&tctx->in_idle);
  
         io_uring_clean_tctx(tctx);
         if (cancel_all) {
+               /*
+                * We shouldn't run task_works after cancel, so just leave
+                * ->in_idle set for normal exit.
+                */
+               atomic_dec(&tctx->in_idle);
                 /* for exec all current's requests should be gone, kill tctx */
                 __io_uring_free(current);
         }
@@ -10643,7 +10709,9 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
  
  static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                                         void __user *arg)
+       __must_hold(&ctx->uring_lock)
  {
+       struct io_tctx_node *node;
         struct io_uring_task *tctx = NULL;
         struct io_sq_data *sqd = NULL;
         __u32 new_count[2];
@@ -10674,13 +10742,21 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                 tctx = current->io_uring;
         }
  
-       ret = -EINVAL;
-       if (!tctx || !tctx->io_wq)
-               goto err;
+       BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
  
-       ret = io_wq_max_workers(tctx->io_wq, new_count);
-       if (ret)
-               goto err;
+       for (i = 0; i < ARRAY_SIZE(new_count); i++)
+               if (new_count[i])
+                       ctx->iowq_limits[i] = new_count[i];
+       ctx->iowq_limits_set = true;
+
+       ret = -EINVAL;
+       if (tctx && tctx->io_wq) {
+               ret = io_wq_max_workers(tctx->io_wq, new_count);
+               if (ret)
+                       goto err;
+       } else {
+               memset(new_count, 0, sizeof(new_count));
+       }
  
         if (sqd) {
                 mutex_unlock(&sqd->lock);
@@ -10690,6 +10766,22 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
         if (copy_to_user(arg, new_count, sizeof(new_count)))
                 return -EFAULT;
  
+       /* that's it for SQPOLL, only the SQPOLL task creates requests */
+       if (sqd)
+               return 0;
+
+       /* now propagate the restriction to all registered users */
+       list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+               struct io_uring_task *tctx = node->task->io_uring;
+
+               if (WARN_ON_ONCE(!tctx->io_wq))
+                       continue;
+
+               for (i = 0; i < ARRAY_SIZE(new_count); i++)
+                       new_count[i] = ctx->iowq_limits[i];
+               /* ignore errors, it always returns zero anyway */
+               (void)io_wq_max_workers(tctx->io_wq, new_count);
+       }
         return 0;
  err:
         if (sqd) {