i40e: Fix sparse warning: missing error code 'err'

[mirror_ubuntu-hirsute-kernel.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index 985a9e3f976d3bf1e8b6da3aebbb6495fe875dc9..373f810e5a4b41110b77b54860f9066ec9957b56 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -222,7 +222,7 @@ struct fixed_file_data {
  struct io_buffer {
         struct list_head list;
         __u64 addr;
-       __s32 len;
+       __u32 len;
         __u16 bid;
  };
  
@@ -411,7 +411,6 @@ struct io_poll_remove {
  
  struct io_close {
         struct file                     *file;
-       struct file                     *put_file;
         int                             fd;
  };
  
@@ -536,7 +535,7 @@ struct io_splice {
  struct io_provide_buf {
         struct file                     *file;
         __u64                           addr;
-       __s32                           len;
+       __u32                           len;
         __u32                           bgid;
         __u16                           nbufs;
         __u16                           bid;
@@ -857,7 +856,8 @@ static const struct io_op_def io_op_defs[] = {
                 .pollout                = 1,
                 .needs_async_data       = 1,
                 .async_size             = sizeof(struct io_async_msghdr),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+                                               IO_WQ_WORK_FS,
         },
         [IORING_OP_RECVMSG] = {
                 .needs_file             = 1,
@@ -866,7 +866,8 @@ static const struct io_op_def io_op_defs[] = {
                 .buffer_select          = 1,
                 .needs_async_data       = 1,
                 .async_size             = sizeof(struct io_async_msghdr),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+                                               IO_WQ_WORK_FS,
         },
         [IORING_OP_TIMEOUT] = {
                 .needs_async_data       = 1,
@@ -906,8 +907,6 @@ static const struct io_op_def io_op_defs[] = {
                                                 IO_WQ_WORK_FS | IO_WQ_WORK_MM,
         },
         [IORING_OP_CLOSE] = {
-               .needs_file             = 1,
-               .needs_file_no_error    = 1,
                 .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
         },
         [IORING_OP_FILES_UPDATE] = {
@@ -994,9 +993,9 @@ enum io_mem_account {
         ACCT_PINNED,
  };
  
-static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
-                                           struct task_struct *task);
-
+static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+                                        struct task_struct *task,
+                                        struct files_struct *files);
  static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
  static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
                         struct io_ring_ctx *ctx);
@@ -1025,6 +1024,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
  static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
                              const struct iovec *fast_iov,
                              struct iov_iter *iter, bool force);
+static void io_req_drop_files(struct io_kiocb *req);
+static void io_req_task_queue(struct io_kiocb *req);
  
  static struct kmem_cache *req_cachep;
  
@@ -1048,8 +1049,7 @@ EXPORT_SYMBOL(io_uring_get_socket);
  
  static inline void io_clean_op(struct io_kiocb *req)
  {
-       if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
-                         REQ_F_INFLIGHT))
+       if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
                 __io_clean_op(req);
  }
  
@@ -1069,14 +1069,21 @@ static bool io_match_task(struct io_kiocb *head,
  {
         struct io_kiocb *req;
  
-       if (task && head->task != task)
+       if (task && head->task != task) {
+               /* in terms of cancelation, always match if req task is dead */
+               if (head->task->flags & PF_EXITING)
+                       return true;
                 return false;
+       }
         if (!files)
                 return true;
  
         io_for_each_link(req, head) {
-               if ((req->flags & REQ_F_WORK_INITIALIZED) &&
-                   (req->work.flags & IO_WQ_WORK_FILES) &&
+               if (!(req->flags & REQ_F_WORK_INITIALIZED))
+                       continue;
+               if (req->file && req->file->f_op == &io_uring_fops)
+                       return true;
+               if ((req->work.flags & IO_WQ_WORK_FILES) &&
                     req->work.identity->files == files)
                         return true;
         }
@@ -1394,6 +1401,8 @@ static void io_req_clean_work(struct io_kiocb *req)
                         free_fs_struct(fs);
                 req->work.flags &= ~IO_WQ_WORK_FS;
         }
+       if (req->flags & REQ_F_INFLIGHT)
+               io_req_drop_files(req);
  
         io_put_identity(req->task->io_uring, req);
  }
@@ -1503,11 +1512,14 @@ static bool io_grab_identity(struct io_kiocb *req)
                         return false;
                 atomic_inc(&id->files->count);
                 get_nsproxy(id->nsproxy);
-               req->flags |= REQ_F_INFLIGHT;
  
-               spin_lock_irq(&ctx->inflight_lock);
-               list_add(&req->inflight_entry, &ctx->inflight_list);
-               spin_unlock_irq(&ctx->inflight_lock);
+               if (!(req->flags & REQ_F_INFLIGHT)) {
+                       req->flags |= REQ_F_INFLIGHT;
+
+                       spin_lock_irq(&ctx->inflight_lock);
+                       list_add(&req->inflight_entry, &ctx->inflight_list);
+                       spin_unlock_irq(&ctx->inflight_lock);
+               }
                 req->work.flags |= IO_WQ_WORK_FILES;
         }
         if (!(req->work.flags & IO_WQ_WORK_MM) &&
@@ -1582,7 +1594,7 @@ static void io_queue_async_work(struct io_kiocb *req)
                 io_queue_linked_timeout(link);
  }
  
-static void io_kill_timeout(struct io_kiocb *req)
+static void io_kill_timeout(struct io_kiocb *req, int status)
  {
         struct io_timeout_data *io = req->async_data;
         int ret;
@@ -1592,7 +1604,7 @@ static void io_kill_timeout(struct io_kiocb *req)
                 atomic_set(&req->ctx->cq_timeouts,
                         atomic_read(&req->ctx->cq_timeouts) + 1);
                 list_del_init(&req->timeout.list);
-               io_cqring_fill_event(req, 0);
+               io_cqring_fill_event(req, status);
                 io_put_req_deferred(req, 1);
         }
  }
@@ -1609,7 +1621,7 @@ static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
         spin_lock_irq(&ctx->completion_lock);
         list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
                 if (io_match_task(req, tsk, files)) {
-                       io_kill_timeout(req);
+                       io_kill_timeout(req, -ECANCELED);
                         canceled++;
                 }
         }
@@ -1622,18 +1634,11 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
         do {
                 struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
                                                 struct io_defer_entry, list);
-               struct io_kiocb *link;
  
                 if (req_need_defer(de->req, de->seq))
                         break;
                 list_del_init(&de->list);
-               /* punt-init is done before queueing for defer */
-               link = __io_queue_async_work(de->req);
-               if (link) {
-                       __io_queue_linked_timeout(link);
-                       /* drop submission reference */
-                       io_put_req_deferred(link, 1);
-               }
+               io_req_task_queue(de->req);
                 kfree(de);
         } while (!list_empty(&ctx->defer_list));
  }
@@ -1668,7 +1673,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
                         break;
  
                 list_del_init(&req->timeout.list);
-               io_kill_timeout(req);
+               io_kill_timeout(req, 0);
         } while (!list_empty(&ctx->timeout_list));
  
         ctx->cq_last_tm_flush = seq;
@@ -1767,12 +1772,13 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
         struct io_kiocb *req, *tmp;
         struct io_uring_cqe *cqe;
         unsigned long flags;
-       bool all_flushed;
+       bool all_flushed, posted;
         LIST_HEAD(list);
  
         if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries)
                 return false;
  
+       posted = false;
         spin_lock_irqsave(&ctx->completion_lock, flags);
         list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
                 if (!io_match_task(req, tsk, files))
@@ -1792,6 +1798,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
                         WRITE_ONCE(ctx->rings->cq_overflow,
                                    ctx->cached_cq_overflow);
                 }
+               posted = true;
         }
  
         all_flushed = list_empty(&ctx->cq_overflow_list);
@@ -1801,9 +1808,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
                 ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
         }
  
-       io_commit_cqring(ctx);
+       if (posted)
+               io_commit_cqring(ctx);
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
-       io_cqring_ev_posted(ctx);
+       if (posted)
+               io_cqring_ev_posted(ctx);
  
         while (!list_empty(&list)) {
                 req = list_first_entry(&list, struct io_kiocb, compl.list);
@@ -1814,18 +1823,22 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
         return all_flushed;
  }
  
-static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
                                      struct task_struct *tsk,
                                      struct files_struct *files)
  {
+       bool ret = true;
+
         if (test_bit(0, &ctx->cq_check_overflow)) {
                 /* iopoll syncs against uring_lock, not completion_lock */
                 if (ctx->flags & IORING_SETUP_IOPOLL)
                         mutex_lock(&ctx->uring_lock);
-               __io_cqring_overflow_flush(ctx, force, tsk, files);
+               ret = __io_cqring_overflow_flush(ctx, force, tsk, files);
                 if (ctx->flags & IORING_SETUP_IOPOLL)
                         mutex_unlock(&ctx->uring_lock);
         }
+
+       return ret;
  }
  
  static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
@@ -2160,6 +2173,16 @@ static int io_req_task_work_add(struct io_kiocb *req)
         return ret;
  }
  
+static void io_req_task_work_add_fallback(struct io_kiocb *req,
+                                         void (*cb)(struct callback_head *))
+{
+       struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
+
+       init_task_work(&req->task_work, cb);
+       task_work_add(tsk, &req->task_work, TWA_NONE);
+       wake_up_process(tsk);
+}
+
  static void __io_req_task_cancel(struct io_kiocb *req, int error)
  {
         struct io_ring_ctx *ctx = req->ctx;
@@ -2179,7 +2202,9 @@ static void io_req_task_cancel(struct callback_head *cb)
         struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
         struct io_ring_ctx *ctx = req->ctx;
  
+       mutex_lock(&ctx->uring_lock);
         __io_req_task_cancel(req, -ECANCELED);
+       mutex_unlock(&ctx->uring_lock);
         percpu_ref_put(&ctx->refs);
  }
  
@@ -2195,6 +2220,10 @@ static void __io_req_task_submit(struct io_kiocb *req)
         else
                 __io_req_task_cancel(req, -EFAULT);
         mutex_unlock(&ctx->uring_lock);
+
+       ctx->flags &= ~IORING_SETUP_R_DISABLED;
+       if (ctx->flags & IORING_SETUP_SQPOLL)
+               io_sq_thread_drop_mm_files();
  }
  
  static void io_req_task_submit(struct callback_head *cb)
@@ -2214,14 +2243,8 @@ static void io_req_task_queue(struct io_kiocb *req)
         percpu_ref_get(&req->ctx->refs);
  
         ret = io_req_task_work_add(req);
-       if (unlikely(ret)) {
-               struct task_struct *tsk;
-
-               init_task_work(&req->task_work, io_req_task_cancel);
-               tsk = io_wq_get_task(req->ctx->io_wq);
-               task_work_add(tsk, &req->task_work, TWA_NONE);
-               wake_up_process(tsk);
-       }
+       if (unlikely(ret))
+               io_req_task_work_add_fallback(req, io_req_task_cancel);
  }
  
  static inline void io_queue_next(struct io_kiocb *req)
@@ -2270,6 +2293,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
                 struct io_uring_task *tctx = rb->task->io_uring;
  
                 percpu_counter_sub(&tctx->inflight, rb->task_refs);
+               if (atomic_read(&tctx->in_idle))
+                       wake_up(&tctx->wait);
                 put_task_struct_many(rb->task, rb->task_refs);
                 rb->task = NULL;
         }
@@ -2288,6 +2313,8 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
                         struct io_uring_task *tctx = rb->task->io_uring;
  
                         percpu_counter_sub(&tctx->inflight, rb->task_refs);
+                       if (atomic_read(&tctx->in_idle))
+                               wake_up(&tctx->wait);
                         put_task_struct_many(rb->task, rb->task_refs);
                 }
                 rb->task = req->task;
@@ -2335,13 +2362,8 @@ static void io_free_req_deferred(struct io_kiocb *req)
  
         init_task_work(&req->task_work, io_put_req_deferred_cb);
         ret = io_req_task_work_add(req);
-       if (unlikely(ret)) {
-               struct task_struct *tsk;
-
-               tsk = io_wq_get_task(req->ctx->io_wq);
-               task_work_add(tsk, &req->task_work, TWA_NONE);
-               wake_up_process(tsk);
-       }
+       if (unlikely(ret))
+               io_req_task_work_add_fallback(req, io_put_req_deferred_cb);
  }
  
  static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
@@ -2350,22 +2372,6 @@ static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
                 io_free_req_deferred(req);
  }
  
-static struct io_wq_work *io_steal_work(struct io_kiocb *req)
-{
-       struct io_kiocb *nxt;
-
-       /*
-        * A ref is owned by io-wq in which context we're. So, if that's the
-        * last one, it's safe to steal next work. False negatives are Ok,
-        * it just will be re-punted async in io_put_work()
-        */
-       if (refcount_read(&req->refs) != 1)
-               return NULL;
-
-       nxt = io_req_find_next(req);
-       return nxt ? &nxt->work : NULL;
-}
-
  static void io_double_put_req(struct io_kiocb *req)
  {
         /* drop both submit and complete references */
@@ -2716,6 +2722,13 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
                 return false;
         if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
                 return false;
+       /*
+        * If ref is dying, we might be running poll reap from the exit work.
+        * Don't attempt to reissue from that path, just let it fail with
+        * -EAGAIN.
+        */
+       if (percpu_ref_is_dying(&req->ctx->refs))
+               return false;
  
         lockdep_assert_held(&req->ctx->uring_lock);
  
@@ -3420,15 +3433,8 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
         /* submit ref gets dropped, acquire a new one */
         refcount_inc(&req->refs);
         ret = io_req_task_work_add(req);
-       if (unlikely(ret)) {
-               struct task_struct *tsk;
-
-               /* queue just for cancelation */
-               init_task_work(&req->task_work, io_req_task_cancel);
-               tsk = io_wq_get_task(req->ctx->io_wq);
-               task_work_add(tsk, &req->task_work, TWA_NONE);
-               wake_up_process(tsk);
-       }
+       if (unlikely(ret))
+               io_req_task_work_add_fallback(req, io_req_task_cancel);
         return 1;
  }
  
@@ -3513,7 +3519,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
         else
                 kiocb->ki_flags |= IOCB_NOWAIT;
  
-
         /* If the file doesn't support async, just async punt */
         no_async = force_nonblock && !io_file_supports_async(req->file, READ);
         if (no_async)
@@ -3525,9 +3530,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
  
         ret = io_iter_do_read(req, iter);
  
-       if (!ret) {
-               goto done;
-       } else if (ret == -EIOCBQUEUED) {
+       if (ret == -EIOCBQUEUED) {
                 ret = 0;
                 goto out_free;
         } else if (ret == -EAGAIN) {
@@ -3541,14 +3544,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
                 iov_iter_revert(iter, io_size - iov_iter_count(iter));
                 ret = 0;
                 goto copy_iov;
-       } else if (ret < 0) {
+       } else if (ret <= 0) {
                 /* make sure -ERESTARTSYS -> -EINTR is done */
                 goto done;
         }
  
         /* read it all, or we did blocking attempt. no retry. */
         if (!iov_iter_count(iter) || !force_nonblock ||
-           (req->file->f_flags & O_NONBLOCK))
+           (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG))
                 goto done;
  
         io_size -= ret;
@@ -3585,6 +3588,7 @@ retry:
                 goto out_free;
         } else if (ret > 0 && ret < io_size) {
                 /* we got some bytes, but not all. retry. */
+               kiocb->ki_flags &= ~IOCB_WAITQ;
                 goto retry;
         }
  done:
@@ -4210,6 +4214,7 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock,
  static int io_provide_buffers_prep(struct io_kiocb *req,
                                    const struct io_uring_sqe *sqe)
  {
+       unsigned long size, tmp_check;
         struct io_provide_buf *p = &req->pbuf;
         u64 tmp;
  
@@ -4223,7 +4228,14 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
         p->addr = READ_ONCE(sqe->addr);
         p->len = READ_ONCE(sqe->len);
  
-       if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
+       if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
+                               &size))
+               return -EOVERFLOW;
+       if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
+               return -EOVERFLOW;
+
+       size = (unsigned long)p->len * p->nbufs;
+       if (!access_ok(u64_to_user_ptr(p->addr), size))
                 return -EFAULT;
  
         p->bgid = READ_ONCE(sqe->buf_group);
@@ -4246,7 +4258,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
                         break;
  
                 buf->addr = addr;
-               buf->len = pbuf->len;
+               buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
                 buf->bid = bid;
                 addr += pbuf->len;
                 bid++;
@@ -4462,14 +4474,6 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock)
  
  static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
-       /*
-        * If we queue this for async, it must not be cancellable. That would
-        * leave the 'file' in an undeterminate state, and here need to modify
-        * io_wq_work.flags, so initialize io_wq_work firstly.
-        */
-       io_req_init_async(req);
-       req->work.flags |= IO_WQ_WORK_NO_CANCEL;
-
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
         if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
@@ -4479,41 +4483,59 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                 return -EBADF;
  
         req->close.fd = READ_ONCE(sqe->fd);
-       if ((req->file && req->file->f_op == &io_uring_fops))
-               return -EBADF;
-
-       req->close.put_file = NULL;
         return 0;
  }
  
  static int io_close(struct io_kiocb *req, bool force_nonblock,
                     struct io_comp_state *cs)
  {
+       struct files_struct *files = current->files;
         struct io_close *close = &req->close;
+       struct fdtable *fdt;
+       struct file *file;
         int ret;
  
-       /* might be already done during nonblock submission */
-       if (!close->put_file) {
-               ret = close_fd_get_file(close->fd, &close->put_file);
-               if (ret < 0)
-                       return (ret == -ENOENT) ? -EBADF : ret;
+       file = NULL;
+       ret = -EBADF;
+       spin_lock(&files->file_lock);
+       fdt = files_fdtable(files);
+       if (close->fd >= fdt->max_fds) {
+               spin_unlock(&files->file_lock);
+               goto err;
+       }
+       file = fdt->fd[close->fd];
+       if (!file) {
+               spin_unlock(&files->file_lock);
+               goto err;
+       }
+
+       if (file->f_op == &io_uring_fops) {
+               spin_unlock(&files->file_lock);
+               file = NULL;
+               goto err;
         }
  
         /* if the file has a flush method, be safe and punt to async */
-       if (close->put_file->f_op->flush && force_nonblock) {
-               /* was never set, but play safe */
-               req->flags &= ~REQ_F_NOWAIT;
-               /* avoid grabbing files - we don't need the files */
-               req->flags |= REQ_F_NO_FILE_TABLE;
+       if (file->f_op->flush && force_nonblock) {
+               spin_unlock(&files->file_lock);
                 return -EAGAIN;
         }
  
+       ret = __close_fd_get_file(close->fd, &file);
+       spin_unlock(&files->file_lock);
+       if (ret < 0) {
+               if (ret == -ENOENT)
+                       ret = -EBADF;
+               goto err;
+       }
+
         /* No ->flush() or already async, safely close from here */
-       ret = filp_close(close->put_file, req->work.identity->files);
+       ret = filp_close(file, current->files);
+err:
         if (ret < 0)
                 req_set_fail_links(req);
-       fput(close->put_file);
-       close->put_file = NULL;
+       if (file)
+               fput(file);
         __io_req_complete(req, ret, 0, cs);
         return 0;
  }
@@ -4612,6 +4634,7 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
         struct io_async_msghdr iomsg, *kmsg;
         struct socket *sock;
         unsigned flags;
+       int min_ret = 0;
         int ret;
  
         sock = sock_from_file(req->file);
@@ -4632,12 +4655,15 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
                 kmsg = &iomsg;
         }
  
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
         if (flags & MSG_DONTWAIT)
                 req->flags |= REQ_F_NOWAIT;
         else if (force_nonblock)
                 flags |= MSG_DONTWAIT;
  
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
         ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
         if (force_nonblock && ret == -EAGAIN)
                 return io_setup_async_msg(req, kmsg);
@@ -4647,7 +4673,7 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
         if (kmsg->iov != kmsg->fast_iov)
                 kfree(kmsg->iov);
         req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
+       if (ret < min_ret)
                 req_set_fail_links(req);
         __io_req_complete(req, ret, 0, cs);
         return 0;
@@ -4661,6 +4687,7 @@ static int io_send(struct io_kiocb *req, bool force_nonblock,
         struct iovec iov;
         struct socket *sock;
         unsigned flags;
+       int min_ret = 0;
         int ret;
  
         sock = sock_from_file(req->file);
@@ -4676,12 +4703,15 @@ static int io_send(struct io_kiocb *req, bool force_nonblock,
         msg.msg_controllen = 0;
         msg.msg_namelen = 0;
  
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
         if (flags & MSG_DONTWAIT)
                 req->flags |= REQ_F_NOWAIT;
         else if (force_nonblock)
                 flags |= MSG_DONTWAIT;
  
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
+
         msg.msg_flags = flags;
         ret = sock_sendmsg(sock, &msg);
         if (force_nonblock && ret == -EAGAIN)
@@ -4689,7 +4719,7 @@ static int io_send(struct io_kiocb *req, bool force_nonblock,
         if (ret == -ERESTARTSYS)
                 ret = -EINTR;
  
-       if (ret < 0)
+       if (ret < min_ret)
                 req_set_fail_links(req);
         __io_req_complete(req, ret, 0, cs);
         return 0;
@@ -4841,6 +4871,7 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
         struct socket *sock;
         struct io_buffer *kbuf;
         unsigned flags;
+       int min_ret = 0;
         int ret, cflags = 0;
  
         sock = sock_from_file(req->file);
@@ -4870,12 +4901,15 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
                                 1, req->sr_msg.len);
         }
  
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
         if (flags & MSG_DONTWAIT)
                 req->flags |= REQ_F_NOWAIT;
         else if (force_nonblock)
                 flags |= MSG_DONTWAIT;
  
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
         ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
                                         kmsg->uaddr, flags);
         if (force_nonblock && ret == -EAGAIN)
@@ -4888,7 +4922,7 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
         if (kmsg->iov != kmsg->fast_iov)
                 kfree(kmsg->iov);
         req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
+       if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
                 req_set_fail_links(req);
         __io_req_complete(req, ret, cflags, cs);
         return 0;
@@ -4904,6 +4938,7 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock,
         struct socket *sock;
         struct iovec iov;
         unsigned flags;
+       int min_ret = 0;
         int ret, cflags = 0;
  
         sock = sock_from_file(req->file);
@@ -4928,12 +4963,15 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock,
         msg.msg_iocb = NULL;
         msg.msg_flags = 0;
  
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
         if (flags & MSG_DONTWAIT)
                 req->flags |= REQ_F_NOWAIT;
         else if (force_nonblock)
                 flags |= MSG_DONTWAIT;
  
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
+
         ret = sock_recvmsg(sock, &msg, flags);
         if (force_nonblock && ret == -EAGAIN)
                 return -EAGAIN;
@@ -4942,7 +4980,7 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock,
  out_free:
         if (req->flags & REQ_F_BUFFER_SELECTED)
                 cflags = io_put_recv_kbuf(req);
-       if (ret < 0)
+       if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
                 req_set_fail_links(req);
         __io_req_complete(req, ret, cflags, cs);
         return 0;
@@ -5139,12 +5177,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
          */
         ret = io_req_task_work_add(req);
         if (unlikely(ret)) {
-               struct task_struct *tsk;
-
                 WRITE_ONCE(poll->canceled, true);
-               tsk = io_wq_get_task(req->ctx->io_wq);
-               task_work_add(tsk, &req->task_work, TWA_NONE);
-               wake_up_process(tsk);
+               io_req_task_work_add_fallback(req, func);
         }
         return 1;
  }
@@ -5296,6 +5330,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                         pt->error = -EINVAL;
                         return;
                 }
+               /* double add on the same waitqueue head, ignore */
+               if (poll->head == head)
+                       return;
                 poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
                 if (!poll) {
                         pt->error = -ENOMEM;
@@ -6157,8 +6194,10 @@ static void io_req_drop_files(struct io_kiocb *req)
         struct io_uring_task *tctx = req->task->io_uring;
         unsigned long flags;
  
-       put_files_struct(req->work.identity->files);
-       put_nsproxy(req->work.identity->nsproxy);
+       if (req->work.flags & IO_WQ_WORK_FILES) {
+               put_files_struct(req->work.identity->files);
+               put_nsproxy(req->work.identity->nsproxy);
+       }
         spin_lock_irqsave(&ctx->inflight_lock, flags);
         list_del(&req->inflight_entry);
         spin_unlock_irqrestore(&ctx->inflight_lock, flags);
@@ -6225,9 +6264,6 @@ static void __io_clean_op(struct io_kiocb *req)
                 }
                 req->flags &= ~REQ_F_NEED_CLEANUP;
         }
-
-       if (req->flags & REQ_F_INFLIGHT)
-               io_req_drop_files(req);
  }
  
  static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
@@ -6362,7 +6398,7 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
         return 0;
  }
  
-static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
+static void io_wq_submit_work(struct io_wq_work *work)
  {
         struct io_kiocb *req = container_of(work, struct io_kiocb, work);
         struct io_kiocb *timeout;
@@ -6372,10 +6408,12 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
         if (timeout)
                 io_queue_linked_timeout(timeout);
  
-       /* if NO_CANCEL is set, we must still run the work */
-       if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
-                               IO_WQ_WORK_CANCEL) {
-               ret = -ECANCELED;
+       if (work->flags & IO_WQ_WORK_CANCEL) {
+               /* io-wq is going to drop one reference, so take an extra one */
+               refcount_inc(&req->refs);
+               percpu_ref_get(&req->ctx->refs);
+               io_req_task_work_add_fallback(req, io_req_task_cancel);
+               return;
         }
  
         if (!ret) {
@@ -6416,8 +6454,6 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
                 if (lock_ctx)
                         mutex_unlock(&lock_ctx->uring_lock);
         }
-
-       return io_steal_work(req);
  }
  
  static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
@@ -6446,6 +6482,16 @@ static struct file *io_file_get(struct io_submit_state *state,
                 file = __io_file_get(state, fd);
         }
  
+       if (file && file->f_op == &io_uring_fops &&
+           !(req->flags & REQ_F_INFLIGHT)) {
+               io_req_init_async(req);
+               req->flags |= REQ_F_INFLIGHT;
+
+               spin_lock_irq(&ctx->inflight_lock);
+               list_add(&req->inflight_entry, &ctx->inflight_list);
+               spin_unlock_irq(&ctx->inflight_lock);
+       }
+
         return file;
  }
  
@@ -6472,11 +6518,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
  
         if (prev) {
-               req_set_fail_links(prev);
                 io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
-               io_put_req(prev);
+               io_put_req_deferred(prev, 1);
         } else {
-               io_req_complete(req, -ETIME);
+               io_cqring_add_event(req, -ETIME, 0);
+               io_put_req_deferred(req, 1);
         }
         return HRTIMER_NORESTART;
  }
@@ -7188,6 +7234,25 @@ static int io_run_task_work_sig(void)
         return -EINTR;
  }
  
+/*
+ * One wait step for io_cqring_wait(). Returns >0 when the caller should
+ * retry, 0 when io_should_wake() says there is nothing left to wait for,
+ * and <0 to stop with an error. A non-zero result from
+ * io_run_task_work_sig() is returned unchanged; -ETIME is returned once
+ * schedule_timeout() reports that no time remains. *timeout is updated
+ * with the value schedule_timeout() returns.
+ */
+static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+                                         struct io_wait_queue *iowq,
+                                         signed long *timeout)
+{
+       int ret;
+
+       /* run task_work before checking for signals or the wake condition */
+       ret = io_run_task_work_sig();
+       if (ret || io_should_wake(iowq))
+               return ret;
+       /* overflow is pending: return so the caller can flush it and retry */
+       if (test_bit(0, &ctx->cq_check_overflow))
+               return 1;
+
+       *timeout = schedule_timeout(*timeout);
+       return !*timeout ? -ETIME : 1;
+}
+
  /*
   * Wait until events become available, if we don't already have some. The
   * application must reap them itself, as they reside on the shared cq ring.
@@ -7206,9 +7271,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                 .to_wait        = min_events,
         };
         struct io_rings *rings = ctx->rings;
-       struct timespec64 ts;
-       signed long timeout = 0;
-       int ret = 0;
+       signed long timeout = MAX_SCHEDULE_TIMEOUT;
+       int ret;
  
         do {
                 io_cqring_overflow_flush(ctx, false, NULL, NULL);
@@ -7232,6 +7296,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
         }
  
         if (uts) {
+               struct timespec64 ts;
+
                 if (get_timespec64(&ts, uts))
                         return -EFAULT;
                 timeout = timespec64_to_jiffies(&ts);
@@ -7240,30 +7306,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
         iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
         trace_io_uring_cqring_wait(ctx, min_events);
         do {
-               io_cqring_overflow_flush(ctx, false, NULL, NULL);
-               prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
-                                               TASK_INTERRUPTIBLE);
-               /* make sure we run task_work before checking for signals */
-               ret = io_run_task_work_sig();
-               if (ret > 0)
-                       continue;
-               else if (ret < 0)
-                       break;
-               if (io_should_wake(&iowq))
+               /* if we can't even flush overflow, don't wait for more */
+               if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) {
+                       ret = -EBUSY;
                         break;
-               if (test_bit(0, &ctx->cq_check_overflow))
-                       continue;
-               if (uts) {
-                       timeout = schedule_timeout(timeout);
-                       if (timeout == 0) {
-                               ret = -ETIME;
-                               break;
-                       }
-               } else {
-                       schedule();
                 }
-       } while (1);
-       finish_wait(&ctx->wait, &iowq.wq);
+               prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+                                               TASK_INTERRUPTIBLE);
+               ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+               finish_wait(&ctx->wait, &iowq.wq);
+               cond_resched();
+       } while (ret > 0);
  
         restore_saved_sigmask_unless(ret == -EINTR);
  
@@ -8034,12 +8087,12 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
         return __io_sqe_files_update(ctx, &up, nr_args);
  }
  
-static void io_free_work(struct io_wq_work *work)
+static struct io_wq_work *io_free_work(struct io_wq_work *work)
  {
         struct io_kiocb *req = container_of(work, struct io_kiocb, work);
  
-       /* Consider that io_steal_work() relies on this ref */
-       io_put_req(req);
+       req = io_put_req_find_next(req);
+       return req ? &req->work : NULL;
  }
  
  static int io_init_wq_offload(struct io_ring_ctx *ctx,
@@ -8690,8 +8743,29 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
         smp_rmb();
         if (!io_sqring_full(ctx))
                 mask |= EPOLLOUT | EPOLLWRNORM;
-       io_cqring_overflow_flush(ctx, false, NULL, NULL);
-       if (io_cqring_events(ctx))
+
+       /* prevent SQPOLL from submitting new requests */
+       if (ctx->sq_data) {
+               io_sq_thread_park(ctx->sq_data);
+               list_del_init(&ctx->sqd_list);
+               io_sqd_update_thread_idle(ctx->sq_data);
+               io_sq_thread_unpark(ctx->sq_data);
+       }
+
+       /*
+        * Don't flush cqring overflow list here, just do a simple check.
+        * Otherwise there could possibly be an ABBA deadlock:
+        *      CPU0                    CPU1
+        *      ----                    ----
+        * lock(&ctx->uring_lock);
+        *                              lock(&ep->mtx);
+        *                              lock(&ctx->uring_lock);
+        * lock(&ep->mtx);
+        *
+        * Users may get EPOLLIN while seeing nothing in the cqring; this
+        * pushes them to do the flush.
+        */
+       if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow))
                 mask |= EPOLLIN | EPOLLRDNORM;
  
         return mask;
@@ -8730,7 +8804,7 @@ static void io_ring_exit_work(struct work_struct *work)
          * as nobody else will be looking for them.
          */
         do {
-               __io_uring_cancel_task_requests(ctx, NULL);
+               io_uring_try_cancel_requests(ctx, NULL, NULL);
         } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
         io_ring_ctx_free(ctx);
  }
@@ -8818,11 +8892,11 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
         return ret;
  }
  
-static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
                                   struct task_struct *task,
                                   struct files_struct *files)
  {
-       struct io_defer_entry *de = NULL;
+       struct io_defer_entry *de;
         LIST_HEAD(list);
  
         spin_lock_irq(&ctx->completion_lock);
@@ -8833,6 +8907,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
                 }
         }
         spin_unlock_irq(&ctx->completion_lock);
+       if (list_empty(&list))
+               return false;
  
         while (!list_empty(&list)) {
                 de = list_first_entry(&list, struct io_defer_entry, list);
@@ -8842,50 +8918,16 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
                 io_req_complete(de->req, -ECANCELED);
                 kfree(de);
         }
+       return true;
  }
  
-static void io_uring_cancel_files(struct io_ring_ctx *ctx,
-                                 struct task_struct *task,
-                                 struct files_struct *files)
+static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+                                        struct task_struct *task,
+                                        struct files_struct *files)
  {
-       while (!list_empty_careful(&ctx->inflight_list)) {
-               struct io_task_cancel cancel = { .task = task, .files = files };
-               struct io_kiocb *req;
-               DEFINE_WAIT(wait);
-               bool found = false;
-
-               spin_lock_irq(&ctx->inflight_lock);
-               list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
-                       if (req->task != task ||
-                           req->work.identity->files != files)
-                               continue;
-                       found = true;
-                       break;
-               }
-               if (found)
-                       prepare_to_wait(&task->io_uring->wait, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-               spin_unlock_irq(&ctx->inflight_lock);
-
-               /* We need to keep going until we don't find a matching req */
-               if (!found)
-                       break;
+       struct io_task_cancel cancel = { .task = task, .files = files, };
  
-               io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
-               io_poll_remove_all(ctx, task, files);
-               io_kill_timeouts(ctx, task, files);
-               /* cancellations _may_ trigger task work */
-               io_run_task_work();
-               schedule();
-               finish_wait(&task->io_uring->wait, &wait);
-       }
-}
-
-static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
-                                           struct task_struct *task)
-{
         while (1) {
-               struct io_task_cancel cancel = { .task = task, .files = NULL, };
                 enum io_wq_cancel cret;
                 bool ret = false;
  
@@ -8896,28 +8938,71 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
                 }
  
                 /* SQPOLL thread does its own polling */
-               if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
+               if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) ||
+                   (ctx->sq_data && ctx->sq_data->thread == current)) {
                         while (!list_empty_careful(&ctx->iopoll_list)) {
                                 io_iopoll_try_reap_events(ctx);
                                 ret = true;
                         }
                 }
  
-               ret |= io_poll_remove_all(ctx, task, NULL);
-               ret |= io_kill_timeouts(ctx, task, NULL);
+               ret |= io_cancel_defer_files(ctx, task, files);
+               ret |= io_poll_remove_all(ctx, task, files);
+               ret |= io_kill_timeouts(ctx, task, files);
                 ret |= io_run_task_work();
+               io_cqring_overflow_flush(ctx, true, task, files);
                 if (!ret)
                         break;
                 cond_resched();
         }
  }
  
-static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
+static int io_uring_count_inflight(struct io_ring_ctx *ctx,
+                                  struct task_struct *task,
+                                  struct files_struct *files)
  {
-       WARN_ON_ONCE(ctx->sqo_task != current);
+       struct io_kiocb *req;
+       int cnt = 0;
+
+       spin_lock_irq(&ctx->inflight_lock);
+       list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
+               cnt += io_match_task(req, task, files);
+       spin_unlock_irq(&ctx->inflight_lock);
+       return cnt;
+}
+
+static void io_uring_cancel_files(struct io_ring_ctx *ctx,
+                                 struct task_struct *task,
+                                 struct files_struct *files)
+{
+       while (!list_empty_careful(&ctx->inflight_list)) {
+               DEFINE_WAIT(wait);
+               int inflight;
+
+               inflight = io_uring_count_inflight(ctx, task, files);
+               if (!inflight)
+                       break;
+
+               io_uring_try_cancel_requests(ctx, task, files);
+
+               if (ctx->sq_data)
+                       io_sq_thread_unpark(ctx->sq_data);
+               prepare_to_wait(&task->io_uring->wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               if (inflight == io_uring_count_inflight(ctx, task, files))
+                       schedule();
+               finish_wait(&task->io_uring->wait, &wait);
+               if (ctx->sq_data)
+                       io_sq_thread_park(ctx->sq_data);
+       }
+}
  
+static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
+{
         mutex_lock(&ctx->uring_lock);
         ctx->sqo_dead = 1;
+       if (ctx->flags & IORING_SETUP_R_DISABLED)
+               io_sq_offload_start(ctx);
         mutex_unlock(&ctx->uring_lock);
  
         /* make sure callers enter the ring to get error */
@@ -8936,29 +9021,18 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
         struct task_struct *task = current;
  
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
-               /* for SQPOLL only sqo_task has task notes */
                 io_disable_sqo_submit(ctx);
                 task = ctx->sq_data->thread;
                 atomic_inc(&task->io_uring->in_idle);
                 io_sq_thread_park(ctx->sq_data);
         }
  
-       io_cancel_defer_files(ctx, task, files);
-       io_cqring_overflow_flush(ctx, true, task, files);
-
+       io_uring_cancel_files(ctx, task, files);
         if (!files)
-               __io_uring_cancel_task_requests(ctx, task);
-       else
-               io_uring_cancel_files(ctx, task, files);
+               io_uring_try_cancel_requests(ctx, task, NULL);
  
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
                 atomic_dec(&task->io_uring->in_idle);
-               /*
-                * If the files that are going away are the ones in the thread
-                * identity, clear them out.
-                */
-               if (task->io_uring->identity->files == files)
-                       task->io_uring->identity->files = NULL;
                 io_sq_thread_unpark(ctx->sq_data);
         }
  }
@@ -9082,6 +9156,10 @@ void __io_uring_task_cancel(void)
         /* make sure overflow events are dropped */
         atomic_inc(&tctx->in_idle);
  
+       /* trigger io_disable_sqo_submit() */
+       if (tctx->sqpoll)
+               __io_uring_files_cancel(NULL);
+
         do {
                 /* read completions before cancelations */
                 inflight = tctx_inflight(tctx);
@@ -9092,16 +9170,15 @@ void __io_uring_task_cancel(void)
                 prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
  
                 /*
-                * If we've seen completions, retry. This avoids a race where
-                * a completion comes in before we did prepare_to_wait().
+                * If we've seen completions, retry without waiting. This
+                * avoids a race where a completion comes in before we did
+                * prepare_to_wait().
                  */
-               if (inflight != tctx_inflight(tctx))
-                       continue;
-               schedule();
+               if (inflight == tctx_inflight(tctx))
+                       schedule();
                 finish_wait(&tctx->wait, &wait);
         } while (1);
  
-       finish_wait(&tctx->wait, &wait);
         atomic_dec(&tctx->in_idle);
  
         io_uring_remove_task_files(tctx);
@@ -9112,6 +9189,9 @@ static int io_uring_flush(struct file *file, void *data)
         struct io_uring_task *tctx = current->io_uring;
         struct io_ring_ctx *ctx = file->private_data;
  
+       if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+               io_uring_cancel_task_requests(ctx, NULL);
+
         if (!tctx)
                 return 0;
  
@@ -9128,7 +9208,10 @@ static int io_uring_flush(struct file *file, void *data)
  
         if (ctx->flags & IORING_SETUP_SQPOLL) {
                 /* there is only one file note, which is owned by sqo_task */
-               WARN_ON_ONCE((ctx->sqo_task == current) ==
+               WARN_ON_ONCE(ctx->sqo_task != current &&
+                            xa_load(&tctx->xa, (unsigned long)file));
+               /* sqo_dead check is for when this happens after cancellation */
+               WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
                              !xa_load(&tctx->xa, (unsigned long)file));
  
                 io_disable_sqo_submit(ctx);
@@ -9933,10 +10016,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
         if (ctx->restrictions.registered)
                 ctx->restricted = 1;
  
-       ctx->flags &= ~IORING_SETUP_R_DISABLED;
-
         io_sq_offload_start(ctx);
-
         return 0;
  }