struct io_buffer {
struct list_head list;
__u64 addr;
- __s32 len;
+ __u32 len;
__u16 bid;
};
struct io_close {
struct file *file;
- struct file *put_file;
int fd;
};
struct io_provide_buf {
struct file *file;
__u64 addr;
- __s32 len;
+ __u32 len;
__u32 bgid;
__u16 nbufs;
__u16 bid;
struct io_completion {
struct file *file;
struct list_head list;
- int cflags;
+ u32 cflags;
};
struct io_async_connect {
.pollout = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_msghdr),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FS,
},
[IORING_OP_RECVMSG] = {
.needs_file = 1,
.buffer_select = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_msghdr),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+ .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+ IO_WQ_WORK_FS,
},
[IORING_OP_TIMEOUT] = {
.needs_async_data = 1,
IO_WQ_WORK_FS | IO_WQ_WORK_MM,
},
[IORING_OP_CLOSE] = {
- .needs_file = 1,
- .needs_file_no_error = 1,
.work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
},
[IORING_OP_FILES_UPDATE] = {
ACCT_PINNED,
};
-static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
- struct task_struct *task);
-
+static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+ struct task_struct *task,
+ struct files_struct *files);
static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
struct io_ring_ctx *ctx);
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
- } else {
+ } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
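+ /* neither a regular file nor a block device: completion time is unbounded, so allow unbound workers */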
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
io_queue_linked_timeout(link);
}
-static void io_kill_timeout(struct io_kiocb *req)
+static void io_kill_timeout(struct io_kiocb *req, int status)
{
struct io_timeout_data *io = req->async_data;
int ret;
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
- io_cqring_fill_event(req, 0);
+ io_cqring_fill_event(req, status);
io_put_req_deferred(req, 1);
}
}
spin_lock_irq(&ctx->completion_lock);
list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
if (io_match_task(req, tsk, files)) {
- io_kill_timeout(req);
+ io_kill_timeout(req, -ECANCELED);
canceled++;
}
}
break;
list_del_init(&req->timeout.list);
- io_kill_timeout(req);
+ io_kill_timeout(req, 0);
} while (!list_empty(&ctx->timeout_list));
ctx->cq_last_tm_flush = seq;
return all_flushed;
}
-static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
struct task_struct *tsk,
struct files_struct *files)
{
+ bool ret = true;
+
if (test_bit(0, &ctx->cq_check_overflow)) {
/* iopoll syncs against uring_lock, not completion_lock */
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&ctx->uring_lock);
- __io_cqring_overflow_flush(ctx, force, tsk, files);
+ ret = __io_cqring_overflow_flush(ctx, force, tsk, files);
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_unlock(&ctx->uring_lock);
}
+
+ return ret;
}
-static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
+static void __io_cqring_fill_event(struct io_kiocb *req, long res,
+ unsigned int cflags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
return ret;
}
+static void io_req_task_work_add_fallback(struct io_kiocb *req,
+ void (*cb)(struct callback_head *))
+{
+ struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
+
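+ /* fall back to the io-wq manager task, which keeps running task_work until the wq is torn down */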
+ init_task_work(&req->task_work, cb);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
+ wake_up_process(tsk);
+}
+
static void __io_req_task_cancel(struct io_kiocb *req, int error)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
struct io_ring_ctx *ctx = req->ctx;
+ mutex_lock(&ctx->uring_lock);
__io_req_task_cancel(req, -ECANCELED);
+ mutex_unlock(&ctx->uring_lock);
percpu_ref_put(&ctx->refs);
}
else
__io_req_task_cancel(req, -EFAULT);
mutex_unlock(&ctx->uring_lock);
+
+ ctx->flags &= ~IORING_SETUP_R_DISABLED;
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ io_sq_thread_drop_mm_files();
}
static void io_req_task_submit(struct callback_head *cb)
percpu_ref_get(&req->ctx->refs);
ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- init_task_work(&req->task_work, io_req_task_cancel);
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
- }
+ if (unlikely(ret))
+ io_req_task_work_add_fallback(req, io_req_task_cancel);
}
static inline void io_queue_next(struct io_kiocb *req)
init_task_work(&req->task_work, io_put_req_deferred_cb);
ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
- }
+ if (unlikely(ret))
+ io_req_task_work_add_fallback(req, io_put_req_deferred_cb);
}
static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
io_free_req_deferred(req);
}
-static struct io_wq_work *io_steal_work(struct io_kiocb *req)
-{
- struct io_kiocb *nxt;
-
- /*
- * A ref is owned by io-wq in which context we're. So, if that's the
- * last one, it's safe to steal next work. False negatives are Ok,
- * it just will be re-punted async in io_put_work()
- */
- if (refcount_read(&req->refs) != 1)
- return NULL;
-
- nxt = io_req_find_next(req);
- return nxt ? &nxt->work : NULL;
-}
-
static void io_double_put_req(struct io_kiocb *req)
{
/* drop both submit and complete references */
return false;
if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
return false;
+ /*
+ * If ref is dying, we might be running poll reap from the exit work.
+ * Don't attempt to reissue from that path, just let it fail with
+ * -EAGAIN.
+ */
+ if (percpu_ref_is_dying(&req->ctx->refs))
+ return false;
lockdep_assert_held(&req->ctx->uring_lock);
/* submit ref gets dropped, acquire a new one */
refcount_inc(&req->refs);
ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- /* queue just for cancelation */
- init_task_work(&req->task_work, io_req_task_cancel);
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
- }
+ if (unlikely(ret))
+ io_req_task_work_add_fallback(req, io_req_task_cancel);
return 1;
}
else
kiocb->ki_flags |= IOCB_NOWAIT;
-
/* If the file doesn't support async, just async punt */
no_async = force_nonblock && !io_file_supports_async(req->file, READ);
if (no_async)
ret = io_iter_do_read(req, iter);
- if (!ret) {
- goto done;
- } else if (ret == -EIOCBQUEUED) {
+ if (ret == -EIOCBQUEUED) {
ret = 0;
goto out_free;
} else if (ret == -EAGAIN) {
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
goto copy_iov;
- } else if (ret < 0) {
+ } else if (ret <= 0) {
/* make sure -ERESTARTSYS -> -EINTR is done */
goto done;
}
goto out_free;
} else if (ret > 0 && ret < io_size) {
/* we got some bytes, but not all. retry. */
+ kiocb->ki_flags &= ~IOCB_WAITQ;
goto retry;
}
done:
static int io_provide_buffers_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
+ unsigned long size, tmp_check;
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len);
- if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
+ if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
+ &size))
+ return -EOVERFLOW;
+ if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
+ return -EOVERFLOW;
+
+ size = (unsigned long)p->len * p->nbufs;
+ if (!access_ok(u64_to_user_ptr(p->addr), size))
return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group);
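/*
 * Illustration (not part of the patch): the two overflow checks added above
 * behave like this userspace sketch. check_mul_overflow()/check_add_overflow()
 * wrap the compiler builtins used here; validate_buf_range() and the test
 * values are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static int validate_buf_range(uint64_t addr, uint32_t len, uint16_t nbufs)
{
	unsigned long size, end;

	/* len * nbufs must not wrap... */
	if (__builtin_mul_overflow((unsigned long)len,
				   (unsigned long)nbufs, &size))
		return -75;		/* -EOVERFLOW */
	/* ...and neither must addr + size */
	if (__builtin_add_overflow((unsigned long)addr, size, &end))
		return -75;
	return 0;
}

int main(void)
{
	printf("%d\n", validate_buf_range(0x1000, 1u << 20, 16));	/* 0 */
	printf("%d\n", validate_buf_range(~0ULL, ~0u, 65535));		/* -75 */
	return 0;
}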
break;
buf->addr = addr;
- buf->len = pbuf->len;
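+ /* cap at MAX_RW_COUNT: a single read/write never transfers more, so don't advertise a longer buffer */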
+ buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
buf->bid = bid;
addr += pbuf->len;
bid++;
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- /*
- * If we queue this for async, it must not be cancellable. That would
- * leave the 'file' in an undeterminate state, and here need to modify
- * io_wq_work.flags, so initialize io_wq_work firstly.
- */
- io_req_init_async(req);
-
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
- if ((req->file && req->file->f_op == &io_uring_fops))
- return -EBADF;
-
- req->close.put_file = NULL;
return 0;
}
static int io_close(struct io_kiocb *req, bool force_nonblock,
struct io_comp_state *cs)
{
+ struct files_struct *files = current->files;
struct io_close *close = &req->close;
+ struct fdtable *fdt;
+ struct file *file;
int ret;
- /* might be already done during nonblock submission */
- if (!close->put_file) {
- ret = close_fd_get_file(close->fd, &close->put_file);
- if (ret < 0)
- return (ret == -ENOENT) ? -EBADF : ret;
+ file = NULL;
+ ret = -EBADF;
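+ /* peek at the fd table under file_lock so the entry can't be swapped from under us */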
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ if (close->fd >= fdt->max_fds) {
+ spin_unlock(&files->file_lock);
+ goto err;
+ }
+ file = fdt->fd[close->fd];
+ if (!file) {
+ spin_unlock(&files->file_lock);
+ goto err;
+ }
+
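+ /* the ring's own fd can't be closed through io_uring */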
+ if (file->f_op == &io_uring_fops) {
+ spin_unlock(&files->file_lock);
+ file = NULL;
+ goto err;
}
/* if the file has a flush method, be safe and punt to async */
- if (close->put_file->f_op->flush && force_nonblock) {
- /* not safe to cancel at this point */
- req->work.flags |= IO_WQ_WORK_NO_CANCEL;
- /* was never set, but play safe */
- req->flags &= ~REQ_F_NOWAIT;
- /* avoid grabbing files - we don't need the files */
- req->flags |= REQ_F_NO_FILE_TABLE;
+ if (file->f_op->flush && force_nonblock) {
+ spin_unlock(&files->file_lock);
return -EAGAIN;
}
+ ret = __close_fd_get_file(close->fd, &file);
+ spin_unlock(&files->file_lock);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = -EBADF;
+ goto err;
+ }
+
/* No ->flush() or already async, safely close from here */
- ret = filp_close(close->put_file, req->work.identity->files);
+ ret = filp_close(file, current->files);
+err:
if (ret < 0)
req_set_fail_links(req);
- fput(close->put_file);
- close->put_file = NULL;
+ if (file)
+ fput(file);
__io_req_complete(req, ret, 0, cs);
return 0;
}
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned flags;
+ int min_ret = 0;
int ret;
sock = sock_from_file(req->file);
kmsg = &iomsg;
}
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (force_nonblock && ret == -EAGAIN)
return io_setup_async_msg(req, kmsg);
if (kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
+ if (ret < min_ret)
req_set_fail_links(req);
__io_req_complete(req, ret, 0, cs);
return 0;
struct iovec iov;
struct socket *sock;
unsigned flags;
+ int min_ret = 0;
int ret;
sock = sock_from_file(req->file);
msg.msg_controllen = 0;
msg.msg_namelen = 0;
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&msg.msg_iter);
+
msg.msg_flags = flags;
ret = sock_sendmsg(sock, &msg);
if (force_nonblock && ret == -EAGAIN)
if (ret == -ERESTARTSYS)
ret = -EINTR;
- if (ret < 0)
+ if (ret < min_ret)
req_set_fail_links(req);
__io_req_complete(req, ret, 0, cs);
return 0;
struct socket *sock;
struct io_buffer *kbuf;
unsigned flags;
+ int min_ret = 0;
int ret, cflags = 0;
sock = sock_from_file(req->file);
1, req->sr_msg.len);
}
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN)
if (kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
+ if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
req_set_fail_links(req);
__io_req_complete(req, ret, cflags, cs);
return 0;
struct socket *sock;
struct iovec iov;
unsigned flags;
+ int min_ret = 0;
int ret, cflags = 0;
sock = sock_from_file(req->file);
msg.msg_iocb = NULL;
msg.msg_flags = 0;
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&msg.msg_iter);
+
ret = sock_recvmsg(sock, &msg, flags);
if (force_nonblock && ret == -EAGAIN)
return -EAGAIN;
out_free:
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_recv_kbuf(req);
- if (ret < 0)
+ if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
req_set_fail_links(req);
__io_req_complete(req, ret, cflags, cs);
return 0;
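/*
 * Illustration (not part of the patch): the min_ret accounting added in the
 * four send/recv hunks above, expressed as a plain socket helper. With
 * MSG_WAITALL a short transfer counts as a failure; MSG_NOSIGNAL (used on the
 * send side) turns a dead peer into -EPIPE instead of SIGPIPE. recv_exact()
 * is a hypothetical name.
 */
#include <sys/socket.h>
#include <sys/types.h>

static ssize_t recv_exact(int fd, void *buf, size_t len)
{
	int flags = MSG_WAITALL;
	ssize_t min_ret = (flags & MSG_WAITALL) ? (ssize_t)len : 0;
	ssize_t ret = recv(fd, buf, len, flags);

	/* same condition the patch checks before req_set_fail_links() */
	return (ret < min_ret) ? -1 : ret;
}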
*/
ret = io_req_task_work_add(req);
if (unlikely(ret)) {
- struct task_struct *tsk;
-
WRITE_ONCE(poll->canceled, true);
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
+ io_req_task_work_add_fallback(req, func);
}
return 1;
}
pt->error = -EINVAL;
return;
}
+ /* double add on the same waitqueue head, ignore */
+ if (poll->head == head)
+ return;
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
if (!poll) {
pt->error = -ENOMEM;
return 0;
}
-static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
+static void io_wq_submit_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_kiocb *timeout;
if (timeout)
io_queue_linked_timeout(timeout);
- /* if NO_CANCEL is set, we must still run the work */
- if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
- IO_WQ_WORK_CANCEL) {
- ret = -ECANCELED;
+ if (work->flags & IO_WQ_WORK_CANCEL) {
+ /* io-wq will put one request ref when this work returns; take another */
+ refcount_inc(&req->refs);
+ percpu_ref_get(&req->ctx->refs);
+ io_req_task_work_add_fallback(req, io_req_task_cancel);
+ return;
}
if (!ret) {
if (lock_ctx)
mutex_unlock(&lock_ctx->uring_lock);
}
-
- return io_steal_work(req);
}
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
* We don't expect the list to be empty, that will only happen if we
* race with the completion of the linked work.
*/
- if (prev && refcount_inc_not_zero(&prev->refs))
+ if (prev) {
io_remove_next_linked(prev);
- else
- prev = NULL;
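+ /* unlink first, then try for a ref: if it already hit zero, the request is being freed and must be left alone */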
+ if (!refcount_inc_not_zero(&prev->refs))
+ prev = NULL;
+ }
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
- req_set_fail_links(prev);
io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
- io_put_req(prev);
+ io_put_req_deferred(prev, 1);
+ io_put_req_deferred(req, 1);
} else {
- io_req_complete(req, -ETIME);
+ io_cqring_add_event(req, -ETIME, 0);
+ io_put_req_deferred(req, 1);
}
return HRTIMER_NORESTART;
}
return -EINTR;
}
+/* if this returns > 0, the caller should retry */
+static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq,
+ signed long *timeout)
+{
+ int ret;
+
+ /* make sure we run task_work before checking for signals */
+ ret = io_run_task_work_sig();
+ if (ret || io_should_wake(iowq))
+ return ret;
+ /* let the caller flush overflows, retry */
+ if (test_bit(0, &ctx->cq_check_overflow))
+ return 1;
+
+ *timeout = schedule_timeout(*timeout);
+ return !*timeout ? -ETIME : 1;
+}
+
/*
* Wait until events become available, if we don't already have some. The
* application must reap them itself, as they reside on the shared cq ring.
.to_wait = min_events,
};
struct io_rings *rings = ctx->rings;
- struct timespec64 ts;
- signed long timeout = 0;
- int ret = 0;
+ signed long timeout = MAX_SCHEDULE_TIMEOUT;
+ int ret;
do {
io_cqring_overflow_flush(ctx, false, NULL, NULL);
}
if (uts) {
+ struct timespec64 ts;
+
if (get_timespec64(&ts, uts))
return -EFAULT;
timeout = timespec64_to_jiffies(&ts);
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
trace_io_uring_cqring_wait(ctx, min_events);
do {
- io_cqring_overflow_flush(ctx, false, NULL, NULL);
- prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
- TASK_INTERRUPTIBLE);
- /* make sure we run task_work before checking for signals */
- ret = io_run_task_work_sig();
- if (ret > 0) {
- finish_wait(&ctx->wait, &iowq.wq);
- continue;
- }
- else if (ret < 0)
- break;
- if (io_should_wake(&iowq))
+ /* if we can't even flush overflow, don't wait for more */
+ if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) {
+ ret = -EBUSY;
break;
- if (test_bit(0, &ctx->cq_check_overflow)) {
- finish_wait(&ctx->wait, &iowq.wq);
- continue;
- }
- if (uts) {
- timeout = schedule_timeout(timeout);
- if (timeout == 0) {
- ret = -ETIME;
- break;
- }
- } else {
- schedule();
}
- } while (1);
- finish_wait(&ctx->wait, &iowq.wq);
+ prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+ TASK_INTERRUPTIBLE);
+ ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+ finish_wait(&ctx->wait, &iowq.wq);
+ cond_resched();
+ } while (ret > 0);
restore_saved_sigmask_unless(ret == -EINTR);
return __io_sqe_files_update(ctx, &up, nr_args);
}
-static void io_free_work(struct io_wq_work *work)
+static struct io_wq_work *io_free_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- /* Consider that io_steal_work() relies on this ref */
- io_put_req(req);
+ req = io_put_req_find_next(req);
+ return req ? &req->work : NULL;
}
static int io_init_wq_offload(struct io_ring_ctx *ctx,
smp_rmb();
if (!io_sqring_full(ctx))
mask |= EPOLLOUT | EPOLLWRNORM;
- io_cqring_overflow_flush(ctx, false, NULL, NULL);
- if (io_cqring_events(ctx))
+
+ /* prevent SQPOLL from submitting new requests */
+ if (ctx->sq_data) {
+ io_sq_thread_park(ctx->sq_data);
+ list_del_init(&ctx->sqd_list);
+ io_sqd_update_thread_idle(ctx->sq_data);
+ io_sq_thread_unpark(ctx->sq_data);
+ }
+
+ /*
+ * Don't flush cqring overflow list here, just do a simple check.
+ * Otherwise there could possibly be an ABBA deadlock:
+ * CPU0 CPU1
+ * ---- ----
+ * lock(&ctx->uring_lock);
+ * lock(&ep->mtx);
+ * lock(&ctx->uring_lock);
+ * lock(&ep->mtx);
+ *
+ * Users may get EPOLLIN while seeing nothing in the cqring; this
+ * pushes them to do the flush.
+ */
+ if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow))
mask |= EPOLLIN | EPOLLRDNORM;
return mask;
* as nobody else will be looking for them.
*/
do {
- __io_uring_cancel_task_requests(ctx, NULL);
+ io_uring_try_cancel_requests(ctx, NULL, NULL);
} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
io_ring_ctx_free(ctx);
}
return ret;
}
-static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files)
{
- struct io_defer_entry *de = NULL;
+ struct io_defer_entry *de;
LIST_HEAD(list);
spin_lock_irq(&ctx->completion_lock);
}
}
spin_unlock_irq(&ctx->completion_lock);
+ if (list_empty(&list))
+ return false;
while (!list_empty(&list)) {
de = list_first_entry(&list, struct io_defer_entry, list);
io_req_complete(de->req, -ECANCELED);
kfree(de);
}
+ return true;
+}
+
+static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+ struct task_struct *task,
+ struct files_struct *files)
+{
+ struct io_task_cancel cancel = { .task = task, .files = files, };
+
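+ /* loop until one full pass cancels nothing new */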
+ while (1) {
+ enum io_wq_cancel cret;
+ bool ret = false;
+
+ if (ctx->io_wq) {
+ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
+ &cancel, true);
+ ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+ }
+
+ /* SQPOLL thread does its own polling */
+ if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) ||
+ (ctx->sq_data && ctx->sq_data->thread == current)) {
+ while (!list_empty_careful(&ctx->iopoll_list)) {
+ io_iopoll_try_reap_events(ctx);
+ ret = true;
+ }
+ }
+
+ ret |= io_cancel_defer_files(ctx, task, files);
+ ret |= io_poll_remove_all(ctx, task, files);
+ ret |= io_kill_timeouts(ctx, task, files);
+ ret |= io_run_task_work();
+ io_cqring_overflow_flush(ctx, true, task, files);
+ if (!ret)
+ break;
+ cond_resched();
+ }
}
static int io_uring_count_inflight(struct io_ring_ctx *ctx,
struct files_struct *files)
{
while (!list_empty_careful(&ctx->inflight_list)) {
- struct io_task_cancel cancel = { .task = task, .files = files };
DEFINE_WAIT(wait);
int inflight;
if (!inflight)
break;
- io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
- io_poll_remove_all(ctx, task, files);
- io_kill_timeouts(ctx, task, files);
- io_cqring_overflow_flush(ctx, true, task, files);
- /* cancellations _may_ trigger task work */
- io_run_task_work();
+ io_uring_try_cancel_requests(ctx, task, files);
+ if (ctx->sq_data)
+ io_sq_thread_unpark(ctx->sq_data);
prepare_to_wait(&task->io_uring->wait, &wait,
TASK_UNINTERRUPTIBLE);
if (inflight == io_uring_count_inflight(ctx, task, files))
schedule();
finish_wait(&task->io_uring->wait, &wait);
- }
-}
-
-static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
- struct task_struct *task)
-{
- while (1) {
- struct io_task_cancel cancel = { .task = task, .files = NULL, };
- enum io_wq_cancel cret;
- bool ret = false;
-
- if (ctx->io_wq) {
- cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
- &cancel, true);
- ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
- }
-
- /* SQPOLL thread does its own polling */
- if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
- while (!list_empty_careful(&ctx->iopoll_list)) {
- io_iopoll_try_reap_events(ctx);
- ret = true;
- }
- }
-
- ret |= io_poll_remove_all(ctx, task, NULL);
- ret |= io_kill_timeouts(ctx, task, NULL);
- ret |= io_run_task_work();
- if (!ret)
- break;
- cond_resched();
+ if (ctx->sq_data)
+ io_sq_thread_park(ctx->sq_data);
}
}
{
mutex_lock(&ctx->uring_lock);
ctx->sqo_dead = 1;
+ if (ctx->flags & IORING_SETUP_R_DISABLED)
+ io_sq_offload_start(ctx);
mutex_unlock(&ctx->uring_lock);
/* make sure callers enter the ring to get error */
io_sq_thread_park(ctx->sq_data);
}
- io_cancel_defer_files(ctx, task, files);
- io_cqring_overflow_flush(ctx, true, task, files);
-
io_uring_cancel_files(ctx, task, files);
if (!files)
- __io_uring_cancel_task_requests(ctx, task);
+ io_uring_try_cancel_requests(ctx, task, NULL);
if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
atomic_dec(&task->io_uring->in_idle);
- /*
- * If the files that are going away are the ones in the thread
- * identity, clear them out.
- */
- if (task->io_uring->identity->files == files)
- task->io_uring->identity->files = NULL;
io_sq_thread_unpark(ctx->sq_data);
}
}
if (ctx->restrictions.registered)
ctx->restricted = 1;
- ctx->flags &= ~IORING_SETUP_R_DISABLED;
-
io_sq_offload_start(ctx);
-
return 0;
}