bool cancel_all);
static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
-static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
- long res, unsigned int cflags);
+static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags);
+
static void io_put_req(struct io_kiocb *req);
static void io_put_req_deferred(struct io_kiocb *req);
static void io_dismantle_req(struct io_kiocb *req);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
- io_cqring_fill_event(req->ctx, req->user_data, status, 0);
+ io_fill_cqe_req(req, status, 0);
io_put_req_deferred(req);
}
}
}
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
- long res, unsigned int cflags)
+ s32 res, u32 cflags)
{
struct io_overflow_cqe *ocqe;
return true;
}
-static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
- long res, unsigned int cflags)
+static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
{
struct io_uring_cqe *cqe;
return io_cqring_event_overflow(ctx, user_data, res, cflags);
}
-/* not as hot to bloat with inlining */
-static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
- long res, unsigned int cflags)
+static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
- return __io_cqring_fill_event(ctx, user_data, res, cflags);
+ __io_fill_cqe(req->ctx, req->user_data, res, cflags);
}
-static void io_req_complete_post(struct io_kiocb *req, long res,
- unsigned int cflags)
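+/* post an auxiliary CQE; these aren't 1:1 with submitted requests, so account them via cq_extra */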
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
+{
+ ctx->cq_extra++;
+ return __io_fill_cqe(ctx, user_data, res, cflags);
+}
+
+static void io_req_complete_post(struct io_kiocb *req, s32 res,
+ u32 cflags)
{
struct io_ring_ctx *ctx = req->ctx;
spin_lock(&ctx->completion_lock);
- __io_cqring_fill_event(ctx, req->user_data, res, cflags);
+ __io_fill_cqe(ctx, req->user_data, res, cflags);
/*
* If we're the last reference to this request, add to our locked
* free_list cache.
return req->flags & IO_REQ_CLEAN_FLAGS;
}
-static void io_req_complete_state(struct io_kiocb *req, long res,
- unsigned int cflags)
+static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
+ u32 cflags)
{
if (io_req_needs_clean(req))
io_clean_op(req);
}
static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
- long res, unsigned cflags)
+ s32 res, u32 cflags)
{
if (issue_flags & IO_URING_F_COMPLETE_DEFER)
io_req_complete_state(req, res, cflags);
io_req_complete_post(req, res, cflags);
}
-static inline void io_req_complete(struct io_kiocb *req, long res)
+static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
__io_req_complete(req, 0, res, 0);
}
-static void io_req_complete_failed(struct io_kiocb *req, long res)
+static void io_req_complete_failed(struct io_kiocb *req, s32 res)
{
req_set_fail(req);
io_req_complete_post(req, res, 0);
link->timeout.head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
list_del(&link->timeout.list);
- io_cqring_fill_event(link->ctx, link->user_data,
- -ECANCELED, 0);
+ io_fill_cqe_req(link, -ECANCELED, 0);
io_put_req_deferred(link);
return true;
}
link->link = NULL;
trace_io_uring_fail_link(req, link);
- io_cqring_fill_event(link->ctx, link->user_data, res, 0);
+ io_fill_cqe_req(link, res, 0);
io_put_req_deferred(link);
link = nxt;
}
req->flags &= ~REQ_F_ARM_LTIMEOUT;
if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
io_remove_next_linked(req);
- io_cqring_fill_event(link->ctx, link->user_data,
- -ECANCELED, 0);
+ io_fill_cqe_req(link, -ECANCELED, 0);
io_put_req_deferred(link);
posted = true;
}
for (i = 0; i < nr; i++) {
struct io_kiocb *req = state->compl_reqs[i];
- __io_cqring_fill_event(ctx, req->user_data, req->result,
- req->compl.cflags);
+ __io_fill_cqe(ctx, req->user_data, req->result,
+ req->compl.cflags);
}
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
req = list_first_entry(done, struct io_kiocb, inflight_entry);
list_del(&req->inflight_entry);
- __io_cqring_fill_event(ctx, req->user_data, req->result,
- io_put_rw_kbuf(req));
+ io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req));
(*nr_events)++;
if (req_ref_put_and_test(req))
static bool __io_complete_rw_common(struct io_kiocb *req, long res)
{
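+ /* io_uring calls ->read_iter()/->write_iter() directly, so generate fsnotify events here */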
- if (req->rw.kiocb.ki_flags & IOCB_WRITE)
+ if (req->rw.kiocb.ki_flags & IOCB_WRITE) {
kiocb_end_write(req);
+ fsnotify_modify(req->file);
+ } else {
+ fsnotify_access(req->file);
+ }
if (res != req->result) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
static void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
unsigned int cflags = io_put_rw_kbuf(req);
- long res = req->result;
+ int res = req->result;
if (*locked) {
struct io_ring_ctx *ctx = req->ctx;
kiocb->ki_complete = io_complete_rw;
}
+ /* used for fixed read/write too - just read unconditionally */
+ req->buf_index = READ_ONCE(sqe->buf_index);
+ req->imu = NULL;
+
if (req->opcode == IORING_OP_READ_FIXED ||
req->opcode == IORING_OP_WRITE_FIXED) {
- req->imu = NULL;
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+
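+ /* resolve the registered buffer now so the import path can use req->imu directly */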
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
io_req_set_rsrc_node(req);
}
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
- req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
{
- struct io_ring_ctx *ctx = req->ctx;
- struct io_mapped_ubuf *imu = req->imu;
- u16 index, buf_index = req->buf_index;
-
- if (likely(!imu)) {
- if (unlikely(buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
- imu = READ_ONCE(ctx->user_bufs[index]);
- req->imu = imu;
- }
- return __io_import_fixed(req, rw, iter, imu);
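+ /* req->imu is resolved at prep time for fixed read/write */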
+ if (WARN_ON_ONCE(!req->imu))
+ return -EFAULT;
+ return __io_import_fixed(req, rw, iter, req->imu);
}
static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
req->sync.len);
if (ret < 0)
req_set_fail(req);
+ else
+ fsnotify_modify(req->file);
io_req_complete(req, ret);
return 0;
}
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
+ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio))
+ return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
+ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio))
+ return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
int error;
};
+static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
+{
+ /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
+ if (req->opcode == IORING_OP_POLL_ADD)
+ return req->async_data;
+ return req->apoll->double_poll;
+}
+
+static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
+{
+ if (req->opcode == IORING_OP_POLL_ADD)
+ return &req->poll;
+ return &req->apoll->poll;
+}
+
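+/* hash the request by user_data so poll cancel/update can find it later */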
+static void io_poll_req_insert(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct hlist_head *list;
+
+ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+ hlist_add_head(&req->hash_node, list);
+}
+
+static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
+ wait_queue_func_t wake_func)
+{
+ poll->head = NULL;
+ poll->done = false;
+ poll->canceled = false;
+#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
+ /* mask in events that we always want/need */
+ poll->events = events | IO_POLL_UNMASK;
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, wake_func);
+}
+
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, io_req_tw_func_t func)
{
return false;
}
-static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
-{
- /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
- if (req->opcode == IORING_OP_POLL_ADD)
- return req->async_data;
- return req->apoll->double_poll;
-}
-
-static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
-{
- if (req->opcode == IORING_OP_POLL_ADD)
- return &req->poll;
- return &req->apoll->poll;
-}
-
static void io_poll_remove_double(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
{
}
if (req->poll.events & EPOLLONESHOT)
flags = 0;
- if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
+
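+ /* multishot completions post extra CQEs; only the final one completes the request itself */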
+ if (!(flags & IORING_CQE_F_MORE)) {
+ io_fill_cqe_req(req, error, flags);
+ } else if (!io_fill_cqe_aux(ctx, req->user_data, error, flags)) {
req->poll.events |= EPOLLONESHOT;
flags = 0;
}
- if (flags & IORING_CQE_F_MORE)
- ctx->cq_extra++;
-
return !(flags & IORING_CQE_F_MORE);
}
-static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
- __must_hold(&req->ctx->completion_lock)
-{
- bool done;
-
- done = __io_poll_complete(req, mask);
- io_commit_cqring(req->ctx);
- return done;
-}
-
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
- struct io_kiocb *nxt;
if (io_poll_rewait(req, &req->poll)) {
spin_unlock(&ctx->completion_lock);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
- if (done) {
- nxt = io_put_req_find_next(req);
- if (nxt)
- io_req_task_submit(nxt, locked);
- }
+ if (done)
+ io_put_req(req);
}
}
return 1;
}
-static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
- wait_queue_func_t wake_func)
-{
- poll->head = NULL;
- poll->done = false;
- poll->canceled = false;
-#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
- /* mask in events that we always want/need */
- poll->events = events | IO_POLL_UNMASK;
- INIT_LIST_HEAD(&poll->wait.entry);
- init_waitqueue_func_entry(&poll->wait, wake_func);
-}
-
static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
struct wait_queue_head *head,
struct io_poll_iocb **poll_ptr)
return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
}
-static void io_poll_req_insert(struct io_kiocb *req)
-{
- struct io_ring_ctx *ctx = req->ctx;
- struct hlist_head *list;
-
- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
- hlist_add_head(&req->hash_node, list);
-}
-
static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
struct io_poll_iocb *poll,
struct io_poll_table *ipt, __poll_t mask,
struct async_poll *apoll;
struct io_poll_table ipt;
__poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI;
- int rw;
if (!req->file || !file_can_poll(req->file))
return IO_APOLL_ABORTED;
return IO_APOLL_ABORTED;
if (def->pollin) {
- rw = READ;
mask |= POLLIN | POLLRDNORM;
/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
(req->sr_msg.msg_flags & MSG_ERRQUEUE))
mask &= ~POLLIN;
} else {
- rw = WRITE;
mask |= POLLOUT | POLLWRNORM;
}
- /* if we can't nonblock try, then no point in arming a poll handler */
- if (!io_file_supports_nowait(req, rw))
- return IO_APOLL_ABORTED;
-
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
if (unlikely(!apoll))
return IO_APOLL_ABORTED;
do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
if (do_complete) {
- io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
- io_commit_cqring(req->ctx);
req_set_fail(req);
+ io_fill_cqe_req(req, -ECANCELED, 0);
+ io_commit_cqring(req->ctx);
io_put_req_deferred(req);
}
return do_complete;
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
- done = io_poll_complete(req, mask);
+ done = __io_poll_complete(req, mask);
+ io_commit_cqring(req->ctx);
}
spin_unlock(&ctx->completion_lock);
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *preq;
bool completing;
- int ret;
+ int ret2, ret = 0;
spin_lock(&ctx->completion_lock);
preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
if (!preq) {
ret = -ENOENT;
- goto err;
- }
-
- if (!req->poll_update.update_events && !req->poll_update.update_user_data) {
- completing = true;
- ret = io_poll_remove_one(preq) ? 0 : -EALREADY;
- goto err;
+fail:
+ spin_unlock(&ctx->completion_lock);
+ goto out;
}
-
+ io_poll_remove_double(preq);
/*
* Don't allow racy completion with singleshot, as we cannot safely
* update those. For multishot, if we're racing with completion, just
* let completion re-add it.
*/
- io_poll_remove_double(preq);
completing = !__io_poll_remove_one(preq, &preq->poll, false);
if (completing && (preq->poll.events & EPOLLONESHOT)) {
ret = -EALREADY;
- goto err;
- }
- /* we now have a detached poll request. reissue. */
- ret = 0;
-err:
- if (ret < 0) {
- spin_unlock(&ctx->completion_lock);
- req_set_fail(req);
- io_req_complete(req, ret);
- return 0;
- }
- /* only mask one event flags, keep behavior flags */
- if (req->poll_update.update_events) {
- preq->poll.events &= ~0xffff;
- preq->poll.events |= req->poll_update.events & 0xffff;
- preq->poll.events |= IO_POLL_UNMASK;
+ goto fail;
}
- if (req->poll_update.update_user_data)
- preq->user_data = req->poll_update.new_user_data;
spin_unlock(&ctx->completion_lock);
- /* complete update request, we're done with it */
- io_req_complete(req, ret);
-
- if (!completing) {
- ret = io_poll_add(preq, issue_flags);
- if (ret < 0) {
- req_set_fail(preq);
- io_req_complete(preq, ret);
+ if (req->poll_update.update_events || req->poll_update.update_user_data) {
+ /* only mask one event flags, keep behavior flags */
+ if (req->poll_update.update_events) {
+ preq->poll.events &= ~0xffff;
+ preq->poll.events |= req->poll_update.events & 0xffff;
+ preq->poll.events |= IO_POLL_UNMASK;
}
+ if (req->poll_update.update_user_data)
+ preq->user_data = req->poll_update.new_user_data;
+
+ ret2 = io_poll_add(preq, issue_flags);
+ /* successfully updated, don't complete poll request */
+ if (!ret2)
+ goto out;
}
+ req_set_fail(preq);
+ io_req_complete(preq, -ECANCELED);
+out:
+ if (ret < 0)
+ req_set_fail(req);
+ /* complete update request, we're done with it */
+ io_req_complete(req, ret);
return 0;
}
return PTR_ERR(req);
req_set_fail(req);
- io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
+ io_fill_cqe_req(req, -ECANCELED, 0);
io_put_req_deferred(req);
return 0;
}
up.nr = 0;
up.tags = 0;
up.resv = 0;
+ up.resv2 = 0;
io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
* forcing a sync submission from here, since we can't
* wait for request slots on the block side.
*/
- if (ret != -EAGAIN)
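+ /* retrying -EAGAIN only helps IOPOLL; otherwise break out rather than spin */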
+ if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL))
break;
cond_resched();
} while (1);
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_files;
int ret;
if (!ctx->file_data)
return -ENXIO;
+
+ /*
+ * Quiesce may unlock ->uring_lock, so while it's not held,
+ * prevent new requests from using the table.
+ */
+ ctx->nr_user_files = 0;
ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ ctx->nr_user_files = nr;
if (!ret)
__io_sqe_files_unregister(ctx);
return ret;
io_ring_submit_lock(ctx, lock_ring);
spin_lock(&ctx->completion_lock);
- io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
- ctx->cq_extra++;
+ io_fill_cqe_aux(ctx, prsrc->tag, 0, 0);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
struct io_rsrc_node *node, void *rsrc)
{
+ u64 *tag_slot = io_get_tag_slot(data, idx);
struct io_rsrc_put *prsrc;
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
if (!prsrc)
return -ENOMEM;
- prsrc->tag = *io_get_tag_slot(data, idx);
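+ /* consume the tag so a later update of this slot doesn't post it again */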
+ prsrc->tag = *tag_slot;
+ *tag_slot = 0;
prsrc->rsrc = rsrc;
list_add(&prsrc->list, &node->rsrc_list);
return 0;
struct io_ring_ctx *ctx = req->ctx;
struct io_fixed_file *file_slot;
struct file *file;
- int ret, i;
+ int ret;
io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
ret = -ENXIO;
if (ret)
goto out;
- i = array_index_nospec(offset, ctx->nr_user_files);
- file_slot = io_fixed_file_slot(&ctx->file_table, i);
+ offset = array_index_nospec(offset, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, offset);
ret = -EBADF;
if (!file_slot->file_ptr)
goto out;
if (file_slot->file_ptr) {
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- err = io_queue_rsrc_removal(data, up->offset + done,
- ctx->rsrc_node, file);
+ err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
if (err)
break;
file_slot->file_ptr = 0;
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_bufs;
int ret;
if (!ctx->buf_data)
return -ENXIO;
+ /*
+ * Quiesce may unlock ->uring_lock, so while it's not held,
+ * prevent new requests from using the table.
+ */
+ ctx->nr_user_bufs = 0;
ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+ ctx->nr_user_bufs = nr;
if (!ret)
__io_sqe_buffers_unregister(ctx);
return ret;
i = array_index_nospec(offset, ctx->nr_user_bufs);
if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
- err = io_queue_rsrc_removal(ctx->buf_data, offset,
+ err = io_queue_rsrc_removal(ctx->buf_data, i,
ctx->rsrc_node, ctx->user_bufs[i]);
if (unlikely(err)) {
io_buffer_unmap(ctx, &imu);
return -EINVAL;
if (copy_from_user(&arg, argp, sizeof(arg)))
return -EFAULT;
+ if (arg.pad)
+ return -EINVAL;
*sig = u64_to_user_ptr(arg.sigmask);
*argsz = arg.sigmask_sz;
*ts = u64_to_user_ptr(arg.ts);
__u32 tmp;
int err;
- if (up->resv)
- return -EINVAL;
if (check_add_overflow(up->offset, nr_args, &tmp))
return -EOVERFLOW;
err = io_rsrc_node_switch_start(ctx);
memset(&up, 0, sizeof(up));
if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
return -EFAULT;
+ if (up.resv || up.resv2)
+ return -EINVAL;
return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
}
return -EINVAL;
if (copy_from_user(&up, arg, sizeof(up)))
return -EFAULT;
- if (!up.nr || up.resv)
+ if (!up.nr || up.resv || up.resv2)
return -EINVAL;
return __io_register_rsrc_update(ctx, type, &up, up.nr);
}