[mirror_qemu.git] / util / aio-posix.c

/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "trace.h"
#ifdef CONFIG_EPOLL_CREATE1
#include <sys/epoll.h>
#endif

/*
 * State kept for one file descriptor registered with an AioContext.
 * Nodes are linked into ctx->aio_handlers.
 */
struct AioHandler
{
    /* fd being watched, events to wait for, and the events last reported */
    GPollFD pfd;
    /* Called when G_IO_IN, G_IO_HUP or G_IO_ERR is reported; may be NULL */
    IOHandler *io_read;
    /* Called when G_IO_OUT or G_IO_ERR is reported; may be NULL */
    IOHandler *io_write;
    /* Busy-polling callback; a true return counts as progress; may be NULL */
    AioPollFn *io_poll;
    /* Optional hooks invoked when polling mode is switched on ... */
    IOHandler *io_poll_begin;
    /* ... and when it is switched off again */
    IOHandler *io_poll_end;
    /* Non-zero once removal was requested while the list was being walked */
    int deleted;
    /* Argument handed to every callback above */
    void *opaque;
    /* Passed to aio_node_check() to decide whether the node may run */
    bool is_external;
    /* Linkage in ctx->aio_handlers */
    QLIST_ENTRY(AioHandler) node;
};

#ifdef CONFIG_EPOLL_CREATE1

/* The fd number threshold to switch to epoll */
#define EPOLL_ENABLE_THRESHOLD 64

/*
 * Permanently stop using epoll for @ctx and release the epoll instance.
 *
 * The epoll fd exists whenever ctx->epoll_available is set (it is created
 * in aio_context_setup()), even if epoll was never switched on, for example
 * because aio_epoll_try_enable() failed after registering only some of the
 * handlers.  It must be closed in that case as well, otherwise the
 * descriptor is leaked.  Calling this function again is a no-op.
 */
static void aio_epoll_disable(AioContext *ctx)
{
    ctx->epoll_enabled = false;
    if (!ctx->epoll_available) {
        /* Nothing was created, or it has already been released */
        return;
    }
    ctx->epoll_available = false;
    close(ctx->epollfd);
}

/*
 * Translate a GPollFD event mask (G_IO_IN, G_IO_OUT, G_IO_HUP, G_IO_ERR)
 * into the matching epoll event mask.  Other bits are ignored.
 */
static inline int epoll_events_from_pfd(int pfd_events)
{
    int epoll_events = 0;

    if (pfd_events & G_IO_IN) {
        epoll_events |= EPOLLIN;
    }
    if (pfd_events & G_IO_OUT) {
        epoll_events |= EPOLLOUT;
    }
    if (pfd_events & G_IO_HUP) {
        epoll_events |= EPOLLHUP;
    }
    if (pfd_events & G_IO_ERR) {
        epoll_events |= EPOLLERR;
    }
    return epoll_events;
}

/*
 * Register every live handler that waits for at least one event with the
 * epoll instance of @ctx.
 *
 * Returns: true, with ctx->epoll_enabled set, when all registrations
 * succeeded; false as soon as one epoll_ctl() call fails.  In the failure
 * case earlier registrations are not undone here.
 */
static bool aio_epoll_try_enable(AioContext *ctx)
{
    AioHandler *handler;
    struct epoll_event ev;

    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        /* Deleted nodes and nodes without events are not registered */
        if (!handler->deleted && handler->pfd.events) {
            ev.events = epoll_events_from_pfd(handler->pfd.events);
            ev.data.ptr = handler;
            if (epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD,
                          handler->pfd.fd, &ev) != 0) {
                return false;
            }
        }
    }

    ctx->epoll_enabled = true;
    return true;
}

/*
 * Bring the epoll registration of @node in line with node->pfd.events.
 *
 * Does nothing unless epoll is currently enabled for @ctx.  A node without
 * events is removed from the epoll set; otherwise it is added (@is_new) or
 * modified.  If epoll_ctl() fails, epoll is disabled for the context.
 */
static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
    struct epoll_event event;
    int op = EPOLL_CTL_DEL;

    if (!ctx->epoll_enabled) {
        return;
    }

    if (node->pfd.events) {
        op = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
    }

    if (epoll_ctl(ctx->epollfd, op, node->pfd.fd, &event) != 0) {
        aio_epoll_disable(ctx);
    }
}

/*
 * Wait for events through the epoll instance of @ctx.
 *
 * @pfds must hold exactly one entry, describing ctx->epollfd.
 * @timeout is in nanoseconds: 0 means do not block, a negative value means
 * wait without a time limit.
 *
 * For a positive @timeout the wait itself is done by qemu_poll_ns() on the
 * epoll fd; epoll_wait() is then only used to collect the events and must
 * not block.  Passing the nanosecond value on to epoll_wait() would be
 * wrong, because that call takes milliseconds as an int.
 *
 * Each reported event is stored in the revents field of its handler.
 *
 * Returns: the number of events reported by epoll_wait(), or the
 * non-positive result of the wait that timed out or failed.
 */
static int aio_epoll(AioContext *ctx, GPollFD *pfds,
                     unsigned npfd, int64_t timeout)
{
    AioHandler *node;
    int i, ret = 0;
    struct epoll_event events[128];

    assert(npfd == 1);
    assert(pfds[0].fd == ctx->epollfd);
    if (timeout > 0) {
        ret = qemu_poll_ns(pfds, npfd, timeout);
        if (ret > 0) {
            /* The epoll fd is ready: only collect events, never wait again */
            timeout = 0;
        }
    }
    if (timeout <= 0 || ret > 0) {
        /* timeout is <= 0 here; map it to epoll_wait()'s 0 / -1 convention */
        ret = epoll_wait(ctx->epollfd, events,
                         sizeof(events) / sizeof(events[0]),
                         timeout < 0 ? -1 : 0);
        if (ret <= 0) {
            goto out;
        }
        for (i = 0; i < ret; i++) {
            int ev = events[i].events;
            node = events[i].data.ptr;
            node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
                (ev & EPOLLOUT ? G_IO_OUT : 0) |
                (ev & EPOLLHUP ? G_IO_HUP : 0) |
                (ev & EPOLLERR ? G_IO_ERR : 0);
        }
    }
out:
    return ret;
}

/*
 * Tell whether the next wait of @ctx should go through epoll.
 *
 * While external clients are disabled the answer is always false, so the
 * caller falls back to ppoll.
 */
static bool aio_epoll_enabled(AioContext *ctx)
{
    if (aio_external_disabled(ctx)) {
        return false;
    }
    return ctx->epoll_enabled;
}

/*
 * Decide whether the upcoming wait should use epoll, switching epoll on
 * first when @npfd has reached EPOLL_ENABLE_THRESHOLD.
 *
 * @pfds and @timeout are not used.
 *
 * Returns: true if aio_epoll() should be used, false if the caller should
 * poll @pfds itself.  A failed attempt to enable epoll disables it for good.
 */
static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    if (!ctx->epoll_available) {
        return false;
    }
    if (aio_epoll_enabled(ctx)) {
        return true;
    }
    if (npfd < EPOLL_ENABLE_THRESHOLD) {
        /* Too few descriptors to bother with epoll */
        return false;
    }
    if (aio_epoll_try_enable(ctx)) {
        return true;
    }

    aio_epoll_disable(ctx);
    return false;
}

#else

/* Built without CONFIG_EPOLL_CREATE1: there is no epoll set to update. */
static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
}

/*
 * Built without CONFIG_EPOLL_CREATE1: must never be called.  aio_poll()
 * only calls aio_epoll() when aio_epoll_check_poll() returns true, and the
 * stub of that function in this configuration always returns false.
 */
static int aio_epoll(AioContext *ctx, GPollFD *pfds,
                     unsigned npfd, int64_t timeout)
{
    assert(false);
}

/* Built without CONFIG_EPOLL_CREATE1: epoll is never enabled. */
static bool aio_epoll_enabled(AioContext *ctx)
{
    return false;
}

/*
 * Built without CONFIG_EPOLL_CREATE1: always tells the caller to poll the
 * descriptors itself.
 */
static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                          unsigned npfd, int64_t timeout)
{
    return false;
}

#endif

/*
 * Look up the handler registered for @fd in @ctx.
 *
 * Nodes already marked as deleted are skipped.
 *
 * Returns: the matching handler, or NULL if there is none.
 */
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *candidate;

    QLIST_FOREACH(candidate, &ctx->aio_handlers, node) {
        if (candidate->pfd.fd == fd && !candidate->deleted) {
            return candidate;
        }
    }

    return NULL;
}

/*
 * Register, update or remove the handlers of @fd in @ctx.
 *
 * @is_external: stored in the node and later passed to aio_node_check()
 * @io_read:     called when the fd reports G_IO_IN, G_IO_HUP or G_IO_ERR
 * @io_write:    called when the fd reports G_IO_OUT or G_IO_ERR
 * @io_poll:     busy-polling callback
 * @opaque:      argument passed to the callbacks
 *
 * If @io_read, @io_write and @io_poll are all NULL the handler of @fd is
 * removed; this is a no-op when @fd has no handler.  Otherwise a handler is
 * created if needed and its settings are replaced by the given ones.
 *
 * ctx->poll_disable_cnt is kept equal to the number of handlers without an
 * io_poll callback.  The context is notified after every change.
 */
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
{
    AioHandler *node;
    bool is_new = false;
    bool deleted = false;

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }

        /* Clear the events so that aio_epoll_update() below removes the fd
         * from the epoll set instead of merely modifying its registration,
         * which would leave epoll holding a pointer to a node that is
         * about to go away.
         */
        node->pfd.events = 0;

        g_source_remove_poll(&ctx->source, &node->pfd);

        /* If the lock is held, just mark the node as deleted */
        if (qemu_lockcnt_count(&ctx->list_lock)) {
            node->deleted = 1;
            node->pfd.revents = 0;
        } else {
            /* Otherwise, delete it for real.  We can't just mark it as
             * deleted because deleted nodes are only cleaned up while
             * no one is walking the handlers list.
             */
            QLIST_REMOVE(node, node);
            deleted = true;
        }

        /* The node no longer counts as a handler that prevents polling */
        if (!node->io_poll) {
            ctx->poll_disable_cnt--;
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
            is_new = true;

            ctx->poll_disable_cnt += !io_poll;
        } else {
            /* Account for io_poll appearing or disappearing on this node */
            ctx->poll_disable_cnt += !io_poll - !node->io_poll;
        }

        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
        node->io_poll = io_poll;
        node->opaque = opaque;
        node->is_external = is_external;

        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
    }

    aio_epoll_update(ctx, node, is_new);
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    /* Free only after the node has been used for the last time above */
    if (deleted) {
        g_free(node);
    }
}

/*
 * Set the hooks that are run when polling mode starts and stops for the
 * handler of @fd.  Does nothing if @fd has no handler in @ctx.
 */
void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    AioHandler *handler = find_aio_handler(ctx, fd);

    if (handler) {
        handler->io_poll_begin = io_poll_begin;
        handler->io_poll_end = io_poll_end;
    }
}

/*
 * Convenience wrapper around aio_set_fd_handler() for an EventNotifier.
 *
 * The notifier's fd is registered with @io_read as read handler, no write
 * handler, and the notifier itself as the opaque argument.
 */
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll)
{
    int fd = event_notifier_get_fd(notifier);
    IOHandler *read_cb = (IOHandler *)io_read;

    aio_set_fd_handler(ctx, fd, is_external, read_cb, NULL, io_poll, notifier);
}

/*
 * Convenience wrapper around aio_set_fd_poll() for an EventNotifier: sets
 * the poll begin/end hooks of the handler registered for the notifier's fd.
 */
void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    int fd = event_notifier_get_fd(notifier);
    IOHandler *begin_cb = (IOHandler *)io_poll_begin;
    IOHandler *end_cb = (IOHandler *)io_poll_end;

    aio_set_fd_poll(ctx, fd, begin_cb, end_cb);
}

/*
 * Switch polling mode of @ctx on or off.
 *
 * If the state actually changes, the io_poll_begin (when @started) or
 * io_poll_end (otherwise) hook of every live handler that has one is
 * called with the handler's opaque pointer.
 */
static void poll_set_started(AioContext *ctx, bool started)
{
    AioHandler *handler;

    if (ctx->poll_started == started) {
        /* Already in the requested state */
        return;
    }
    ctx->poll_started = started;

    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        IOHandler *hook;

        if (!handler->deleted) {
            hook = started ? handler->io_poll_begin : handler->io_poll_end;
            if (hook) {
                hook(handler->opaque);
            }
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
}


/*
 * Switch polling mode off for @ctx.
 *
 * Returns: always false.
 */
bool aio_prepare(AioContext *ctx)
{
    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, false);

    return false;
}

/*
 * Check whether any handler of @ctx has an event waiting to be dispatched.
 *
 * A handler counts when one of its requested events has been reported, the
 * matching callback (io_read or io_write) is set, and aio_node_check()
 * allows the node to run.
 *
 * Returns: true if at least one such handler exists.
 */
bool aio_pending(AioContext *ctx)
{
    const int read_mask = G_IO_IN | G_IO_HUP | G_IO_ERR;
    const int write_mask = G_IO_OUT | G_IO_ERR;
    AioHandler *handler;
    bool pending = false;

    /*
     * aio_set_fd_handler may be called while the list is walked, so the
     * walk is bracketed by the list lock counter.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        int ready = handler->pfd.revents & handler->pfd.events;

        if (((ready & read_mask) && handler->io_read &&
             aio_node_check(ctx, handler->is_external)) ||
            ((ready & write_mask) && handler->io_write &&
             aio_node_check(ctx, handler->is_external))) {
            pending = true;
            break;
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);

    return pending;
}

/*
 * Run the read/write callbacks of every handler whose requested events
 * have been reported, and free handlers that were marked as deleted.
 *
 * Both callers in this file invoke it with ctx->list_lock incremented.
 *
 * Returns: true if a callback other than the read handler of ctx->notifier
 * was run, false otherwise.
 */
static bool aio_dispatch_handlers(AioContext *ctx)
{
    AioHandler *node, *tmp;
    bool progress = false;

    /* The _SAFE variant is needed because nodes may be freed in the loop */
    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents;

        /* Consume the reported events; only requested ones are considered */
        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            aio_node_check(ctx, node->is_external) &&
            node->io_read) {
            node->io_read(node->opaque);

            /* aio_notify() does not count as progress */
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }
        /* node->deleted is checked again: presumably io_read may have
         * removed this handler -- TODO confirm */
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
            aio_node_check(ctx, node->is_external) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
        }

        if (node->deleted) {
            /* NOTE(review): this looks like it succeeds only when no other
             * walker holds the list -- confirm against the QemuLockCnt
             * documentation.  If it fails the node stays in the list.
             */
            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
                QLIST_REMOVE(node, node);
                g_free(node);
                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
            }
        }
    }

    return progress;
}

/*
 * Run everything that is pending in @ctx without waiting: bottom halves
 * and fd handlers (with ctx->list_lock incremented), then expired timers.
 */
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    /* Timers are run after the list lock counter has been dropped */
    timerlistgroup_run_timers(&ctx->tlg);
}

/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
/* Array of descriptors handed to the poll call; grown by add_pollfd() */
static __thread GPollFD *pollfds;
/* nodes[i] is the handler that pollfds[i] was built from */
static __thread AioHandler **nodes;
/* Number of entries in use, and number of entries allocated */
static __thread unsigned npfd, nalloc;
/* Registered via qemu_thread_atexit_add(); its callback frees both arrays */
static __thread Notifier pollfds_cleanup_notifier;

/*
 * Notifier callback that releases this thread's poll arrays.
 *
 * Both arguments are unused.  The arrays must be empty (npfd == 0) when it
 * runs.  The pointers are reset together with nalloc: with nalloc back at 0
 * a later add_pollfd() reallocates both arrays, and must not be handed
 * pointers that have already been freed.
 */
static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    pollfds = NULL;
    nodes = NULL;
    nalloc = 0;
}

/*
 * Append the fd and requested events of @node to this thread's poll
 * arrays, growing them as needed.
 *
 * The arrays start at 8 entries and double whenever they are full.  On the
 * first allocation the cleanup notifier is registered as well.
 */
static void add_pollfd(AioHandler *node)
{
    GPollFD *slot;

    if (npfd == nalloc) {
        if (nalloc != 0) {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        } else {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }

    nodes[npfd] = node;

    /* Fresh entry: nothing has been reported for it yet */
    slot = &pollfds[npfd];
    slot->fd = node->pfd.fd;
    slot->events = node->pfd.events;
    slot->revents = 0;

    npfd++;
}

/*
 * Call the io_poll callback of every live handler that has one and that
 * aio_node_check() allows to run.
 *
 * Deleted nodes are skipped but not freed here; that is left to the caller.
 *
 * Returns: true if at least one io_poll callback returned true.
 */
static bool run_poll_handlers_once(AioContext *ctx)
{
    AioHandler *handler;
    bool made_progress = false;

    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        if (handler->deleted || !handler->io_poll) {
            continue;
        }
        if (!aio_node_check(ctx, handler->is_external)) {
            continue;
        }
        if (handler->io_poll(handler->opaque)) {
            made_progress = true;
        }
    }

    return made_progress;
}

/* run_poll_handlers:
 * @ctx: the AioContext
 * @max_ns: upper bound on the polling time, in nanoseconds
 *
 * Busy-polls the io_poll callbacks until one of them makes progress or
 * @max_ns has elapsed on QEMU_CLOCK_REALTIME.  The callbacks are always run
 * at least once.
 *
 * ctx->notify_me must be non-zero so that aio_notify() can be detected,
 * the caller must have incremented ctx->list_lock, and no handler may
 * lack an io_poll callback (ctx->poll_disable_cnt == 0).
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
{
    bool made_progress;
    int64_t deadline;

    assert(ctx->notify_me);
    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
    assert(ctx->poll_disable_cnt == 0);

    trace_run_poll_handlers_begin(ctx, max_ns);

    deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;

    for (;;) {
        made_progress = run_poll_handlers_once(ctx);
        if (made_progress ||
            qemu_clock_get_ns(QEMU_CLOCK_REALTIME) >= deadline) {
            break;
        }
    }

    trace_run_poll_handlers_end(ctx, made_progress);

    return made_progress;
}

/* try_poll_mode:
 * @ctx: the AioContext
 * @blocking: busy polling is only attempted when blocking is true
 *
 * Busy polling additionally requires that polling is configured
 * (ctx->poll_max_ns non-zero), that every handler has an io_poll callback
 * (ctx->poll_disable_cnt == 0) and that the polling time, capped by the
 * next timeout, is non-zero.
 *
 * ctx->notify_me must be non-zero so this function can detect aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool try_poll_mode(AioContext *ctx, bool blocking)
{
    bool may_busy_poll = blocking && ctx->poll_max_ns &&
                         ctx->poll_disable_cnt == 0;

    if (may_busy_poll) {
        /* See qemu_soonest_timeout() uint64_t hack */
        int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
                             (uint64_t)ctx->poll_ns);

        if (max_ns != 0) {
            poll_set_started(ctx, true);

            if (run_poll_handlers(ctx, max_ns)) {
                return true;
            }
        }
    }

    /* No progress from busy polling, or it was not attempted: leave
     * polling mode before the caller goes on to wait.
     */
    poll_set_started(ctx, false);

    /* One last polling pass may still make progress and spare the caller
     * a ppoll(2)/epoll_wait(2) call.
     */
    return run_poll_handlers_once(ctx);
}

/*
 * Run one iteration of the event loop of @ctx.
 *
 * @blocking: if true, wait for an event for up to the timeout computed by
 *            aio_compute_timeout(); if false, only check without waiting.
 *
 * The function first tries polling mode.  If that makes no progress it
 * waits on the file descriptors, through epoll when that is in use and
 * through qemu_poll_ns() otherwise.  Afterwards bottom halves, ready fd
 * handlers and expired timers are run.  When polling is configured
 * (ctx->poll_max_ns non-zero) the polling time ctx->poll_ns is adapted to
 * how long this call took.
 *
 * Returns: true if progress was made, false otherwise
 */
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    /* Stand-in node for the epoll fd.  add_pollfd() stores its address in
     * nodes[], which is read again when the results are copied back further
     * down, so it has to live for the whole function and not only for the
     * block that fills it in.
     */
    AioHandler epoll_handler;
    int i;
    int ret = 0;
    bool progress;
    int64_t timeout;
    int64_t start = 0;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    qemu_lockcnt_inc(&ctx->list_lock);

    /* The start time is only needed for adjusting the polling time below */
    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    progress = try_poll_mode(ctx, blocking);
    if (!progress) {
        assert(npfd == 0);

        /* fill pollfds */

        if (!aio_epoll_enabled(ctx)) {
            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
                if (!node->deleted && node->pfd.events
                    && aio_node_check(ctx, node->is_external)) {
                    add_pollfd(node);
                }
            }
        }

        timeout = blocking ? aio_compute_timeout(ctx) : 0;

        /* wait until next event */
        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
            /* With epoll the only descriptor polled is the epoll fd */
            epoll_handler.pfd.fd = ctx->epollfd;
            epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
            npfd = 0;
            add_pollfd(&epoll_handler);
            ret = aio_epoll(ctx, pollfds, npfd, timeout);
        } else  {
            ret = qemu_poll_ns(pollfds, npfd, timeout);
        }
    }

    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
    }

    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            /* A grow factor of 0 means "use the default", which is 2 */
            if (grow == 0) {
                grow = 2;
            }

            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    aio_notify_accept(ctx);

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        /* Copy the results back into the nodes the entries were built from */
        for (i = 0; i < npfd; i++) {
            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }

    npfd = 0;

    progress |= aio_bh_poll(ctx);

    if (ret > 0) {
        progress |= aio_dispatch_handlers(ctx);
    }

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

/*
 * Platform-specific setup of @ctx.
 *
 * Exits with an error if the QEMU_AIO_POLL_MAX_NS environment variable is
 * set.  When built with CONFIG_EPOLL_CREATE1, creates the epoll instance of
 * the context and records in ctx->epoll_available whether that worked; a
 * failure is reported on stderr but is not fatal.
 */
void aio_context_setup(AioContext *ctx)
{
    /* TODO remove this in final patch submission */
    if (getenv("QEMU_AIO_POLL_MAX_NS")) {
        fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
                "been replaced with -object iothread,poll-max-ns=NUM\n");
        exit(1);
    }

#ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
        /* Terminate the line so later output does not run into it */
        fprintf(stderr, "Failed to create epoll instance: %s\n",
                strerror(errno));
        ctx->epoll_available = false;
    } else {
        ctx->epoll_available = true;
    }
#endif
}

/*
 * Set the polling parameters of @ctx and notify the context.
 *
 * @max_ns: stored as ctx->poll_max_ns; aio_poll() only adjusts and uses
 *          polling when it is non-zero
 * @grow:   factor by which aio_poll() multiplies the polling time when it
 *          grows it; 0 is treated as 2 there
 * @shrink: divisor aio_poll() applies when it shrinks the polling time;
 *          0 makes it reset the polling time to 0 instead
 * @errp:   not used by this implementation
 *
 * The current polling time ctx->poll_ns is reset to 0.
 */
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;

    aio_notify(ctx);
}
Commit	Line	Data
a76bab49 AL	1	/*
	2	* QEMU aio implementation
	3	*
	4	* Copyright IBM, Corp. 2008
	5	*
	6	* Authors:
	7	* Anthony Liguori <aliguori@us.ibm.com>
	8	*
	9	* This work is licensed under the terms of the GNU GPL, version 2. See
	10	* the COPYING file in the top-level directory.
	11	*
6b620ca3 PB	12	* Contributions after 2012-01-13 are licensed under the terms of the
6b620ca3 PB	13	* GNU GPL, version 2 or (at your option) any later version.
a76bab49 AL	14	*/
a76bab49 AL	15
d38ea87a	16	#include "qemu/osdep.h"
a76bab49	17	#include "qemu-common.h"
737e150e	18	#include "block/block.h"
2bbf11d7	19	#include "qemu/rcu_queue.h"
1de7afc9	20	#include "qemu/sockets.h"
4a1cba38	21	#include "qemu/cutils.h"
c2b38b27	22	#include "trace.h"
147dfab7	23	#ifdef CONFIG_EPOLL_CREATE1
fbe3fc5c FZ	24	#include <sys/epoll.h>
fbe3fc5c FZ	25	#endif
a76bab49	26
a76bab49 AL	27	struct AioHandler
a76bab49 AL	28	{
cd9ba1eb	29	GPollFD pfd;
a76bab49 AL	30	IOHandler *io_read;
a76bab49 AL	31	IOHandler *io_write;
4a1cba38	32	AioPollFn *io_poll;
684e508c SH	33	IOHandler *io_poll_begin;
684e508c SH	34	IOHandler *io_poll_end;
a76bab49 AL	35	int deleted;
a76bab49 AL	36	void *opaque;
dca21ef2	37	bool is_external;
72cf2d4f	38	QLIST_ENTRY(AioHandler) node;
a76bab49 AL	39	};
a76bab49 AL	40
147dfab7	41	#ifdef CONFIG_EPOLL_CREATE1
fbe3fc5c FZ	42
	43	/* The fd number threashold to switch to epoll */
	44	#define EPOLL_ENABLE_THRESHOLD 64
	45
	46	static void aio_epoll_disable(AioContext *ctx)
	47	{
	48	ctx->epoll_available = false;
	49	if (!ctx->epoll_enabled) {
	50	return;
	51	}
	52	ctx->epoll_enabled = false;
	53	close(ctx->epollfd);
	54	}
	55
	56	static inline int epoll_events_from_pfd(int pfd_events)
	57	{
	58	return (pfd_events & G_IO_IN ? EPOLLIN : 0) \|
	59	(pfd_events & G_IO_OUT ? EPOLLOUT : 0) \|
	60	(pfd_events & G_IO_HUP ? EPOLLHUP : 0) \|
	61	(pfd_events & G_IO_ERR ? EPOLLERR : 0);
	62	}
	63
	64	static bool aio_epoll_try_enable(AioContext *ctx)
	65	{
	66	AioHandler *node;
	67	struct epoll_event event;
	68
2bbf11d7	69	QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
fbe3fc5c FZ	70	int r;
	71	if (node->deleted \|\| !node->pfd.events) {
	72	continue;
	73	}
	74	event.events = epoll_events_from_pfd(node->pfd.events);
	75	event.data.ptr = node;
	76	r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
	77	if (r) {
	78	return false;
	79	}
	80	}
	81	ctx->epoll_enabled = true;
	82	return true;
	83	}
	84
	85	static void aio_epoll_update(AioContext ctx, AioHandler node, bool is_new)
	86	{
	87	struct epoll_event event;
	88	int r;
35dd66e2	89	int ctl;
fbe3fc5c FZ	90
	91	if (!ctx->epoll_enabled) {
	92	return;
	93	}
	94	if (!node->pfd.events) {
35dd66e2	95	ctl = EPOLL_CTL_DEL;
fbe3fc5c FZ	96	} else {
	97	event.data.ptr = node;
	98	event.events = epoll_events_from_pfd(node->pfd.events);
35dd66e2 PB	99	ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
	100	}
	101
	102	r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
	103	if (r) {
	104	aio_epoll_disable(ctx);
fbe3fc5c FZ	105	}
	106	}
	107
	108	static int aio_epoll(AioContext ctx, GPollFD pfds,
	109	unsigned npfd, int64_t timeout)
	110	{
	111	AioHandler *node;
	112	int i, ret = 0;
	113	struct epoll_event events[128];
	114
	115	assert(npfd == 1);
	116	assert(pfds[0].fd == ctx->epollfd);
	117	if (timeout > 0) {
	118	ret = qemu_poll_ns(pfds, npfd, timeout);
	119	}
	120	if (timeout <= 0 \|\| ret > 0) {
	121	ret = epoll_wait(ctx->epollfd, events,
	122	sizeof(events) / sizeof(events[0]),
	123	timeout);
	124	if (ret <= 0) {
	125	goto out;
	126	}
	127	for (i = 0; i < ret; i++) {
	128	int ev = events[i].events;
	129	node = events[i].data.ptr;
	130	node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) \|
	131	(ev & EPOLLOUT ? G_IO_OUT : 0) \|
	132	(ev & EPOLLHUP ? G_IO_HUP : 0) \|
	133	(ev & EPOLLERR ? G_IO_ERR : 0);
	134	}
	135	}
	136	out:
	137	return ret;
	138	}
	139
	140	static bool aio_epoll_enabled(AioContext *ctx)
	141	{
	142	/* Fall back to ppoll when external clients are disabled. */
	143	return !aio_external_disabled(ctx) && ctx->epoll_enabled;
	144	}
	145
	146	static bool aio_epoll_check_poll(AioContext ctx, GPollFD pfds,
	147	unsigned npfd, int64_t timeout)
	148	{
	149	if (!ctx->epoll_available) {
	150	return false;
	151	}
	152	if (aio_epoll_enabled(ctx)) {
	153	return true;
	154	}
	155	if (npfd >= EPOLL_ENABLE_THRESHOLD) {
	156	if (aio_epoll_try_enable(ctx)) {
	157	return true;
	158	} else {
	159	aio_epoll_disable(ctx);
	160	}
	161	}
	162	return false;
	163	}
	164
	165	#else
	166
	167	static void aio_epoll_update(AioContext ctx, AioHandler node, bool is_new)
	168	{
169	}
170
171	static int aio_epoll(AioContext ctx, GPollFD pfds,
172	unsigned npfd, int64_t timeout)
173	{
174	assert(false);
175	}
176
177	static bool aio_epoll_enabled(AioContext *ctx)
178	{
179	return false;
180	}
181
182	static bool aio_epoll_check_poll(AioContext ctx, GPollFD pfds,
183	unsigned npfd, int64_t timeout)
184	{
185	return false;
186	}
187
188	#endif
189
a915f4bc	190	static AioHandler find_aio_handler(AioContext ctx, int fd)
a76bab49 AL	191	{
	192	AioHandler *node;
	193
a915f4bc	194	QLIST_FOREACH(node, &ctx->aio_handlers, node) {
cd9ba1eb	195	if (node->pfd.fd == fd)
79d5ca56 AG	196	if (!node->deleted)
79d5ca56 AG	197	return node;
a76bab49 AL	198	}
	199
	200	return NULL;
	201	}
	202
a915f4bc PB	203	void aio_set_fd_handler(AioContext *ctx,
a915f4bc PB	204	int fd,
dca21ef2	205	bool is_external,
a915f4bc PB	206	IOHandler *io_read,
a915f4bc PB	207	IOHandler *io_write,
f6a51c84	208	AioPollFn *io_poll,
a915f4bc	209	void *opaque)
a76bab49 AL	210	{
a76bab49 AL	211	AioHandler *node;
fbe3fc5c	212	bool is_new = false;
0ed39f3d	213	bool deleted = false;
a76bab49	214
2bbf11d7 PB	215	qemu_lockcnt_lock(&ctx->list_lock);
2bbf11d7 PB	216
a915f4bc	217	node = find_aio_handler(ctx, fd);
a76bab49 AL	218
a76bab49 AL	219	/* Are we deleting the fd handler? */
4a1cba38	220	if (!io_read && !io_write && !io_poll) {
36173ec5	221	if (node == NULL) {
2bbf11d7	222	qemu_lockcnt_unlock(&ctx->list_lock);
36173ec5 PB	223	return;
	224	}
	225
	226	g_source_remove_poll(&ctx->source, &node->pfd);
	227
	228	/* If the lock is held, just mark the node as deleted */
2bbf11d7	229	if (qemu_lockcnt_count(&ctx->list_lock)) {
36173ec5 PB	230	node->deleted = 1;
	231	node->pfd.revents = 0;
	232	} else {
	233	/* Otherwise, delete it for real. We can't just mark it as
2bbf11d7 PB	234	* deleted because deleted nodes are only cleaned up while
2bbf11d7 PB	235	* no one is walking the handlers list.
36173ec5 PB	236	*/
	237	QLIST_REMOVE(node, node);
	238	deleted = true;
a76bab49	239	}
4a1cba38 SH	240
	241	if (!node->io_poll) {
	242	ctx->poll_disable_cnt--;
	243	}
a76bab49 AL	244	} else {
	245	if (node == NULL) {
	246	/* Alloc and insert if it's not already there */
3ba235a0	247	node = g_new0(AioHandler, 1);
cd9ba1eb	248	node->pfd.fd = fd;
2bbf11d7	249	QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
e3713e00 PB	250
e3713e00 PB	251	g_source_add_poll(&ctx->source, &node->pfd);
fbe3fc5c	252	is_new = true;
4a1cba38 SH	253
	254	ctx->poll_disable_cnt += !io_poll;
	255	} else {
	256	ctx->poll_disable_cnt += !io_poll - !node->io_poll;
a76bab49	257	}
4a1cba38	258
a76bab49 AL	259	/* Update handler with latest information */
	260	node->io_read = io_read;
	261	node->io_write = io_write;
4a1cba38	262	node->io_poll = io_poll;
a76bab49	263	node->opaque = opaque;
dca21ef2	264	node->is_external = is_external;
cd9ba1eb	265
b5a01a70 SH	266	node->pfd.events = (io_read ? G_IO_IN \| G_IO_HUP \| G_IO_ERR : 0);
b5a01a70 SH	267	node->pfd.events \|= (io_write ? G_IO_OUT \| G_IO_ERR : 0);
a76bab49	268	}
7ed2b24c	269
fbe3fc5c	270	aio_epoll_update(ctx, node, is_new);
2bbf11d7	271	qemu_lockcnt_unlock(&ctx->list_lock);
7ed2b24c	272	aio_notify(ctx);
4a1cba38	273
0ed39f3d FZ	274	if (deleted) {
	275	g_free(node);
	276	}
9958c351 PB	277	}
9958c351 PB	278
684e508c SH	279	void aio_set_fd_poll(AioContext *ctx, int fd,
	280	IOHandler *io_poll_begin,
	281	IOHandler *io_poll_end)
	282	{
	283	AioHandler *node = find_aio_handler(ctx, fd);
	284
	285	if (!node) {
	286	return;
	287	}
	288
	289	node->io_poll_begin = io_poll_begin;
	290	node->io_poll_end = io_poll_end;
	291	}
	292
a915f4bc PB	293	void aio_set_event_notifier(AioContext *ctx,
a915f4bc PB	294	EventNotifier *notifier,
dca21ef2	295	bool is_external,
f6a51c84 SH	296	EventNotifierHandler *io_read,
f6a51c84 SH	297	AioPollFn *io_poll)
a76bab49	298	{
f6a51c84 SH	299	aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
f6a51c84 SH	300	(IOHandler *)io_read, NULL, io_poll, notifier);
a76bab49 AL	301	}
a76bab49 AL	302
684e508c SH	303	void aio_set_event_notifier_poll(AioContext *ctx,
	304	EventNotifier *notifier,
	305	EventNotifierHandler *io_poll_begin,
	306	EventNotifierHandler *io_poll_end)
	307	{
	308	aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
	309	(IOHandler *)io_poll_begin,
	310	(IOHandler *)io_poll_end);
	311	}
	312
	313	static void poll_set_started(AioContext *ctx, bool started)
	314	{
	315	AioHandler *node;
	316
	317	if (started == ctx->poll_started) {
	318	return;
	319	}
	320
	321	ctx->poll_started = started;
	322
2bbf11d7 PB	323	qemu_lockcnt_inc(&ctx->list_lock);
2bbf11d7 PB	324	QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
684e508c SH	325	IOHandler *fn;
	326
	327	if (node->deleted) {
	328	continue;
	329	}
	330
	331	if (started) {
	332	fn = node->io_poll_begin;
	333	} else {
	334	fn = node->io_poll_end;
	335	}
	336
	337	if (fn) {
	338	fn(node->opaque);
	339	}
	340	}
2bbf11d7	341	qemu_lockcnt_dec(&ctx->list_lock);
684e508c SH	342	}
	343
	344
a3462c65 PB	345	bool aio_prepare(AioContext *ctx)
a3462c65 PB	346	{
684e508c SH	347	/* Poll mode cannot be used with glib's event loop, disable it. */
	348	poll_set_started(ctx, false);
	349
a3462c65 PB	350	return false;
	351	}
	352
cd9ba1eb PB	353	bool aio_pending(AioContext *ctx)
	354	{
	355	AioHandler *node;
2bbf11d7	356	bool result = false;
cd9ba1eb	357
2bbf11d7 PB	358	/*
	359	* We have to walk very carefully in case aio_set_fd_handler is
	360	* called while we're walking.
	361	*/
	362	qemu_lockcnt_inc(&ctx->list_lock);
	363
	364	QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
cd9ba1eb PB	365	int revents;
cd9ba1eb PB	366
cd9ba1eb	367	revents = node->pfd.revents & node->pfd.events;
37989ced FZ	368	if (revents & (G_IO_IN \| G_IO_HUP \| G_IO_ERR) && node->io_read &&
37989ced FZ	369	aio_node_check(ctx, node->is_external)) {
2bbf11d7 PB	370	result = true;
2bbf11d7 PB	371	break;
cd9ba1eb	372	}
37989ced FZ	373	if (revents & (G_IO_OUT \| G_IO_ERR) && node->io_write &&
37989ced FZ	374	aio_node_check(ctx, node->is_external)) {
2bbf11d7 PB	375	result = true;
2bbf11d7 PB	376	break;
cd9ba1eb PB	377	}
cd9ba1eb PB	378	}
2bbf11d7	379	qemu_lockcnt_dec(&ctx->list_lock);
cd9ba1eb	380
2bbf11d7	381	return result;
cd9ba1eb PB	382	}
cd9ba1eb PB	383
56d2c3c6	384	static bool aio_dispatch_handlers(AioContext *ctx)
a76bab49	385	{
abf90d39	386	AioHandler node, tmp;
d0c8d2c0	387	bool progress = false;
7c0628b2	388
2bbf11d7	389	QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
abf90d39	390	int revents;
cd9ba1eb PB	391
	392	revents = node->pfd.revents & node->pfd.events;
	393	node->pfd.revents = 0;
	394
d0c8d2c0 SH	395	if (!node->deleted &&
d0c8d2c0 SH	396	(revents & (G_IO_IN \| G_IO_HUP \| G_IO_ERR)) &&
37989ced	397	aio_node_check(ctx, node->is_external) &&
d0c8d2c0	398	node->io_read) {
cd9ba1eb	399	node->io_read(node->opaque);
164a101f SH	400
	401	/* aio_notify() does not count as progress */
	402	if (node->opaque != &ctx->notifier) {
	403	progress = true;
	404	}
cd9ba1eb	405	}
d0c8d2c0 SH	406	if (!node->deleted &&
d0c8d2c0 SH	407	(revents & (G_IO_OUT \| G_IO_ERR)) &&
37989ced	408	aio_node_check(ctx, node->is_external) &&
d0c8d2c0	409	node->io_write) {
cd9ba1eb PB	410	node->io_write(node->opaque);
	411	progress = true;
	412	}
	413
abf90d39	414	if (node->deleted) {
2bbf11d7	415	if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
abf90d39 PB	416	QLIST_REMOVE(node, node);
abf90d39 PB	417	g_free(node);
2bbf11d7	418	qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
abf90d39	419	}
cd9ba1eb PB	420	}
cd9ba1eb PB	421	}
438e1f47	422
56d2c3c6 PB	423	return progress;
	424	}
	425
a153bf52	426	void aio_dispatch(AioContext *ctx)
56d2c3c6	427	{
a153bf52	428	qemu_lockcnt_inc(&ctx->list_lock);
bd451435	429	aio_bh_poll(ctx);
a153bf52 PB	430	aio_dispatch_handlers(ctx);
a153bf52 PB	431	qemu_lockcnt_dec(&ctx->list_lock);
438e1f47	432
a153bf52	433	timerlistgroup_run_timers(&ctx->tlg);
d0c8d2c0 SH	434	}
d0c8d2c0 SH	435
/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
/* Poll descriptors handed to qemu_poll_ns()/aio_epoll() by aio_poll(). */
static __thread GPollFD *pollfds;
/* nodes[i] is the handler that pollfds[i] was filled in from. */
static __thread AioHandler **nodes;
/* npfd: entries currently in use; nalloc: allocated capacity of both arrays. */
static __thread unsigned npfd, nalloc;
/* Registered by add_pollfd() on first allocation so the arrays get freed. */
static __thread Notifier pollfds_cleanup_notifier;
	451
	452	static void pollfds_cleanup(Notifier n, void unused)
	453	{
	454	g_assert(npfd == 0);
	455	g_free(pollfds);
	456	g_free(nodes);
	457	nalloc = 0;
	458	}
	459
	460	static void add_pollfd(AioHandler *node)
	461	{
	462	if (npfd == nalloc) {
	463	if (nalloc == 0) {
	464	pollfds_cleanup_notifier.notify = pollfds_cleanup;
	465	qemu_thread_atexit_add(&pollfds_cleanup_notifier);
	466	nalloc = 8;
	467	} else {
	468	g_assert(nalloc <= INT_MAX);
	469	nalloc *= 2;
	470	}
	471	pollfds = g_renew(GPollFD, pollfds, nalloc);
	472	nodes = g_renew(AioHandler *, nodes, nalloc);
	473	}
	474	nodes[npfd] = node;
	475	pollfds[npfd] = (GPollFD) {
	476	.fd = node->pfd.fd,
	477	.events = node->pfd.events,
	478	};
	479	npfd++;
	480	}
	481
684e508c SH	482	static bool run_poll_handlers_once(AioContext *ctx)
	483	{
	484	bool progress = false;
	485	AioHandler *node;
	486
2bbf11d7	487	QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
684e508c	488	if (!node->deleted && node->io_poll &&
59c9f437 SH	489	aio_node_check(ctx, node->is_external) &&
59c9f437 SH	490	node->io_poll(node->opaque)) {
684e508c SH	491	progress = true;
	492	}
	493
	494	/* Caller handles freeing deleted nodes. Don't do it here. */
	495	}
	496
	497	return progress;
	498	}
	499
4a1cba38 SH	500	/* run_poll_handlers:
	501	* @ctx: the AioContext
	502	* @max_ns: maximum time to poll for, in nanoseconds
	503	*
	504	* Polls for a given time.
	505	*
	506	* Note that ctx->notify_me must be non-zero so this function can detect
	507	* aio_notify().
	508	*
2bbf11d7	509	* Note that the caller must have incremented ctx->list_lock.
4a1cba38 SH	510	*
	511	* Returns: true if progress was made, false otherwise
	512	*/
	513	static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
	514	{
684e508c	515	bool progress;
4a1cba38 SH	516	int64_t end_time;
	517
	518	assert(ctx->notify_me);
2bbf11d7	519	assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
4a1cba38 SH	520	assert(ctx->poll_disable_cnt == 0);
	521
	522	trace_run_poll_handlers_begin(ctx, max_ns);
	523
	524	end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
	525
	526	do {
684e508c	527	progress = run_poll_handlers_once(ctx);
4a1cba38 SH	528	} while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time);
	529
	530	trace_run_poll_handlers_end(ctx, progress);
	531
	532	return progress;
	533	}
	534
	535	/* try_poll_mode:
	536	* @ctx: the AioContext
684e508c	537	* @blocking: busy polling is only attempted when blocking is true
4a1cba38	538	*
684e508c	539	* ctx->notify_me must be non-zero so this function can detect aio_notify().
4a1cba38	540	*
2bbf11d7	541	* Note that the caller must have incremented ctx->list_lock.
4a1cba38 SH	542	*
	543	* Returns: true if progress was made, false otherwise
	544	*/
	545	static bool try_poll_mode(AioContext *ctx, bool blocking)
	546	{
	547	if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
	548	/* See qemu_soonest_timeout() uint64_t hack */
	549	int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
82a41186	550	(uint64_t)ctx->poll_ns);
4a1cba38 SH	551
4a1cba38 SH	552	if (max_ns) {
684e508c SH	553	poll_set_started(ctx, true);
684e508c SH	554
4a1cba38 SH	555	if (run_poll_handlers(ctx, max_ns)) {
	556	return true;
	557	}
	558	}
	559	}
	560
684e508c SH	561	poll_set_started(ctx, false);
	562
	563	/* Even if we don't run busy polling, try polling once in case it can make
	564	* progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
	565	*/
	566	return run_poll_handlers_once(ctx);
4a1cba38 SH	567	}
4a1cba38 SH	568
d0c8d2c0 SH	569	bool aio_poll(AioContext *ctx, bool blocking)
d0c8d2c0 SH	570	{
d0c8d2c0	571	AioHandler *node;
4a1cba38 SH	572	int i;
4a1cba38 SH	573	int ret = 0;
164a101f	574	bool progress;
e98ab097	575	int64_t timeout;
82a41186	576	int64_t start = 0;
d0c8d2c0	577
0ceb849b PB	578	/* aio_notify can avoid the expensive event_notifier_set if
0ceb849b PB	579	* everything (file descriptors, bottom halves, timers) will
e4c7e2d1 PB	580	* be re-evaluated before the next blocking poll(). This is
e4c7e2d1 PB	581	* already true when aio_poll is called with blocking == false;
eabc9779 PB	582	* if blocking == true, it is only true after poll() returns,
eabc9779 PB	583	* so disable the optimization now.
0ceb849b	584	*/
eabc9779 PB	585	if (blocking) {
	586	atomic_add(&ctx->notify_me, 2);
	587	}
0ceb849b	588
2bbf11d7	589	qemu_lockcnt_inc(&ctx->list_lock);
a76bab49	590
82a41186 SH	591	if (ctx->poll_max_ns) {
	592	start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
	593	}
	594
0836c72f	595	progress = try_poll_mode(ctx, blocking);
0836c72f	596	if (!progress) {
4a1cba38	597	assert(npfd == 0);
a76bab49	598
4a1cba38	599	/* fill pollfds */
6b942468	600
4a1cba38	601	if (!aio_epoll_enabled(ctx)) {
2bbf11d7	602	QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
4a1cba38 SH	603	if (!node->deleted && node->pfd.events
	604	&& aio_node_check(ctx, node->is_external)) {
	605	add_pollfd(node);
	606	}
6b942468	607	}
9eb0bfca	608	}
a76bab49	609
4a1cba38	610	timeout = blocking ? aio_compute_timeout(ctx) : 0;
a76bab49	611
4a1cba38	612	/* wait until next event */
4a1cba38 SH	613	if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
	614	AioHandler epoll_handler;
	615
	616	epoll_handler.pfd.fd = ctx->epollfd;
	617	epoll_handler.pfd.events = G_IO_IN \| G_IO_OUT \| G_IO_HUP \| G_IO_ERR;
	618	npfd = 0;
	619	add_pollfd(&epoll_handler);
	620	ret = aio_epoll(ctx, pollfds, npfd, timeout);
	621	} else {
	622	ret = qemu_poll_ns(pollfds, npfd, timeout);
	623	}
fbe3fc5c	624	}
4a1cba38	625
eabc9779 PB	626	if (blocking) {
	627	atomic_sub(&ctx->notify_me, 2);
	628	}
9eb0bfca	629
82a41186 SH	630	/* Adjust polling time */
	631	if (ctx->poll_max_ns) {
	632	int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
	633
	634	if (block_ns <= ctx->poll_ns) {
	635	/* This is the sweet spot, no adjustment needed */
	636	} else if (block_ns > ctx->poll_max_ns) {
	637	/* We'd have to poll for too long, poll less */
	638	int64_t old = ctx->poll_ns;
	639
	640	if (ctx->poll_shrink) {
	641	ctx->poll_ns /= ctx->poll_shrink;
	642	} else {
	643	ctx->poll_ns = 0;
	644	}
	645
	646	trace_poll_shrink(ctx, old, ctx->poll_ns);
	647	} else if (ctx->poll_ns < ctx->poll_max_ns &&
	648	block_ns < ctx->poll_max_ns) {
	649	/* There is room to grow, poll longer */
	650	int64_t old = ctx->poll_ns;
	651	int64_t grow = ctx->poll_grow;
	652
	653	if (grow == 0) {
	654	grow = 2;
	655	}
	656
	657	if (ctx->poll_ns) {
	658	ctx->poll_ns *= grow;
	659	} else {
	660	ctx->poll_ns = 4000; /* start polling at 4 microseconds */
	661	}
	662
	663	if (ctx->poll_ns > ctx->poll_max_ns) {
	664	ctx->poll_ns = ctx->poll_max_ns;
	665	}
	666
	667	trace_poll_grow(ctx, old, ctx->poll_ns);
	668	}
	669	}
	670
05e514b1	671	aio_notify_accept(ctx);
21a03d17	672
9eb0bfca PB	673	/* if we have any readable fds, dispatch event */
9eb0bfca PB	674	if (ret > 0) {
e98ab097 PB	675	for (i = 0; i < npfd; i++) {
e98ab097 PB	676	nodes[i]->pfd.revents = pollfds[i].revents;
a76bab49	677	}
438e1f47 AB	678	}
438e1f47 AB	679
e98ab097	680	npfd = 0;
e98ab097	681
a153bf52 PB	682	progress \|= aio_bh_poll(ctx);
	683
	684	if (ret > 0) {
a153bf52	685	progress \|= aio_dispatch_handlers(ctx);
9eb0bfca	686	}
bcdc1857	687
bd451435 PB	688	qemu_lockcnt_dec(&ctx->list_lock);
bd451435 PB	689
a153bf52 PB	690	progress \|= timerlistgroup_run_timers(&ctx->tlg);
a153bf52 PB	691
164a101f	692	return progress;
a76bab49	693	}
37fcee5d	694
7e003465	695	void aio_context_setup(AioContext *ctx)
37fcee5d	696	{
4a1cba38 SH	697	/* TODO remove this in final patch submission */
	698	if (getenv("QEMU_AIO_POLL_MAX_NS")) {
	699	fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
	700	"been replaced with -object iothread,poll-max-ns=NUM\n");
	701	exit(1);
	702	}
	703
147dfab7	704	#ifdef CONFIG_EPOLL_CREATE1
fbe3fc5c FZ	705	assert(!ctx->epollfd);
	706	ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
	707	if (ctx->epollfd == -1) {
7e003465	708	fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
fbe3fc5c FZ	709	ctx->epoll_available = false;
	710	} else {
	711	ctx->epoll_available = true;
	712	}
	713	#endif
37fcee5d	714	}
4a1cba38	715
82a41186 SH	716	void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
82a41186 SH	717	int64_t grow, int64_t shrink, Error **errp)
4a1cba38	718	{
82a41186 SH	719	/* No thread synchronization here, it doesn't matter if an incorrect value
82a41186 SH	720	* is used once.
4a1cba38 SH	721	*/
4a1cba38 SH	722	ctx->poll_max_ns = max_ns;
82a41186 SH	723	ctx->poll_ns = 0;
	724	ctx->poll_grow = grow;
	725	ctx->poll_shrink = shrink;
4a1cba38 SH	726
	727	aio_notify(ctx);
	728	}