async.c

   1 /*
   2  * QEMU System Emulator
   3  *
   4  * Copyright (c) 2003-2008 Fabrice Bellard
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to deal
   8  * in the Software without restriction, including without limitation the rights
   9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  * copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  * THE SOFTWARE.
  23  */
  24
  25 #include "qemu/osdep.h"
  26 #include "qapi/error.h"
  27 #include "qemu-common.h"
  28 #include "block/aio.h"
  29 #include "block/thread-pool.h"
  30 #include "qemu/main-loop.h"
  31 #include "qemu/atomic.h"
  32 #include "block/raw-aio.h"
  33
  34 /***********************************************************/
  35 /* bottom halves (can be seen as timers which expire ASAP) */
  36
/* A bottom half: a deferred callback linked on an AioContext's BH list. */
struct QEMUBH {
    /* Context whose first_bh list this BH is linked on */
    AioContext *ctx;
    /* Callback invoked by aio_bh_call() */
    QEMUBHFunc *cb;
    /* Argument handed to cb */
    void *opaque;
    /* Next BH in the context's singly linked list */
    QEMUBH *next;
    /* Set while the BH is pending; aio_bh_poll() clears it before running cb */
    bool scheduled;
    /* Set by qemu_bh_schedule_idle(); idle BHs do not count as progress */
    bool idle;
    /* Marks the BH for unlinking and freeing by aio_bh_poll() */
    bool deleted;
};
  46
  47 void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
  48 {
  49     QEMUBH *bh;
  50     bh = g_new(QEMUBH, 1);
  51     *bh = (QEMUBH){
  52         .ctx = ctx,
  53         .cb = cb,
  54         .opaque = opaque,
  55     };
  56     qemu_lockcnt_lock(&ctx->list_lock);
  57     bh->next = ctx->first_bh;
  58     bh->scheduled = 1;
  59     bh->deleted = 1;
  60     /* Make sure that the members are ready before putting bh into list */
  61     smp_wmb();
  62     ctx->first_bh = bh;
  63     qemu_lockcnt_unlock(&ctx->list_lock);
  64     aio_notify(ctx);
  65 }
  66
  67 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
  68 {
  69     QEMUBH *bh;
  70     bh = g_new(QEMUBH, 1);
  71     *bh = (QEMUBH){
  72         .ctx = ctx,
  73         .cb = cb,
  74         .opaque = opaque,
  75     };
  76     qemu_lockcnt_lock(&ctx->list_lock);
  77     bh->next = ctx->first_bh;
  78     /* Make sure that the members are ready before putting bh into list */
  79     smp_wmb();
  80     ctx->first_bh = bh;
  81     qemu_lockcnt_unlock(&ctx->list_lock);
  82     return bh;
  83 }
  84
  85 void aio_bh_call(QEMUBH *bh)
  86 {
  87     bh->cb(bh->opaque);
  88 }
  89
/*
 * Run every scheduled bottom half of @ctx once, then free the bottom halves
 * that are marked deleted and are no longer scheduled.
 *
 * Returns 1 if at least one non-idle bottom half was run, 0 otherwise.
 *
 * Multiple invocations of aio_bh_poll must not run concurrently.
 */
int aio_bh_poll(AioContext *ctx)
{
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;

    /* The list is walked without holding the lock; only the count is raised */
    qemu_lockcnt_inc(&ctx->list_lock);

    ret = 0;
    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
        /* Fetch the successor before the callback gets a chance to run */
        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
         * thread sees the zero before bh->cb has run, and thus will call
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
            /* Idle BHs don't count as progress */
            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
        /* Remember that a removal pass is needed; nothing is freed here */
        if (bh->deleted) {
            deleted = true;
        }
    }

    /* Nothing to remove: just drop the count taken above */
    if (!deleted) {
        qemu_lockcnt_dec(&ctx->list_lock);
        return ret;
    }

    /* Remove deleted BHs, but only if dropping the count also gave us the
     * lock; otherwise the deleted BHs stay on the list for a later call.
     */
    if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            /* A deleted BH that is scheduled again is kept for now */
            if (bh->deleted && !bh->scheduled) {
                *bhp = bh->next;
                g_free(bh);
            } else {
                bhp = &bh->next;
            }
        }
        qemu_lockcnt_unlock(&ctx->list_lock);
    }
    return ret;
}
 142
/*
 * Mark @bh as an idle bottom half and schedule it.
 *
 * Unlike qemu_bh_schedule(), this does not call aio_notify(); a scheduled
 * idle BH instead caps the timeout computed by aio_compute_timeout().
 */
void qemu_bh_schedule_idle(QEMUBH *bh)
{
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
    atomic_mb_set(&bh->scheduled, 1);
}
 151
 152 void qemu_bh_schedule(QEMUBH *bh)
 153 {
 154     AioContext *ctx;
 155
 156     ctx = bh->ctx;
 157     bh->idle = 0;
 158     /* The memory barrier implicit in atomic_xchg makes sure that:
 159      * 1. idle & any writes needed by the callback are done before the
 160      *    locations are read in the aio_bh_poll.
 161      * 2. ctx is loaded before scheduled is set and the callback has a chance
 162      *    to execute.
 163      */
 164     if (atomic_xchg(&bh->scheduled, 1) == 0) {
 165         aio_notify(ctx);
 166     }
 167 }
 168
 169
 170 /* This func is async.
 171  */
 172 void qemu_bh_cancel(QEMUBH *bh)
 173 {
 174     bh->scheduled = 0;
 175 }
 176
/*
 * Unschedule @bh and mark it as deleted.
 *
 * This function is asynchronous: the bottom half is not freed here.  The
 * actual unlinking and g_free() happen later, in aio_bh_poll() (or in
 * aio_ctx_finalize() when the context goes away).
 */
void qemu_bh_delete(QEMUBH *bh)
{
    bh->scheduled = 0;
    bh->deleted = 1;
}
 185
 186 int64_t
 187 aio_compute_timeout(AioContext *ctx)
 188 {
 189     int64_t deadline;
 190     int timeout = -1;
 191     QEMUBH *bh;
 192
 193     for (bh = atomic_rcu_read(&ctx->first_bh); bh;
 194          bh = atomic_rcu_read(&bh->next)) {
 195         if (bh->scheduled) {
 196             if (bh->idle) {
 197                 /* idle bottom halves will be polled at least
 198                  * every 10ms */
 199                 timeout = 10000000;
 200             } else {
 201                 /* non-idle bottom halves will be executed
 202                  * immediately */
 203                 return 0;
 204             }
 205         }
 206     }
 207
 208     deadline = timerlistgroup_deadline_ns(&ctx->tlg);
 209     if (deadline == 0) {
 210         return 0;
 211     } else {
 212         return qemu_soonest_timeout(timeout, deadline);
 213     }
 214 }
 215
 216 static gboolean
 217 aio_ctx_prepare(GSource *source, gint    *timeout)
 218 {
 219     AioContext *ctx = (AioContext *) source;
 220
 221     atomic_or(&ctx->notify_me, 1);
 222
 223     /* We assume there is no timeout already supplied */
 224     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
 225
 226     if (aio_prepare(ctx)) {
 227         *timeout = 0;
 228     }
 229
 230     return *timeout == 0;
 231 }
 232
 233 static gboolean
 234 aio_ctx_check(GSource *source)
 235 {
 236     AioContext *ctx = (AioContext *) source;
 237     QEMUBH *bh;
 238
 239     atomic_and(&ctx->notify_me, ~1);
 240     aio_notify_accept(ctx);
 241
 242     for (bh = ctx->first_bh; bh; bh = bh->next) {
 243         if (bh->scheduled) {
 244             return true;
 245         }
 246     }
 247     return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
 248 }
 249
 250 static gboolean
 251 aio_ctx_dispatch(GSource     *source,
 252                  GSourceFunc  callback,
 253                  gpointer     user_data)
 254 {
 255     AioContext *ctx = (AioContext *) source;
 256
 257     assert(callback == NULL);
 258     aio_dispatch(ctx, true);
 259     return true;
 260 }
 261
/*
 * GSource finalize hook: release everything owned by the AioContext.
 *
 * Frees the thread pool, tears down Linux AIO state if it was created,
 * frees all remaining bottom halves (each must already be marked deleted),
 * then unregisters and cleans up the event notifier, the lock, the lockcnt
 * and the timer list group.
 */
static void
aio_ctx_finalize(GSource     *source)
{
    AioContext *ctx = (AioContext *) source;

    thread_pool_free(ctx->thread_pool);

#ifdef CONFIG_LINUX_AIO
    if (ctx->linux_aio) {
        laio_detach_aio_context(ctx->linux_aio, ctx);
        laio_cleanup(ctx->linux_aio);
        ctx->linux_aio = NULL;
    }
#endif

    qemu_lockcnt_lock(&ctx->list_lock);
    /* Nobody may still be walking the BH list at this point */
    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

        /* qemu_bh_delete() must have been called on BHs in this AioContext */
        assert(ctx->first_bh->deleted);

        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
    qemu_lockcnt_unlock(&ctx->list_lock);

    /* Passing NULL handlers unregisters the notifier set up in
     * aio_context_new() -- presumably; confirm against aio_set_event_notifier.
     */
    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
}
 296
 297 static GSourceFuncs aio_source_funcs = {
 298     aio_ctx_prepare,
 299     aio_ctx_check,
 300     aio_ctx_dispatch,
 301     aio_ctx_finalize
 302 };
 303
 304 GSource *aio_get_g_source(AioContext *ctx)
 305 {
 306     g_source_ref(&ctx->source);
 307     return &ctx->source;
 308 }
 309
 310 ThreadPool *aio_get_thread_pool(AioContext *ctx)
 311 {
 312     if (!ctx->thread_pool) {
 313         ctx->thread_pool = thread_pool_new(ctx);
 314     }
 315     return ctx->thread_pool;
 316 }
 317
 318 #ifdef CONFIG_LINUX_AIO
 319 LinuxAioState *aio_get_linux_aio(AioContext *ctx)
 320 {
 321     if (!ctx->linux_aio) {
 322         ctx->linux_aio = laio_init();
 323         laio_attach_aio_context(ctx->linux_aio, ctx);
 324     }
 325     return ctx->linux_aio;
 326 }
 327 #endif
 328
/*
 * Wake up the event loop of @ctx if it has announced (via notify_me) that
 * it is about to block or is blocked.
 *
 * When notify_me is zero nothing is signalled.
 */
void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        /* Record the notification so aio_notify_accept() clears the notifier */
        atomic_mb_set(&ctx->notified, true);
    }
}
 340
 341 void aio_notify_accept(AioContext *ctx)
 342 {
 343     if (atomic_xchg(&ctx->notified, false)) {
 344         event_notifier_test_and_clear(&ctx->notifier);
 345     }
 346 }
 347
 348 static void aio_timerlist_notify(void *opaque)
 349 {
 350     aio_notify(opaque);
 351 }
 352
/*
 * Intentionally empty handler registered for ctx->notifier in
 * aio_context_new().
 */
static void event_notifier_dummy_cb(EventNotifier *e)
{
}
 356
 357 /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
 358 static bool event_notifier_poll(void *opaque)
 359 {
 360     EventNotifier *e = opaque;
 361     AioContext *ctx = container_of(e, AioContext, notifier);
 362
 363     return atomic_read(&ctx->notified);
 364 }
 365
/*
 * Allocate and initialize a new AioContext (which is also a GSource).
 *
 * @errp: set when the event notifier cannot be initialized.
 *
 * Returns the new context, or NULL on failure.
 */
AioContext *aio_context_new(Error **errp)
{
    int ret;
    AioContext *ctx;

    /* The AioContext is allocated by GLib as an oversized GSource */
    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    aio_context_setup(ctx);

    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);
    /* Register the notifier that aio_notify() uses to wake this context */
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb,
                           event_notifier_poll);
#ifdef CONFIG_LINUX_AIO
    /* Created lazily by aio_get_linux_aio() */
    ctx->linux_aio = NULL;
#endif
    /* Created lazily by aio_get_thread_pool() */
    ctx->thread_pool = NULL;
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;

    return ctx;
fail:
    /* NOTE(review): only g_source_destroy() is called here, not
     * g_source_unref(); confirm whether the reference obtained from
     * g_source_new() is meant to be left in place on this path.
     */
    g_source_destroy(&ctx->source);
    return NULL;
}
 403
/* Take a reference on @ctx by referencing its embedded GSource. */
void aio_context_ref(AioContext *ctx)
{
    g_source_ref(&ctx->source);
}
 408
/* Drop a reference on @ctx by unreferencing its embedded GSource. */
void aio_context_unref(AioContext *ctx)
{
    g_source_unref(&ctx->source);
}
 413
/* Lock the recursive mutex of @ctx; pair with aio_context_release(). */
void aio_context_acquire(AioContext *ctx)
{
    qemu_rec_mutex_lock(&ctx->lock);
}
 418
/* Unlock the recursive mutex of @ctx taken by aio_context_acquire(). */
void aio_context_release(AioContext *ctx)
{
    qemu_rec_mutex_unlock(&ctx->lock);
}