migration/postcopy-ram.c

   1 /*
   2  * Postcopy migration for RAM
   3  *
   4  * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
   5  *
   6  * Authors:
   7  *  Dave Gilbert  <dgilbert@redhat.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10  * See the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 /*
  15  * Postcopy is a migration technique where the execution flips from the
  16  * source to the destination before all the data has been copied.
  17  */
  18
  19 #include "qemu/osdep.h"
  20
  21 #include "qemu-common.h"
  22 #include "exec/target_page.h"
  23 #include "migration/migration.h"
  24 #include "migration/qemu-file.h"
  25 #include "savevm.h"
  26 #include "postcopy-ram.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/balloon.h"
  29 #include "qemu/error-report.h"
  30 #include "trace.h"
  31
  32 /* Arbitrary limit on size of each discard command,
  33  * keeps them around ~200 bytes
  34  */
  35 #define MAX_DISCARDS_PER_COMMAND 12
  36
/*
 * Accumulates discard ranges for a single RAMBlock so that they can be
 * sent in batches of up to MAX_DISCARDS_PER_COMMAND entries.
 */
struct PostcopyDiscardState {
    const char *ramblock_name; /* name given to postcopy_discard_send_init() */
    uint16_t cur_entry;        /* number of ranges currently queued below */
    /*
     * Start and length of a discard range (bytes)
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords;   /* incremented once for every range queued */
    unsigned int nsentcmds;    /* incremented once for every command sent */
};
  48
/* Postcopy needs to detect accesses to pages that haven't yet been copied
 * across, and to efficiently map new pages in; the techniques for doing
 * this are target OS specific.
 */
  53 #if defined(__linux__)
  54
  55 #include <poll.h>
  56 #include <sys/ioctl.h>
  57 #include <sys/syscall.h>
  58 #include <asm/types.h> /* for __u64 */
  59 #endif
  60
  61 #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
  62 #include <sys/eventfd.h>
  63 #include <linux/userfaultfd.h>
  64
  65 static bool ufd_version_check(int ufd)
  66 {
  67     struct uffdio_api api_struct;
  68     uint64_t ioctl_mask;
  69
  70     api_struct.api = UFFD_API;
  71     api_struct.features = 0;
  72     if (ioctl(ufd, UFFDIO_API, &api_struct)) {
  73         error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
  74                      strerror(errno));
  75         return false;
  76     }
  77
  78     ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  79                  (__u64)1 << _UFFDIO_UNREGISTER;
  80     if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
  81         error_report("Missing userfault features: %" PRIx64,
  82                      (uint64_t)(~api_struct.ioctls & ioctl_mask));
  83         return false;
  84     }
  85
  86     if (getpagesize() != ram_pagesize_summary()) {
  87         bool have_hp = false;
  88         /* We've got a huge page */
  89 #ifdef UFFD_FEATURE_MISSING_HUGETLBFS
  90         have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
  91 #endif
  92         if (!have_hp) {
  93             error_report("Userfault on this host does not support huge pages");
  94             return false;
  95         }
  96     }
  97     return true;
  98 }
  99
 100 /* Callback from postcopy_ram_supported_by_host block iterator.
 101  */
 102 static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
 103                              ram_addr_t offset, ram_addr_t length, void *opaque)
 104 {
 105     RAMBlock *rb = qemu_ram_block_by_name(block_name);
 106     size_t pagesize = qemu_ram_pagesize(rb);
 107
 108     if (qemu_ram_is_shared(rb)) {
 109         error_report("Postcopy on shared RAM (%s) is not yet supported",
 110                      block_name);
 111         return 1;
 112     }
 113
 114     if (length % pagesize) {
 115         error_report("Postcopy requires RAM blocks to be a page size multiple,"
 116                      " block %s is 0x" RAM_ADDR_FMT " bytes with a "
 117                      "page size of 0x%zx", block_name, length, pagesize);
 118         return 1;
 119     }
 120     return 0;
 121 }
 122
 123 /*
 124  * Note: This has the side effect of munlock'ing all of RAM, that's
 125  * normally fine since if the postcopy succeeds it gets turned back on at the
 126  * end.
 127  */
 128 bool postcopy_ram_supported_by_host(void)
 129 {
 130     long pagesize = getpagesize();
 131     int ufd = -1;
 132     bool ret = false; /* Error unless we change it */
 133     void *testarea = NULL;
 134     struct uffdio_register reg_struct;
 135     struct uffdio_range range_struct;
 136     uint64_t feature_mask;
 137
 138     if (qemu_target_page_size() > pagesize) {
 139         error_report("Target page size bigger than host page size");
 140         goto out;
 141     }
 142
 143     ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
 144     if (ufd == -1) {
 145         error_report("%s: userfaultfd not available: %s", __func__,
 146                      strerror(errno));
 147         goto out;
 148     }
 149
 150     /* Version and features check */
 151     if (!ufd_version_check(ufd)) {
 152         goto out;
 153     }
 154
 155     /* We don't support postcopy with shared RAM yet */
 156     if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
 157         goto out;
 158     }
 159
 160     /*
 161      * userfault and mlock don't go together; we'll put it back later if
 162      * it was enabled.
 163      */
 164     if (munlockall()) {
 165         error_report("%s: munlockall: %s", __func__,  strerror(errno));
 166         return -1;
 167     }
 168
 169     /*
 170      *  We need to check that the ops we need are supported on anon memory
 171      *  To do that we need to register a chunk and see the flags that
 172      *  are returned.
 173      */
 174     testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 175                                     MAP_ANONYMOUS, -1, 0);
 176     if (testarea == MAP_FAILED) {
 177         error_report("%s: Failed to map test area: %s", __func__,
 178                      strerror(errno));
 179         goto out;
 180     }
 181     g_assert(((size_t)testarea & (pagesize-1)) == 0);
 182
 183     reg_struct.range.start = (uintptr_t)testarea;
 184     reg_struct.range.len = pagesize;
 185     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 186
 187     if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
 188         error_report("%s userfault register: %s", __func__, strerror(errno));
 189         goto out;
 190     }
 191
 192     range_struct.start = (uintptr_t)testarea;
 193     range_struct.len = pagesize;
 194     if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
 195         error_report("%s userfault unregister: %s", __func__, strerror(errno));
 196         goto out;
 197     }
 198
 199     feature_mask = (__u64)1 << _UFFDIO_WAKE |
 200                    (__u64)1 << _UFFDIO_COPY |
 201                    (__u64)1 << _UFFDIO_ZEROPAGE;
 202     if ((reg_struct.ioctls & feature_mask) != feature_mask) {
 203         error_report("Missing userfault map features: %" PRIx64,
 204                      (uint64_t)(~reg_struct.ioctls & feature_mask));
 205         goto out;
 206     }
 207
 208     /* Success! */
 209     ret = true;
 210 out:
 211     if (testarea) {
 212         munmap(testarea, pagesize);
 213     }
 214     if (ufd != -1) {
 215         close(ufd);
 216     }
 217     return ret;
 218 }
 219
 220 /*
 221  * Setup an area of RAM so that it *can* be used for postcopy later; this
 222  * must be done right at the start prior to pre-copy.
 223  * opaque should be the MIS.
 224  */
 225 static int init_range(const char *block_name, void *host_addr,
 226                       ram_addr_t offset, ram_addr_t length, void *opaque)
 227 {
 228     trace_postcopy_init_range(block_name, host_addr, offset, length);
 229
 230     /*
 231      * We need the whole of RAM to be truly empty for postcopy, so things
 232      * like ROMs and any data tables built during init must be zero'd
 233      * - we're going to get the copy from the source anyway.
 234      * (Precopy will just overwrite this data, so doesn't need the discard)
 235      */
 236     if (ram_discard_range(block_name, 0, length)) {
 237         return -1;
 238     }
 239
 240     return 0;
 241 }
 242
 243 /*
 244  * At the end of migration, undo the effects of init_range
 245  * opaque should be the MIS.
 246  */
 247 static int cleanup_range(const char *block_name, void *host_addr,
 248                         ram_addr_t offset, ram_addr_t length, void *opaque)
 249 {
 250     MigrationIncomingState *mis = opaque;
 251     struct uffdio_range range_struct;
 252     trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
 253
 254     /*
 255      * We turned off hugepage for the precopy stage with postcopy enabled
 256      * we can turn it back on now.
 257      */
 258     qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
 259
 260     /*
 261      * We can also turn off userfault now since we should have all the
 262      * pages.   It can be useful to leave it on to debug postcopy
 263      * if you're not sure it's always getting every page.
 264      */
 265     range_struct.start = (uintptr_t)host_addr;
 266     range_struct.len = length;
 267
 268     if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 269         error_report("%s: userfault unregister %s", __func__, strerror(errno));
 270
 271         return -1;
 272     }
 273
 274     return 0;
 275 }
 276
 277 /*
 278  * Initialise postcopy-ram, setting the RAM to a state where we can go into
 279  * postcopy later; must be called prior to any precopy.
 280  * called from arch_init's similarly named ram_postcopy_incoming_init
 281  */
 282 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 283 {
 284     if (qemu_ram_foreach_block(init_range, NULL)) {
 285         return -1;
 286     }
 287
 288     return 0;
 289 }
 290
 291 /*
 292  * At the end of a migration where postcopy_ram_incoming_init was called.
 293  */
 294 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 295 {
 296     trace_postcopy_ram_incoming_cleanup_entry();
 297
 298     if (mis->have_fault_thread) {
 299         uint64_t tmp64;
 300
 301         if (qemu_ram_foreach_block(cleanup_range, mis)) {
 302             return -1;
 303         }
 304         /*
 305          * Tell the fault_thread to exit, it's an eventfd that should
 306          * currently be at 0, we're going to increment it to 1
 307          */
 308         tmp64 = 1;
 309         if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
 310             trace_postcopy_ram_incoming_cleanup_join();
 311             qemu_thread_join(&mis->fault_thread);
 312         } else {
 313             /* Not much we can do here, but may as well report it */
 314             error_report("%s: incrementing userfault_quit_fd: %s", __func__,
 315                          strerror(errno));
 316         }
 317         trace_postcopy_ram_incoming_cleanup_closeuf();
 318         close(mis->userfault_fd);
 319         close(mis->userfault_quit_fd);
 320         mis->have_fault_thread = false;
 321     }
 322
 323     qemu_balloon_inhibit(false);
 324
 325     if (enable_mlock) {
 326         if (os_mlock() < 0) {
 327             error_report("mlock: %s", strerror(errno));
 328             /*
 329              * It doesn't feel right to fail at this point, we have a valid
 330              * VM state.
 331              */
 332         }
 333     }
 334
 335     postcopy_state_set(POSTCOPY_INCOMING_END);
 336     migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 337
 338     if (mis->postcopy_tmp_page) {
 339         munmap(mis->postcopy_tmp_page, mis->largest_page_size);
 340         mis->postcopy_tmp_page = NULL;
 341     }
 342     if (mis->postcopy_tmp_zero_page) {
 343         munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
 344         mis->postcopy_tmp_zero_page = NULL;
 345     }
 346     trace_postcopy_ram_incoming_cleanup_exit();
 347     return 0;
 348 }
 349
 350 /*
 351  * Disable huge pages on an area
 352  */
 353 static int nhp_range(const char *block_name, void *host_addr,
 354                     ram_addr_t offset, ram_addr_t length, void *opaque)
 355 {
 356     trace_postcopy_nhp_range(block_name, host_addr, offset, length);
 357
 358     /*
 359      * Before we do discards we need to ensure those discards really
 360      * do delete areas of the page, even if THP thinks a hugepage would
 361      * be a good idea, so force hugepages off.
 362      */
 363     qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 364
 365     return 0;
 366 }
 367
 368 /*
 369  * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 370  * however leaving it until after precopy means that most of the precopy
 371  * data is still THPd
 372  */
 373 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 374 {
 375     if (qemu_ram_foreach_block(nhp_range, mis)) {
 376         return -1;
 377     }
 378
 379     postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
 380
 381     return 0;
 382 }
 383
 384 /*
 385  * Mark the given area of RAM as requiring notification to unwritten areas
 386  * Used as a  callback on qemu_ram_foreach_block.
 387  *   host_addr: Base of area to mark
 388  *   offset: Offset in the whole ram arena
 389  *   length: Length of the section
 390  *   opaque: MigrationIncomingState pointer
 391  * Returns 0 on success
 392  */
 393 static int ram_block_enable_notify(const char *block_name, void *host_addr,
 394                                    ram_addr_t offset, ram_addr_t length,
 395                                    void *opaque)
 396 {
 397     MigrationIncomingState *mis = opaque;
 398     struct uffdio_register reg_struct;
 399
 400     reg_struct.range.start = (uintptr_t)host_addr;
 401     reg_struct.range.len = length;
 402     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 403
 404     /* Now tell our userfault_fd that it's responsible for this area */
 405     if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
 406         error_report("%s userfault register: %s", __func__, strerror(errno));
 407         return -1;
 408     }
 409     if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
 410         error_report("%s userfault: Region doesn't support COPY", __func__);
 411         return -1;
 412     }
 413
 414     return 0;
 415 }
 416
 417 /*
 418  * Handle faults detected by the USERFAULT markings
 419  */
 420 static void *postcopy_ram_fault_thread(void *opaque)
 421 {
 422     MigrationIncomingState *mis = opaque;
 423     struct uffd_msg msg;
 424     int ret;
 425     RAMBlock *rb = NULL;
 426     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 427
 428     trace_postcopy_ram_fault_thread_entry();
 429     qemu_sem_post(&mis->fault_thread_sem);
 430
 431     while (true) {
 432         ram_addr_t rb_offset;
 433         struct pollfd pfd[2];
 434
 435         /*
 436          * We're mainly waiting for the kernel to give us a faulting HVA,
 437          * however we can be told to quit via userfault_quit_fd which is
 438          * an eventfd
 439          */
 440         pfd[0].fd = mis->userfault_fd;
 441         pfd[0].events = POLLIN;
 442         pfd[0].revents = 0;
 443         pfd[1].fd = mis->userfault_quit_fd;
 444         pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
 445         pfd[1].revents = 0;
 446
 447         if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
 448             error_report("%s: userfault poll: %s", __func__, strerror(errno));
 449             break;
 450         }
 451
 452         if (pfd[1].revents) {
 453             trace_postcopy_ram_fault_thread_quit();
 454             break;
 455         }
 456
 457         ret = read(mis->userfault_fd, &msg, sizeof(msg));
 458         if (ret != sizeof(msg)) {
 459             if (errno == EAGAIN) {
 460                 /*
 461                  * if a wake up happens on the other thread just after
 462                  * the poll, there is nothing to read.
 463                  */
 464                 continue;
 465             }
 466             if (ret < 0) {
 467                 error_report("%s: Failed to read full userfault message: %s",
 468                              __func__, strerror(errno));
 469                 break;
 470             } else {
 471                 error_report("%s: Read %d bytes from userfaultfd expected %zd",
 472                              __func__, ret, sizeof(msg));
 473                 break; /* Lost alignment, don't know what we'd read next */
 474             }
 475         }
 476         if (msg.event != UFFD_EVENT_PAGEFAULT) {
 477             error_report("%s: Read unexpected event %ud from userfaultfd",
 478                          __func__, msg.event);
 479             continue; /* It's not a page fault, shouldn't happen */
 480         }
 481
 482         rb = qemu_ram_block_from_host(
 483                  (void *)(uintptr_t)msg.arg.pagefault.address,
 484                  true, &rb_offset);
 485         if (!rb) {
 486             error_report("postcopy_ram_fault_thread: Fault outside guest: %"
 487                          PRIx64, (uint64_t)msg.arg.pagefault.address);
 488             break;
 489         }
 490
 491         rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
 492         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
 493                                                 qemu_ram_get_idstr(rb),
 494                                                 rb_offset);
 495
 496         /*
 497          * Send the request to the source - we want to request one
 498          * of our host page sizes (which is >= TPS)
 499          */
 500         if (rb != last_rb) {
 501             last_rb = rb;
 502             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
 503                                      rb_offset, qemu_ram_pagesize(rb));
 504         } else {
 505             /* Save some space */
 506             migrate_send_rp_req_pages(mis, NULL,
 507                                      rb_offset, qemu_ram_pagesize(rb));
 508         }
 509     }
 510     trace_postcopy_ram_fault_thread_exit();
 511     return NULL;
 512 }
 513
 514 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 515 {
 516     /* Open the fd for the kernel to give us userfaults */
 517     mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 518     if (mis->userfault_fd == -1) {
 519         error_report("%s: Failed to open userfault fd: %s", __func__,
 520                      strerror(errno));
 521         return -1;
 522     }
 523
 524     /*
 525      * Although the host check already tested the API, we need to
 526      * do the check again as an ABI handshake on the new fd.
 527      */
 528     if (!ufd_version_check(mis->userfault_fd)) {
 529         return -1;
 530     }
 531
 532     /* Now an eventfd we use to tell the fault-thread to quit */
 533     mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
 534     if (mis->userfault_quit_fd == -1) {
 535         error_report("%s: Opening userfault_quit_fd: %s", __func__,
 536                      strerror(errno));
 537         close(mis->userfault_fd);
 538         return -1;
 539     }
 540
 541     qemu_sem_init(&mis->fault_thread_sem, 0);
 542     qemu_thread_create(&mis->fault_thread, "postcopy/fault",
 543                        postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
 544     qemu_sem_wait(&mis->fault_thread_sem);
 545     qemu_sem_destroy(&mis->fault_thread_sem);
 546     mis->have_fault_thread = true;
 547
 548     /* Mark so that we get notified of accesses to unwritten areas */
 549     if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
 550         return -1;
 551     }
 552
 553     /*
 554      * Ballooning can mark pages as absent while we're postcopying
 555      * that would cause false userfaults.
 556      */
 557     qemu_balloon_inhibit(true);
 558
 559     trace_postcopy_ram_enable_notify();
 560
 561     return 0;
 562 }
 563
 564 /*
 565  * Place a host page (from) at (host) atomically
 566  * returns 0 on success
 567  */
 568 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 569                         size_t pagesize)
 570 {
 571     struct uffdio_copy copy_struct;
 572
 573     copy_struct.dst = (uint64_t)(uintptr_t)host;
 574     copy_struct.src = (uint64_t)(uintptr_t)from;
 575     copy_struct.len = pagesize;
 576     copy_struct.mode = 0;
 577
 578     /* copy also acks to the kernel waking the stalled thread up
 579      * TODO: We can inhibit that ack and only do it if it was requested
 580      * which would be slightly cheaper, but we'd have to be careful
 581      * of the order of updating our page state.
 582      */
 583     if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
 584         int e = errno;
 585         error_report("%s: %s copy host: %p from: %p (size: %zd)",
 586                      __func__, strerror(e), host, from, pagesize);
 587
 588         return -e;
 589     }
 590
 591     trace_postcopy_place_page(host);
 592     return 0;
 593 }
 594
 595 /*
 596  * Place a zero page at (host) atomically
 597  * returns 0 on success
 598  */
 599 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 600                              size_t pagesize)
 601 {
 602     trace_postcopy_place_page_zero(host);
 603
 604     if (pagesize == getpagesize()) {
 605         struct uffdio_zeropage zero_struct;
 606         zero_struct.range.start = (uint64_t)(uintptr_t)host;
 607         zero_struct.range.len = getpagesize();
 608         zero_struct.mode = 0;
 609
 610         if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
 611             int e = errno;
 612             error_report("%s: %s zero host: %p",
 613                          __func__, strerror(e), host);
 614
 615             return -e;
 616         }
 617     } else {
 618         /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
 619         if (!mis->postcopy_tmp_zero_page) {
 620             mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
 621                                                PROT_READ | PROT_WRITE,
 622                                                MAP_PRIVATE | MAP_ANONYMOUS,
 623                                                -1, 0);
 624             if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
 625                 int e = errno;
 626                 mis->postcopy_tmp_zero_page = NULL;
 627                 error_report("%s: %s mapping large zero page",
 628                              __func__, strerror(e));
 629                 return -e;
 630             }
 631             memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 632         }
 633         return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
 634                                    pagesize);
 635     }
 636
 637     return 0;
 638 }
 639
 640 /*
 641  * Returns a target page of memory that can be mapped at a later point in time
 642  * using postcopy_place_page
 643  * The same address is used repeatedly, postcopy_place_page just takes the
 644  * backing page away.
 645  * Returns: Pointer to allocated page
 646  *
 647  */
 648 void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 649 {
 650     if (!mis->postcopy_tmp_page) {
 651         mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
 652                              PROT_READ | PROT_WRITE, MAP_PRIVATE |
 653                              MAP_ANONYMOUS, -1, 0);
 654         if (mis->postcopy_tmp_page == MAP_FAILED) {
 655             mis->postcopy_tmp_page = NULL;
 656             error_report("%s: %s", __func__, strerror(errno));
 657             return NULL;
 658         }
 659     }
 660
 661     return mis->postcopy_tmp_page;
 662 }
 663
 664 #else
 665 /* No target OS support, stubs just fail */
/* Stub for builds without userfault support: reports an error, returns false */
bool postcopy_ram_supported_by_host(void)
{
    error_report("%s: No OS support", __func__);
    return false;
}
 671
/* Stub for builds without userfault support: reports an error, returns -1 */
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    error_report("postcopy_ram_incoming_init: No OS support");
    return -1;
}
 677
/* Stub: must never be called in this build, so it asserts */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
 683
/* Stub: must never be called in this build, so it asserts */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
 689
/* Stub: must never be called in this build, so it asserts */
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
 695
/* Stub: must never be called in this build, so it asserts */
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                        size_t pagesize)
{
    assert(0);
    return -1;
}
 702
/* Stub: must never be called in this build, so it asserts */
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                        size_t pagesize)
{
    assert(0);
    return -1;
}
 709
/* Stub: must never be called in this build, so it asserts */
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    assert(0);
    return NULL;
}
 715
 716 #endif
 717
 718 /* ------------------------------------------------------------------------- */
 719
 720 /**
 721  * postcopy_discard_send_init: Called at the start of each RAMBlock before
 722  *   asking to discard individual ranges.
 723  *
 724  * @ms: The current migration state.
 725  * @offset: the bitmap offset of the named RAMBlock in the migration
 726  *   bitmap.
 727  * @name: RAMBlock that discards will operate on.
 728  *
 729  * returns: a new PDS.
 730  */
 731 PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
 732                                                  const char *name)
 733 {
 734     PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
 735
 736     if (res) {
 737         res->ramblock_name = name;
 738     }
 739
 740     return res;
 741 }
 742
 743 /**
 744  * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 745  *   discard. May send a discard message, may just leave it queued to
 746  *   be sent later.
 747  *
 748  * @ms: Current migration state.
 749  * @pds: Structure initialised by postcopy_discard_send_init().
 750  * @start,@length: a range of pages in the migration bitmap in the
 751  *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 752  */
 753 void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
 754                                 unsigned long start, unsigned long length)
 755 {
 756     size_t tp_size = qemu_target_page_size();
 757     /* Convert to byte offsets within the RAM block */
 758     pds->start_list[pds->cur_entry] = start  * tp_size;
 759     pds->length_list[pds->cur_entry] = length * tp_size;
 760     trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
 761     pds->cur_entry++;
 762     pds->nsentwords++;
 763
 764     if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
 765         /* Full set, ship it! */
 766         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 767                                               pds->ramblock_name,
 768                                               pds->cur_entry,
 769                                               pds->start_list,
 770                                               pds->length_list);
 771         pds->nsentcmds++;
 772         pds->cur_entry = 0;
 773     }
 774 }
 775
 776 /**
 777  * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
 778  * bitmap code. Sends any outstanding discard messages, frees the PDS
 779  *
 780  * @ms: Current migration state.
 781  * @pds: Structure initialised by postcopy_discard_send_init().
 782  */
 783 void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
 784 {
 785     /* Anything unsent? */
 786     if (pds->cur_entry) {
 787         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 788                                               pds->ramblock_name,
 789                                               pds->cur_entry,
 790                                               pds->start_list,
 791                                               pds->length_list);
 792         pds->nsentcmds++;
 793     }
 794
 795     trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
 796                                        pds->nsentcmds);
 797
 798     g_free(pds);
 799 }
 800
/*
 * Current state of incoming postcopy; note this is not part of
 * MigrationIncomingState since its state is used during cleanup
 * at the end as MIS is being freed.
 */
static PostcopyState incoming_postcopy_state;
 807
 808 PostcopyState  postcopy_state_get(void)
 809 {
 810     return atomic_mb_read(&incoming_postcopy_state);
 811 }
 812
 813 /* Set the state and return the old state */
 814 PostcopyState postcopy_state_set(PostcopyState new_state)
 815 {
 816     return atomic_xchg(&incoming_postcopy_state, new_state);
 817 }