migration/postcopy-ram.c

   1 /*
   2  * Postcopy migration for RAM
   3  *
   4  * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
   5  *
   6  * Authors:
   7  *  Dave Gilbert  <dgilbert@redhat.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10  * See the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 /*
  15  * Postcopy is a migration technique where the execution flips from the
  16  * source to the destination before all the data has been copied.
  17  */
  18
  19 #include "qemu/osdep.h"
  20
  21 #include "qemu-common.h"
  22 #include "exec/target_page.h"
  23 #include "migration.h"
  24 #include "qemu-file.h"
  25 #include "savevm.h"
  26 #include "postcopy-ram.h"
  27 #include "ram.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/balloon.h"
  30 #include "qemu/error-report.h"
  31 #include "trace.h"
  32
  33 /* Arbitrary limit on size of each discard command,
  34  * keeps them around ~200 bytes
  35  */
  36 #define MAX_DISCARDS_PER_COMMAND 12
  37
  38 struct PostcopyDiscardState {
  39     const char *ramblock_name;
  40     uint16_t cur_entry;
  41     /*
  42      * Start and length of a discard range (bytes)
  43      */
  44     uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
  45     uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
  46     unsigned int nsentwords;
  47     unsigned int nsentcmds;
  48 };
  49
/* Postcopy needs to detect accesses to pages that haven't yet been copied
 * across, and to map new pages in efficiently.  The techniques for doing
 * this are target OS specific.
 */
  54 #if defined(__linux__)
  55
  56 #include <poll.h>
  57 #include <sys/ioctl.h>
  58 #include <sys/syscall.h>
  59 #include <asm/types.h> /* for __u64 */
  60 #endif
  61
  62 #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
  63 #include <sys/eventfd.h>
  64 #include <linux/userfaultfd.h>
  65
  66 static bool ufd_version_check(int ufd)
  67 {
  68     struct uffdio_api api_struct;
  69     uint64_t ioctl_mask;
  70
  71     api_struct.api = UFFD_API;
  72     api_struct.features = 0;
  73     if (ioctl(ufd, UFFDIO_API, &api_struct)) {
  74         error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
  75                      strerror(errno));
  76         return false;
  77     }
  78
  79     ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  80                  (__u64)1 << _UFFDIO_UNREGISTER;
  81     if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
  82         error_report("Missing userfault features: %" PRIx64,
  83                      (uint64_t)(~api_struct.ioctls & ioctl_mask));
  84         return false;
  85     }
  86
  87     if (getpagesize() != ram_pagesize_summary()) {
  88         bool have_hp = false;
  89         /* We've got a huge page */
  90 #ifdef UFFD_FEATURE_MISSING_HUGETLBFS
  91         have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
  92 #endif
  93         if (!have_hp) {
  94             error_report("Userfault on this host does not support huge pages");
  95             return false;
  96         }
  97     }
  98     return true;
  99 }
 100
 101 /* Callback from postcopy_ram_supported_by_host block iterator.
 102  */
 103 static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
 104                              ram_addr_t offset, ram_addr_t length, void *opaque)
 105 {
 106     RAMBlock *rb = qemu_ram_block_by_name(block_name);
 107     size_t pagesize = qemu_ram_pagesize(rb);
 108
 109     if (qemu_ram_is_shared(rb)) {
 110         error_report("Postcopy on shared RAM (%s) is not yet supported",
 111                      block_name);
 112         return 1;
 113     }
 114
 115     if (length % pagesize) {
 116         error_report("Postcopy requires RAM blocks to be a page size multiple,"
 117                      " block %s is 0x" RAM_ADDR_FMT " bytes with a "
 118                      "page size of 0x%zx", block_name, length, pagesize);
 119         return 1;
 120     }
 121     return 0;
 122 }
 123
 124 /*
 125  * Note: This has the side effect of munlock'ing all of RAM, that's
 126  * normally fine since if the postcopy succeeds it gets turned back on at the
 127  * end.
 128  */
 129 bool postcopy_ram_supported_by_host(void)
 130 {
 131     long pagesize = getpagesize();
 132     int ufd = -1;
 133     bool ret = false; /* Error unless we change it */
 134     void *testarea = NULL;
 135     struct uffdio_register reg_struct;
 136     struct uffdio_range range_struct;
 137     uint64_t feature_mask;
 138
 139     if (qemu_target_page_size() > pagesize) {
 140         error_report("Target page size bigger than host page size");
 141         goto out;
 142     }
 143
 144     ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
 145     if (ufd == -1) {
 146         error_report("%s: userfaultfd not available: %s", __func__,
 147                      strerror(errno));
 148         goto out;
 149     }
 150
 151     /* Version and features check */
 152     if (!ufd_version_check(ufd)) {
 153         goto out;
 154     }
 155
 156     /* We don't support postcopy with shared RAM yet */
 157     if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
 158         goto out;
 159     }
 160
 161     /*
 162      * userfault and mlock don't go together; we'll put it back later if
 163      * it was enabled.
 164      */
 165     if (munlockall()) {
 166         error_report("%s: munlockall: %s", __func__,  strerror(errno));
 167         return -1;
 168     }
 169
 170     /*
 171      *  We need to check that the ops we need are supported on anon memory
 172      *  To do that we need to register a chunk and see the flags that
 173      *  are returned.
 174      */
 175     testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 176                                     MAP_ANONYMOUS, -1, 0);
 177     if (testarea == MAP_FAILED) {
 178         error_report("%s: Failed to map test area: %s", __func__,
 179                      strerror(errno));
 180         goto out;
 181     }
 182     g_assert(((size_t)testarea & (pagesize-1)) == 0);
 183
 184     reg_struct.range.start = (uintptr_t)testarea;
 185     reg_struct.range.len = pagesize;
 186     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 187
 188     if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
 189         error_report("%s userfault register: %s", __func__, strerror(errno));
 190         goto out;
 191     }
 192
 193     range_struct.start = (uintptr_t)testarea;
 194     range_struct.len = pagesize;
 195     if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
 196         error_report("%s userfault unregister: %s", __func__, strerror(errno));
 197         goto out;
 198     }
 199
 200     feature_mask = (__u64)1 << _UFFDIO_WAKE |
 201                    (__u64)1 << _UFFDIO_COPY |
 202                    (__u64)1 << _UFFDIO_ZEROPAGE;
 203     if ((reg_struct.ioctls & feature_mask) != feature_mask) {
 204         error_report("Missing userfault map features: %" PRIx64,
 205                      (uint64_t)(~reg_struct.ioctls & feature_mask));
 206         goto out;
 207     }
 208
 209     /* Success! */
 210     ret = true;
 211 out:
 212     if (testarea) {
 213         munmap(testarea, pagesize);
 214     }
 215     if (ufd != -1) {
 216         close(ufd);
 217     }
 218     return ret;
 219 }
 220
 221 /*
 222  * Setup an area of RAM so that it *can* be used for postcopy later; this
 223  * must be done right at the start prior to pre-copy.
 224  * opaque should be the MIS.
 225  */
 226 static int init_range(const char *block_name, void *host_addr,
 227                       ram_addr_t offset, ram_addr_t length, void *opaque)
 228 {
 229     trace_postcopy_init_range(block_name, host_addr, offset, length);
 230
 231     /*
 232      * We need the whole of RAM to be truly empty for postcopy, so things
 233      * like ROMs and any data tables built during init must be zero'd
 234      * - we're going to get the copy from the source anyway.
 235      * (Precopy will just overwrite this data, so doesn't need the discard)
 236      */
 237     if (ram_discard_range(block_name, 0, length)) {
 238         return -1;
 239     }
 240
 241     return 0;
 242 }
 243
 244 /*
 245  * At the end of migration, undo the effects of init_range
 246  * opaque should be the MIS.
 247  */
 248 static int cleanup_range(const char *block_name, void *host_addr,
 249                         ram_addr_t offset, ram_addr_t length, void *opaque)
 250 {
 251     MigrationIncomingState *mis = opaque;
 252     struct uffdio_range range_struct;
 253     trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
 254
 255     /*
 256      * We turned off hugepage for the precopy stage with postcopy enabled
 257      * we can turn it back on now.
 258      */
 259     qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
 260
 261     /*
 262      * We can also turn off userfault now since we should have all the
 263      * pages.   It can be useful to leave it on to debug postcopy
 264      * if you're not sure it's always getting every page.
 265      */
 266     range_struct.start = (uintptr_t)host_addr;
 267     range_struct.len = length;
 268
 269     if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 270         error_report("%s: userfault unregister %s", __func__, strerror(errno));
 271
 272         return -1;
 273     }
 274
 275     return 0;
 276 }
 277
 278 /*
 279  * Initialise postcopy-ram, setting the RAM to a state where we can go into
 280  * postcopy later; must be called prior to any precopy.
 281  * called from arch_init's similarly named ram_postcopy_incoming_init
 282  */
 283 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 284 {
 285     if (qemu_ram_foreach_block(init_range, NULL)) {
 286         return -1;
 287     }
 288
 289     return 0;
 290 }
 291
 292 /*
 293  * At the end of a migration where postcopy_ram_incoming_init was called.
 294  */
 295 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 296 {
 297     trace_postcopy_ram_incoming_cleanup_entry();
 298
 299     if (mis->have_fault_thread) {
 300         uint64_t tmp64;
 301
 302         if (qemu_ram_foreach_block(cleanup_range, mis)) {
 303             return -1;
 304         }
 305         /*
 306          * Tell the fault_thread to exit, it's an eventfd that should
 307          * currently be at 0, we're going to increment it to 1
 308          */
 309         tmp64 = 1;
 310         if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
 311             trace_postcopy_ram_incoming_cleanup_join();
 312             qemu_thread_join(&mis->fault_thread);
 313         } else {
 314             /* Not much we can do here, but may as well report it */
 315             error_report("%s: incrementing userfault_quit_fd: %s", __func__,
 316                          strerror(errno));
 317         }
 318         trace_postcopy_ram_incoming_cleanup_closeuf();
 319         close(mis->userfault_fd);
 320         close(mis->userfault_quit_fd);
 321         mis->have_fault_thread = false;
 322     }
 323
 324     qemu_balloon_inhibit(false);
 325
 326     if (enable_mlock) {
 327         if (os_mlock() < 0) {
 328             error_report("mlock: %s", strerror(errno));
 329             /*
 330              * It doesn't feel right to fail at this point, we have a valid
 331              * VM state.
 332              */
 333         }
 334     }
 335
 336     postcopy_state_set(POSTCOPY_INCOMING_END);
 337
 338     if (mis->postcopy_tmp_page) {
 339         munmap(mis->postcopy_tmp_page, mis->largest_page_size);
 340         mis->postcopy_tmp_page = NULL;
 341     }
 342     if (mis->postcopy_tmp_zero_page) {
 343         munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
 344         mis->postcopy_tmp_zero_page = NULL;
 345     }
 346     trace_postcopy_ram_incoming_cleanup_exit();
 347     return 0;
 348 }
 349
 350 /*
 351  * Disable huge pages on an area
 352  */
 353 static int nhp_range(const char *block_name, void *host_addr,
 354                     ram_addr_t offset, ram_addr_t length, void *opaque)
 355 {
 356     trace_postcopy_nhp_range(block_name, host_addr, offset, length);
 357
 358     /*
 359      * Before we do discards we need to ensure those discards really
 360      * do delete areas of the page, even if THP thinks a hugepage would
 361      * be a good idea, so force hugepages off.
 362      */
 363     qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 364
 365     return 0;
 366 }
 367
 368 /*
 369  * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 370  * however leaving it until after precopy means that most of the precopy
 371  * data is still THPd
 372  */
 373 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 374 {
 375     if (qemu_ram_foreach_block(nhp_range, mis)) {
 376         return -1;
 377     }
 378
 379     postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
 380
 381     return 0;
 382 }
 383
 384 /*
 385  * Mark the given area of RAM as requiring notification to unwritten areas
 386  * Used as a  callback on qemu_ram_foreach_block.
 387  *   host_addr: Base of area to mark
 388  *   offset: Offset in the whole ram arena
 389  *   length: Length of the section
 390  *   opaque: MigrationIncomingState pointer
 391  * Returns 0 on success
 392  */
 393 static int ram_block_enable_notify(const char *block_name, void *host_addr,
 394                                    ram_addr_t offset, ram_addr_t length,
 395                                    void *opaque)
 396 {
 397     MigrationIncomingState *mis = opaque;
 398     struct uffdio_register reg_struct;
 399
 400     reg_struct.range.start = (uintptr_t)host_addr;
 401     reg_struct.range.len = length;
 402     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 403
 404     /* Now tell our userfault_fd that it's responsible for this area */
 405     if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
 406         error_report("%s userfault register: %s", __func__, strerror(errno));
 407         return -1;
 408     }
 409     if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
 410         error_report("%s userfault: Region doesn't support COPY", __func__);
 411         return -1;
 412     }
 413
 414     return 0;
 415 }
 416
 417 /*
 418  * Handle faults detected by the USERFAULT markings
 419  */
 420 static void *postcopy_ram_fault_thread(void *opaque)
 421 {
 422     MigrationIncomingState *mis = opaque;
 423     struct uffd_msg msg;
 424     int ret;
 425     RAMBlock *rb = NULL;
 426     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 427
 428     trace_postcopy_ram_fault_thread_entry();
 429     qemu_sem_post(&mis->fault_thread_sem);
 430
 431     while (true) {
 432         ram_addr_t rb_offset;
 433         struct pollfd pfd[2];
 434
 435         /*
 436          * We're mainly waiting for the kernel to give us a faulting HVA,
 437          * however we can be told to quit via userfault_quit_fd which is
 438          * an eventfd
 439          */
 440         pfd[0].fd = mis->userfault_fd;
 441         pfd[0].events = POLLIN;
 442         pfd[0].revents = 0;
 443         pfd[1].fd = mis->userfault_quit_fd;
 444         pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
 445         pfd[1].revents = 0;
 446
 447         if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
 448             error_report("%s: userfault poll: %s", __func__, strerror(errno));
 449             break;
 450         }
 451
 452         if (pfd[1].revents) {
 453             trace_postcopy_ram_fault_thread_quit();
 454             break;
 455         }
 456
 457         ret = read(mis->userfault_fd, &msg, sizeof(msg));
 458         if (ret != sizeof(msg)) {
 459             if (errno == EAGAIN) {
 460                 /*
 461                  * if a wake up happens on the other thread just after
 462                  * the poll, there is nothing to read.
 463                  */
 464                 continue;
 465             }
 466             if (ret < 0) {
 467                 error_report("%s: Failed to read full userfault message: %s",
 468                              __func__, strerror(errno));
 469                 break;
 470             } else {
 471                 error_report("%s: Read %d bytes from userfaultfd expected %zd",
 472                              __func__, ret, sizeof(msg));
 473                 break; /* Lost alignment, don't know what we'd read next */
 474             }
 475         }
 476         if (msg.event != UFFD_EVENT_PAGEFAULT) {
 477             error_report("%s: Read unexpected event %ud from userfaultfd",
 478                          __func__, msg.event);
 479             continue; /* It's not a page fault, shouldn't happen */
 480         }
 481
 482         rb = qemu_ram_block_from_host(
 483                  (void *)(uintptr_t)msg.arg.pagefault.address,
 484                  true, &rb_offset);
 485         if (!rb) {
 486             error_report("postcopy_ram_fault_thread: Fault outside guest: %"
 487                          PRIx64, (uint64_t)msg.arg.pagefault.address);
 488             break;
 489         }
 490
 491         rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
 492         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
 493                                                 qemu_ram_get_idstr(rb),
 494                                                 rb_offset);
 495
 496         /*
 497          * Send the request to the source - we want to request one
 498          * of our host page sizes (which is >= TPS)
 499          */
 500         if (rb != last_rb) {
 501             last_rb = rb;
 502             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
 503                                      rb_offset, qemu_ram_pagesize(rb));
 504         } else {
 505             /* Save some space */
 506             migrate_send_rp_req_pages(mis, NULL,
 507                                      rb_offset, qemu_ram_pagesize(rb));
 508         }
 509     }
 510     trace_postcopy_ram_fault_thread_exit();
 511     return NULL;
 512 }
 513
 514 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 515 {
 516     /* Open the fd for the kernel to give us userfaults */
 517     mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 518     if (mis->userfault_fd == -1) {
 519         error_report("%s: Failed to open userfault fd: %s", __func__,
 520                      strerror(errno));
 521         return -1;
 522     }
 523
 524     /*
 525      * Although the host check already tested the API, we need to
 526      * do the check again as an ABI handshake on the new fd.
 527      */
 528     if (!ufd_version_check(mis->userfault_fd)) {
 529         return -1;
 530     }
 531
 532     /* Now an eventfd we use to tell the fault-thread to quit */
 533     mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
 534     if (mis->userfault_quit_fd == -1) {
 535         error_report("%s: Opening userfault_quit_fd: %s", __func__,
 536                      strerror(errno));
 537         close(mis->userfault_fd);
 538         return -1;
 539     }
 540
 541     qemu_sem_init(&mis->fault_thread_sem, 0);
 542     qemu_thread_create(&mis->fault_thread, "postcopy/fault",
 543                        postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
 544     qemu_sem_wait(&mis->fault_thread_sem);
 545     qemu_sem_destroy(&mis->fault_thread_sem);
 546     mis->have_fault_thread = true;
 547
 548     /* Mark so that we get notified of accesses to unwritten areas */
 549     if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
 550         return -1;
 551     }
 552
 553     /*
 554      * Ballooning can mark pages as absent while we're postcopying
 555      * that would cause false userfaults.
 556      */
 557     qemu_balloon_inhibit(true);
 558
 559     trace_postcopy_ram_enable_notify();
 560
 561     return 0;
 562 }
 563
 564 /*
 565  * Place a host page (from) at (host) atomically
 566  * returns 0 on success
 567  */
 568 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 569                         size_t pagesize)
 570 {
 571     struct uffdio_copy copy_struct;
 572
 573     copy_struct.dst = (uint64_t)(uintptr_t)host;
 574     copy_struct.src = (uint64_t)(uintptr_t)from;
 575     copy_struct.len = pagesize;
 576     copy_struct.mode = 0;
 577
 578     /* copy also acks to the kernel waking the stalled thread up
 579      * TODO: We can inhibit that ack and only do it if it was requested
 580      * which would be slightly cheaper, but we'd have to be careful
 581      * of the order of updating our page state.
 582      */
 583     if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
 584         int e = errno;
 585         error_report("%s: %s copy host: %p from: %p (size: %zd)",
 586                      __func__, strerror(e), host, from, pagesize);
 587
 588         return -e;
 589     }
 590
 591     trace_postcopy_place_page(host);
 592     return 0;
 593 }
 594
 595 /*
 596  * Place a zero page at (host) atomically
 597  * returns 0 on success
 598  */
 599 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 600                              size_t pagesize)
 601 {
 602     trace_postcopy_place_page_zero(host);
 603
 604     if (pagesize == getpagesize()) {
 605         struct uffdio_zeropage zero_struct;
 606         zero_struct.range.start = (uint64_t)(uintptr_t)host;
 607         zero_struct.range.len = getpagesize();
 608         zero_struct.mode = 0;
 609
 610         if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
 611             int e = errno;
 612             error_report("%s: %s zero host: %p",
 613                          __func__, strerror(e), host);
 614
 615             return -e;
 616         }
 617     } else {
 618         /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
 619         if (!mis->postcopy_tmp_zero_page) {
 620             mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
 621                                                PROT_READ | PROT_WRITE,
 622                                                MAP_PRIVATE | MAP_ANONYMOUS,
 623                                                -1, 0);
 624             if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
 625                 int e = errno;
 626                 mis->postcopy_tmp_zero_page = NULL;
 627                 error_report("%s: %s mapping large zero page",
 628                              __func__, strerror(e));
 629                 return -e;
 630             }
 631             memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 632         }
 633         return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
 634                                    pagesize);
 635     }
 636
 637     return 0;
 638 }
 639
 640 /*
 641  * Returns a target page of memory that can be mapped at a later point in time
 642  * using postcopy_place_page
 643  * The same address is used repeatedly, postcopy_place_page just takes the
 644  * backing page away.
 645  * Returns: Pointer to allocated page
 646  *
 647  */
 648 void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 649 {
 650     if (!mis->postcopy_tmp_page) {
 651         mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
 652                              PROT_READ | PROT_WRITE, MAP_PRIVATE |
 653                              MAP_ANONYMOUS, -1, 0);
 654         if (mis->postcopy_tmp_page == MAP_FAILED) {
 655             mis->postcopy_tmp_page = NULL;
 656             error_report("%s: %s", __func__, strerror(errno));
 657             return NULL;
 658         }
 659     }
 660
 661     return mis->postcopy_tmp_page;
 662 }
 663
 664 #else
 665 /* No target OS support, stubs just fail */
/*
 * Stub used when the userfaultfd-based implementation is not compiled in:
 * reports the lack of OS support and returns false.
 */
bool postcopy_ram_supported_by_host(void)
{
    error_report("%s: No OS support", __func__);
    return false;
}
 671
 672 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 673 {
 674     error_report("postcopy_ram_incoming_init: No OS support");
 675     return -1;
 676 }
 677
/*
 * Stub for builds without OS support.  Trips assert(0); returns -1 if
 * assertions are compiled out.
 */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
 683
/*
 * Stub for builds without OS support.  Trips assert(0); returns -1 if
 * assertions are compiled out.
 */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
 689
/*
 * Stub for builds without OS support.  Trips assert(0); returns -1 if
 * assertions are compiled out.
 */
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
 695
/*
 * Stub for builds without OS support.  Trips assert(0); returns -1 if
 * assertions are compiled out.  All parameters are ignored.
 */
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                        size_t pagesize)
{
    assert(0);
    return -1;
}
 702
/*
 * Stub for builds without OS support.  Trips assert(0); returns -1 if
 * assertions are compiled out.  All parameters are ignored.
 */
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                        size_t pagesize)
{
    assert(0);
    return -1;
}
 709
/*
 * Stub for builds without OS support.  Trips assert(0); returns NULL if
 * assertions are compiled out.
 */
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    assert(0);
    return NULL;
}
 715
 716 #endif
 717
 718 /* ------------------------------------------------------------------------- */
 719
 720 /**
 721  * postcopy_discard_send_init: Called at the start of each RAMBlock before
 722  *   asking to discard individual ranges.
 723  *
 724  * @ms: The current migration state.
 725  * @offset: the bitmap offset of the named RAMBlock in the migration
 726  *   bitmap.
 727  * @name: RAMBlock that discards will operate on.
 728  *
 729  * returns: a new PDS.
 730  */
 731 PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
 732                                                  const char *name)
 733 {
 734     PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
 735
 736     if (res) {
 737         res->ramblock_name = name;
 738     }
 739
 740     return res;
 741 }
 742
 743 /**
 744  * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 745  *   discard. May send a discard message, may just leave it queued to
 746  *   be sent later.
 747  *
 748  * @ms: Current migration state.
 749  * @pds: Structure initialised by postcopy_discard_send_init().
 750  * @start,@length: a range of pages in the migration bitmap in the
 751  *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 752  */
 753 void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
 754                                 unsigned long start, unsigned long length)
 755 {
 756     size_t tp_size = qemu_target_page_size();
 757     /* Convert to byte offsets within the RAM block */
 758     pds->start_list[pds->cur_entry] = start  * tp_size;
 759     pds->length_list[pds->cur_entry] = length * tp_size;
 760     trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
 761     pds->cur_entry++;
 762     pds->nsentwords++;
 763
 764     if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
 765         /* Full set, ship it! */
 766         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 767                                               pds->ramblock_name,
 768                                               pds->cur_entry,
 769                                               pds->start_list,
 770                                               pds->length_list);
 771         pds->nsentcmds++;
 772         pds->cur_entry = 0;
 773     }
 774 }
 775
 776 /**
 777  * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
 778  * bitmap code. Sends any outstanding discard messages, frees the PDS
 779  *
 780  * @ms: Current migration state.
 781  * @pds: Structure initialised by postcopy_discard_send_init().
 782  */
 783 void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
 784 {
 785     /* Anything unsent? */
 786     if (pds->cur_entry) {
 787         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 788                                               pds->ramblock_name,
 789                                               pds->cur_entry,
 790                                               pds->start_list,
 791                                               pds->length_list);
 792         pds->nsentcmds++;
 793     }
 794
 795     trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
 796                                        pds->nsentcmds);
 797
 798     g_free(pds);
 799 }
 800
/*
 * Current state of incoming postcopy; note this is not part of
 * MigrationIncomingState since its state is used during cleanup
 * at the end as MIS is being freed.
 * It is read and written through postcopy_state_get() and
 * postcopy_state_set().
 */
static PostcopyState incoming_postcopy_state;
 807
 808 PostcopyState  postcopy_state_get(void)
 809 {
 810     return atomic_mb_read(&incoming_postcopy_state);
 811 }
 812
 813 /* Set the state and return the old state */
 814 PostcopyState postcopy_state_set(PostcopyState new_state)
 815 {
 816     return atomic_xchg(&incoming_postcopy_state, new_state);
 817 }