git.proxmox.com Git - mirror_qemu.git/commitdiff
Merge tag 'pull-riscv-to-apply-20230224' of github.com:palmer-dabbelt/qemu into staging
author: Peter Maydell <peter.maydell@linaro.org>
Sun, 26 Feb 2023 20:14:46 +0000 (20:14 +0000)
committer: Peter Maydell <peter.maydell@linaro.org>
Sun, 26 Feb 2023 20:14:46 +0000 (20:14 +0000)
Fourth RISC-V PR for QEMU 8.0, Attempt 2

* A triplet of cleanups to the kernel/initrd loader that avoids
  duplication between the various boards.
* Weiwei Li, Daniel Henrique Barboza, and Liu Zhiwei have been added as
  reviewers.  Thanks for the help!
* A fix for PMP matching to avoid incorrectly applying the default
  permissions on PMP permission violations.
* A cleanup to avoid an unnecessary env_archcpu() call in
  cpu_get_tb_cpu_state().
* Fixes for the vector slide instructions to avoid truncating 64-bit
  values (such as doubles) on 32-bit targets.

# -----BEGIN PGP SIGNATURE-----
#
# iQJHBAABCAAxFiEEKzw3R0RoQ7JKlDp6LhMZ81+7GIkFAmP5Br8THHBhbG1lckBk
# YWJiZWx0LmNvbQAKCRAuExnzX7sYiT4RD/9hdSlQlR1g/2h4fbCJ3U0GvyNH0T7N
# mt3AX8hFvmfR1O63qqVVebJSHM1dTm6WsA19vKE5tdtbjV5V8UZuBTSqYeRBSrLd
# LK9IHhwv3k9OQ/EG8CgRo7HEMxAurpC26zTf3chnfwa1Wyl5XxCXNx5hPbhu18G9
# oxw0sBi51T0Tb+N6lOVVSfmiEZWLXRq+lDCZdV0j864brsSjo4x8VEGrLaFTOJLf
# X4MW6vBI4Pcb7EGnHjj5WvRKsf8gdahdx8bSTjORIm8oGri9Iyw6Vrg2khuhjnuH
# 99sD1O06cvrylp+sCOVei8H3S6/xCepQXUXnCBCd1/cetgV+olo+ZR78Z8ZjXPED
# jhZ23lsDcge+4W141lsCiwLgzI0YO3Ac+84zQLIvcx16c8zow3G9FO9sTlBSsgnW
# 0XJrsUF7AZB6quUSMytG7WK+OBizzCRwj7ItC+Mty68wLrei5lDVj8b0t8hAQEdr
# dOb7jku+Dz8OspGZx1aDKKifGDO+Ppv4PjAM2G44OmkM824SvvFg8+FEr9NgbKbp
# VgTZDCeVC6IEpzthKsK8WeompLo7Sc33KITqwMbGiyGs+gsnmgKP2bcTLF8YTlFk
# dqFBWjo3tjH5oukgTLCSYY4xPaHR9q418vGAfRox15GtUVliQ9iL5oH47PVXg4U7
# YsNZ74nD1pUueg==
# =Umli
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 24 Feb 2023 18:49:35 GMT
# gpg:                using RSA key 2B3C3747446843B24A943A7A2E1319F35FBB1889
# gpg:                issuer "palmer@dabbelt.com"
# gpg: Good signature from "Palmer Dabbelt <palmer@dabbelt.com>" [unknown]
# gpg:                 aka "Palmer Dabbelt <palmerdabbelt@google.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 00CE 76D1 8349 60DF CE88  6DF8 EF4C A150 2CCB AB41
#      Subkey fingerprint: 2B3C 3747 4468 43B2 4A94  3A7A 2E13 19F3 5FBB 1889

* tag 'pull-riscv-to-apply-20230224' of github.com:palmer-dabbelt/qemu:
  target/riscv: Fix vslide1up.vf and vslide1down.vf
  target/riscv: avoid env_archcpu() in cpu_get_tb_cpu_state()
  target/riscv: Smepmp: Skip applying default rules when address matches
  MAINTAINERS: Add some RISC-V reviewers
  target/riscv: Remove privileged spec version restriction for RVV
  hw/riscv/boot.c: make riscv_load_initrd() static
  hw/riscv/boot.c: consolidate all kernel init in riscv_load_kernel()
  hw/riscv: handle 32 bit CPUs kernel_entry in riscv_load_kernel()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
355 files changed:
.gitlab-ci.d/buildtest.yml
.gitlab-ci.d/static_checks.yml
.gitmodules
MAINTAINERS
accel/tcg/translator.c
authz/listfile.c
backends/cryptodev-vhost.c
backends/rng.c
backends/vhost-user.c
block.c
block/backup.c
block/blkdebug.c
block/blklogwrites.c
block/blkreplay.c
block/blkverify.c
block/block-backend.c
block/block-copy.c
block/bochs.c
block/commit.c
block/copy-before-write.c
block/copy-on-read.c
block/coroutines.h
block/create.c
block/crypto.c
block/curl.c
block/dirty-bitmap.c
block/file-posix.c
block/file-win32.c
block/filter-compress.c
block/io.c
block/iscsi.c
block/meson.build
block/mirror.c
block/parallels.c
block/preallocate.c
block/qcow.c
block/qcow2-cluster.c
block/qcow2.c
block/qcow2.h
block/qed-check.c
block/qed-table.c
block/qed.c
block/qed.h
block/quorum.c
block/raw-format.c
block/rbd.c
block/replication.c
block/snapshot-access.c
block/stream.c
block/throttle.c
block/vdi.c
block/vhdx.c
block/vmdk.c
block/vpc.c
bsd-user/qemu.h
configs/devices/x86_64-softmmu/x86_64-quintela-devices.mak [deleted file]
configs/devices/x86_64-softmmu/x86_64-quintela2-devices.mak [deleted file]
configs/targets/microblaze-linux-user.mak
configs/targets/microblaze-softmmu.mak
configs/targets/microblazeel-linux-user.mak
configs/targets/microblazeel-softmmu.mak
configure
cpus-common.c
crypto/tlssession.c
docs/about/build-platforms.rst
docs/about/deprecated.rst
docs/about/removed-features.rst
docs/conf.py
docs/devel/fuzzing.rst
docs/devel/qapi-code-gen.rst
docs/devel/vfio-migration.rst
docs/meson.build
docs/system/arm/nuvoton.rst
docs/tools/index.rst
docs/tools/virtiofsd.rst [deleted file]
dump/dump.c
gdb-xml/microblaze-core.xml [new file with mode: 0644]
gdb-xml/microblaze-stack-protect.xml [new file with mode: 0644]
hw/acpi/acpi-stub.c
hw/acpi/vmgenid.c
hw/arm/Kconfig
hw/arm/npcm7xx.c
hw/arm/smmu-common.c
hw/arm/smmuv3-internal.h
hw/arm/smmuv3.c
hw/arm/virt.c
hw/block/virtio-blk.c
hw/char/ibex_uart.c
hw/core/machine-qmp-cmds.c
hw/core/machine.c
hw/core/nmi.c
hw/display/vhost-user-gpu.c
hw/display/virtio-gpu-udmabuf.c
hw/display/virtio-gpu-virgl.c
hw/i386/Kconfig
hw/i386/pc.c
hw/i386/x86.c
hw/intc/armv7m_nvic.c
hw/mem/sparse-mem.c
hw/misc/Kconfig
hw/misc/applesmc.c
hw/misc/meson.build
hw/misc/sga.c [deleted file]
hw/misc/xlnx-zynqmp-apu-ctrl.c
hw/net/lan9118.c
hw/net/rocker/qmp-norocker.c
hw/net/vmxnet3.c
hw/pci/pci.c
hw/s390x/event-facility.c
hw/s390x/s390-stattrib.c
hw/scsi/scsi-disk.c
hw/scsi/scsi-generic.c
hw/scsi/virtio-scsi.c
hw/smbios/smbios-stub.c
hw/ssi/ibex_spi_host.c
hw/ssi/meson.build
hw/ssi/npcm_pspi.c [new file with mode: 0644]
hw/ssi/trace-events
hw/vfio/common.c
hw/vfio/migration.c
hw/vfio/trace-events
hw/virtio/vhost-user-fs.c
include/block/block-common.h
include/block/block-copy.h
include/block/block-global-state.h
include/block/block-io.h
include/block/block_int-common.h
include/block/block_int-io.h
include/block/dirty-bitmap.h
include/crypto/tlssession.h
include/disas/dis-asm.h
include/exec/exec-all.h
include/hw/arm/allwinner-a10.h
include/hw/arm/npcm7xx.h
include/hw/arm/smmu-common.h
include/hw/arm/smmuv3.h
include/hw/char/ibex_uart.h
include/hw/core/cpu.h
include/hw/intc/armv7m_nvic.h
include/hw/ssi/ibex_spi_host.h
include/hw/ssi/npcm_pspi.h [new file with mode: 0644]
include/hw/vfio/vfio-common.h
include/hw/virtio/virtio-scsi.h
include/migration/register.h
include/net/net.h
include/qapi/qmp/qerror.h
include/qemu/bswap.h
include/qemu/envlist.h
include/qemu/hbitmap.h
include/qemu/rcu.h
include/qemu/rcu_queue.h
include/qemu/thread.h
include/qemu/uri.h
include/qemu/vhost-user-server.h
include/standard-headers/drm/drm_fourcc.h
include/standard-headers/linux/ethtool.h
include/standard-headers/linux/fuse.h
include/standard-headers/linux/input-event-codes.h
include/standard-headers/linux/pci_regs.h
include/standard-headers/linux/virtio_blk.h
include/standard-headers/linux/virtio_bt.h
include/standard-headers/linux/virtio_net.h
include/sysemu/block-backend-global-state.h
include/sysemu/block-backend-io.h
include/sysemu/os-win32.h
include/sysemu/replay.h
include/ui/console.h
io/channel-tls.c
linux-headers/asm-arm64/kvm.h
linux-headers/asm-generic/hugetlb_encode.h
linux-headers/asm-generic/mman-common.h
linux-headers/asm-mips/mman.h
linux-headers/asm-riscv/kvm.h
linux-headers/asm-x86/kvm.h
linux-headers/linux/kvm.h
linux-headers/linux/psci.h
linux-headers/linux/userfaultfd.h
linux-headers/linux/vfio.h
linux-user/arm/cpu_loop.c
linux-user/main.c
linux-user/microblaze/cpu_loop.c
linux-user/sparc/cpu_loop.c
linux-user/syscall.c
linux-user/user-internals.h
meson.build
meson_options.txt
migration/block-dirty-bitmap.c
migration/block.c
migration/colo-failover.c
migration/colo.c
migration/migration-hmp-cmds.c
migration/migration.c
migration/migration.h
migration/multifd.c
migration/multifd.c.orig [deleted file]
migration/multifd.h
migration/postcopy-ram.c
migration/postcopy-ram.h
migration/qemu-file.c
migration/qemu-file.h
migration/ram.c
migration/savevm.c
migration/savevm.h
migration/trace-events
migration/xbzrle.c
migration/xbzrle.h
net/l2tpv3.c
net/net.c
net/stream.c
net/vhost-vdpa.c
net/vmnet-common.m
net/vmnet_int.h
pc-bios/README
pc-bios/meson.build
pc-bios/sgabios.bin [deleted file]
python/.gitignore
python/Makefile
python/Pipfile [deleted file]
python/Pipfile.lock [deleted file]
python/README.rst
python/qemu/qmp/protocol.py
python/qemu/qmp/qmp_client.py
python/qemu/utils/qemu_ga_client.py
python/setup.cfg
python/tests/minreqs.txt [new file with mode: 0644]
qapi/block-core.json
qapi/net.json
qemu-img.c
qemu-io-cmds.c
qemu-options.hx
qga/commands.c
qga/main.c
replay/replay.c
replay/stubs-system.c
roms/Makefile
roms/sgabios [deleted submodule]
scripts/block-coroutine-wrapper.py
scripts/ci/org.centos/stream/8/x86_64/configure
scripts/ci/org.centos/stream/8/x86_64/test-avocado
scripts/coverity-scan/COMPONENTS.md
scripts/meson-buildoptions.py
scripts/meson-buildoptions.sh
scripts/qapi/.flake8
scripts/qapi/expr.py
scripts/qapi/parser.py
scripts/qapi/pylintrc
scripts/qapi/schema.py
softmmu/dma-helpers.c
softmmu/qtest.c
softmmu/rtc.c
softmmu/vl.c
stubs/meson.build
stubs/vmgenid.c [deleted file]
target/arm/cpregs.h
target/arm/cpu.c
target/arm/cpu.h
target/arm/cpu_tcg.c
target/arm/helper.c
target/arm/internals.h
target/arm/m_helper.c
target/arm/machine.c
target/i386/gdbstub.c
target/i386/monitor.c
target/i386/sev-sysemu-stub.c
target/i386/sev.c
target/i386/tcg/emit.c.inc
target/microblaze/cpu.c
target/microblaze/cpu.h
target/microblaze/gdbstub.c
tests/avocado/avocado_qemu/__init__.py
tests/avocado/boot_linux.py
tests/avocado/boot_linux_console.py
tests/avocado/machine_aarch64_virt.py
tests/avocado/reverse_debugging.py
tests/avocado/virtiofs_submounts.py [deleted file]
tests/bench/meson.build
tests/bench/xbzrle-bench.c [new file with mode: 0644]
tests/docker/dockerfiles/python.docker
tests/migration/guestperf/engine.py
tests/qemu-iotests/186
tests/qemu-iotests/iotests.py
tests/qemu-iotests/tests/detect-zeroes-registered-buf [new file with mode: 0755]
tests/qemu-iotests/tests/detect-zeroes-registered-buf.out [new file with mode: 0644]
tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
tests/qtest/arm-cpu-features.c
tests/qtest/bios-tables-test.c
tests/qtest/device-plug-test.c
tests/qtest/drive_del-test.c
tests/qtest/fuzz-lsi53c895a-test.c
tests/qtest/fuzz/fork_fuzz.c [deleted file]
tests/qtest/fuzz/fork_fuzz.h [deleted file]
tests/qtest/fuzz/fork_fuzz.ld [deleted file]
tests/qtest/fuzz/fuzz.c
tests/qtest/fuzz/fuzz.h
tests/qtest/fuzz/generic_fuzz.c
tests/qtest/fuzz/i440fx_fuzz.c
tests/qtest/fuzz/meson.build
tests/qtest/fuzz/virtio_blk_fuzz.c
tests/qtest/fuzz/virtio_net_fuzz.c
tests/qtest/fuzz/virtio_scsi_fuzz.c
tests/qtest/hd-geo-test.c
tests/qtest/libqtest.c
tests/qtest/meson.build
tests/qtest/netdev-socket.c
tests/qtest/npcm7xx_pwm-test.c
tests/qtest/pxe-test.c
tests/tcg/i386/Makefile.target
tests/tcg/i386/test-i386-adcox.c [new file with mode: 0644]
tests/tcg/i386/test-i386-bmi2.c
tests/tcg/s390x/Makefile.softmmu-target
tests/tcg/x86_64/Makefile.target
tests/tcg/x86_64/adox.c [new file with mode: 0644]
tests/unit/test-bdrv-drain.c
tests/unit/test-block-iothread.c
tests/unit/test-xbzrle.c
tools/meson.build
tools/virtiofsd/50-qemu-virtiofsd.json.in [deleted file]
tools/virtiofsd/buffer.c [deleted file]
tools/virtiofsd/fuse_common.h [deleted file]
tools/virtiofsd/fuse_i.h [deleted file]
tools/virtiofsd/fuse_log.c [deleted file]
tools/virtiofsd/fuse_log.h [deleted file]
tools/virtiofsd/fuse_lowlevel.c [deleted file]
tools/virtiofsd/fuse_lowlevel.h [deleted file]
tools/virtiofsd/fuse_misc.h [deleted file]
tools/virtiofsd/fuse_opt.c [deleted file]
tools/virtiofsd/fuse_opt.h [deleted file]
tools/virtiofsd/fuse_signals.c [deleted file]
tools/virtiofsd/fuse_virtio.c [deleted file]
tools/virtiofsd/fuse_virtio.h [deleted file]
tools/virtiofsd/helper.c [deleted file]
tools/virtiofsd/meson.build [deleted file]
tools/virtiofsd/passthrough_helpers.h [deleted file]
tools/virtiofsd/passthrough_ll.c [deleted file]
tools/virtiofsd/passthrough_seccomp.c [deleted file]
tools/virtiofsd/passthrough_seccomp.h [deleted file]
ui/console.c
ui/dbus-clipboard.c
ui/dbus-console.c
ui/dbus-listener.c
ui/dbus.c
ui/egl-headless.c
ui/gtk.c
ui/spice-app.c
ui/spice-core.c
ui/spice-display.c
ui/udmabuf.c
ui/vdagent.c
util/cacheflush.c
util/hbitmap.c
util/qemu-config.c
util/qemu-thread-posix.c
util/trace-events
util/userfaultfd.c
util/vhost-user-server.c

index 0aa149a352453628a20fba2f5ed233b70e929dc8..8f332fc36f512861a097332d291519ffc8163946 100644 (file)
@@ -467,27 +467,16 @@ tsan-build:
     TARGETS: x86_64-softmmu ppc64-softmmu riscv64-softmmu x86_64-linux-user
     MAKE_CHECK_ARGS: bench V=1
 
-# gprof/gcov are GCC features
-build-gprof-gcov:
+# gcov is a GCC features
+gcov:
   extends: .native_build_job_template
   needs:
     job: amd64-ubuntu2004-container
+  timeout: 80m
   variables:
     IMAGE: ubuntu2004
-    CONFIGURE_ARGS: --enable-gprof --enable-gcov
+    CONFIGURE_ARGS: --enable-gcov
     TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu
-  artifacts:
-    expire_in: 1 days
-    paths:
-      - build
-
-check-gprof-gcov:
-  extends: .native_test_job_template
-  needs:
-    - job: build-gprof-gcov
-      artifacts: true
-  variables:
-    IMAGE: ubuntu2004
     MAKE_CHECK_ARGS: check
   after_script:
     - cd build
index 289ad1359e3a83d5222a6fcc299591923e413c54..b4cbdbce2abfe96127e20af8801e82f6ad15e7d8 100644 (file)
@@ -23,12 +23,12 @@ check-dco:
   before_script:
     - apk -U add git
 
-check-python-pipenv:
+check-python-minreqs:
   extends: .base_job_template
   stage: test
   image: $CI_REGISTRY_IMAGE/qemu/python:latest
   script:
-    - make -C python check-pipenv
+    - make -C python check-minreqs
   variables:
     GIT_DEPTH: 1
   needs:
index 24cffa87d42083a09aab938dfd4776ed4bad7b60..6ce5bf49c5883137592cd4aee016b72473963146 100644 (file)
@@ -13,9 +13,6 @@
 [submodule "roms/qemu-palcode"]
        path = roms/qemu-palcode
        url = https://gitlab.com/qemu-project/qemu-palcode.git
-[submodule "roms/sgabios"]
-       path = roms/sgabios
-       url = https://gitlab.com/qemu-project/sgabios.git
 [submodule "dtc"]
        path = dtc
        url = https://gitlab.com/qemu-project/dtc.git
index 847bc7f1315f3dfa76c37754adeeac20ad82e2c5..5c1ee411397596938a95d996549da841d80fff70 100644 (file)
@@ -810,13 +810,13 @@ F: include/hw/net/mv88w8618_eth.h
 F: docs/system/arm/musicpal.rst
 
 Nuvoton NPCM7xx
-M: Havard Skinnemoen <hskinnemoen@google.com>
 M: Tyrone Ting <kfting@nuvoton.com>
+M: Hao Wu <wuhaotsh@google.com>
 L: qemu-arm@nongnu.org
 S: Supported
-F: hw/*/npcm7xx*
-F: include/hw/*/npcm7xx*
-F: tests/qtest/npcm7xx*
+F: hw/*/npcm*
+F: include/hw/*/npcm*
+F: tests/qtest/npcm*
 F: pc-bios/npcm7xx_bootrom.bin
 F: roms/vbootrom
 F: docs/system/arm/nuvoton.rst
@@ -1679,7 +1679,6 @@ F: hw/acpi/piix4.c
 F: hw/acpi/ich9*.c
 F: include/hw/acpi/ich9*.h
 F: include/hw/southbridge/piix.h
-F: hw/misc/sga.c
 F: hw/isa/apm.c
 F: include/hw/isa/apm.h
 F: tests/unit/test-x86-cpuid.c
@@ -1999,6 +1998,7 @@ F: hw/usb/dev-serial.c
 
 VFIO
 M: Alex Williamson <alex.williamson@redhat.com>
+R: Cédric Le Goater <clg@redhat.com>
 S: Supported
 F: hw/vfio/*
 F: include/hw/vfio/
@@ -2100,10 +2100,8 @@ virtiofs
 M: Dr. David Alan Gilbert <dgilbert@redhat.com>
 M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Supported
-F: tools/virtiofsd/*
 F: hw/virtio/vhost-user-fs*
 F: include/hw/virtio/vhost-user-fs.h
-F: docs/tools/virtiofsd.rst
 L: virtio-fs@redhat.com
 
 virtio-input
@@ -2278,7 +2276,6 @@ F: hw/acpi/vmgenid.c
 F: include/hw/acpi/vmgenid.h
 F: docs/specs/vmgenid.txt
 F: tests/qtest/vmgenid-test.c
-F: stubs/vmgenid.c
 
 LED
 M: Philippe Mathieu-Daudé <philmd@linaro.org>
@@ -3583,13 +3580,11 @@ F: block/dmg.c
 parallels
 M: Stefan Hajnoczi <stefanha@redhat.com>
 M: Denis V. Lunev <den@openvz.org>
-M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/parallels.c
 F: block/parallels-ext.c
 F: docs/interop/parallels.txt
-T: git https://gitlab.com/vsementsov/qemu.git block
 
 qed
 M: Stefan Hajnoczi <stefanha@redhat.com>
index ef5193c67e383154ea4cebfd65996e386795e0c4..1cf404ced0d042362d1c3f908910a33f281417d4 100644 (file)
@@ -176,8 +176,16 @@ static void *translator_access(CPUArchState *env, DisasContextBase *db,
         if (host == NULL) {
             tb_page_addr_t phys_page =
                 get_page_addr_code_hostp(env, base, &db->host_addr[1]);
-            /* We cannot handle MMIO as second page. */
-            assert(phys_page != -1);
+
+            /*
+             * If the second page is MMIO, treat as if the first page
+             * was MMIO as well, so that we do not cache the TB.
+             */
+            if (unlikely(phys_page == -1)) {
+                tb_set_page_addr0(tb, -1);
+                return NULL;
+            }
+
             tb_set_page_addr1(tb, phys_page);
 #ifdef CONFIG_USER_ONLY
             page_protect(end);
index da3a0e69a2ed472c053a2b64113405f371e1b2e1..45a60e987df9e96310c16a5fbe44de0342d56a74 100644 (file)
@@ -30,7 +30,6 @@
 #include "qapi/qapi-visit-authz.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qobject.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qobject-input-visitor.h"
 
 
index 572f87b3bee1f561328fd7775f668b6041c4678a..74ea0ad63deb6e0298ea9abbc61b2d9a3385d5fa 100644 (file)
@@ -28,7 +28,6 @@
 
 #ifdef CONFIG_VHOST_CRYPTO
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "hw/virtio/virtio-crypto.h"
 #include "sysemu/cryptodev-vhost-user.h"
index 6c7bf64426afc4ea5290373162f8b35db8d6abae..9bbd0c77b6959f2fc03c4fc77908a8237de09b5a 100644 (file)
@@ -13,7 +13,6 @@
 #include "qemu/osdep.h"
 #include "sysemu/rng.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/module.h"
 #include "qom/object_interfaces.h"
 
index 7bfcaef976c4c6842953dfa446f7ed44f7465f1f..0596223ac468ca302c96a0a3097b9b67f365ec10 100644 (file)
@@ -13,7 +13,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "qom/object_interfaces.h"
 #include "sysemu/vhost-user-backend.h"
diff --git a/block.c b/block.c
index aa9062f2c1e5dbbb84d306a9a24c6109eea263c4..0dd604d0f6a8d18ab7db0130638dbcb6ec9ffeff 100644 (file)
--- a/block.c
+++ b/block.c
@@ -277,8 +277,8 @@ bool bdrv_is_read_only(BlockDriverState *bs)
     return !(bs->open_flags & BDRV_O_RDWR);
 }
 
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
-                           bool ignore_allow_rdw, Error **errp)
+static int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+                                  bool ignore_allow_rdw, Error **errp)
 {
     IO_CODE();
 
@@ -533,6 +533,7 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
     int ret;
     GLOBAL_STATE_CODE();
     ERRP_GUARD();
+    assert_bdrv_graph_readable();
 
     if (!drv->bdrv_co_create_opts) {
         error_setg(errp, "Driver '%s' does not support image creation",
@@ -657,8 +658,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
     options = qdict_new();
     qdict_put_str(options, "driver", drv->format_name);
 
-    blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_RESIZE, errp);
+    blk = blk_co_new_open(filename, NULL, options,
+                          BDRV_O_RDWR | BDRV_O_RESIZE, errp);
     if (!blk) {
         error_prepend(errp, "Protocol driver '%s' does not support image "
                       "creation, and opening the image failed: ",
@@ -739,6 +740,7 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
 
     IO_CODE();
     assert(bs != NULL);
+    assert_bdrv_graph_readable();
 
     if (!bs->drv) {
         error_setg(errp, "Block node '%s' is not opened", bs->filename);
@@ -1040,6 +1042,7 @@ int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
 {
     BlockDriver *drv = bs->drv;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -3807,13 +3810,11 @@ out:
  * function eventually calls bdrv_refresh_total_sectors() which polls
  * when called from non-coroutine context.
  */
-static BlockDriverState *bdrv_open_inherit(const char *filename,
-                                           const char *reference,
-                                           QDict *options, int flags,
-                                           BlockDriverState *parent,
-                                           const BdrvChildClass *child_class,
-                                           BdrvChildRole child_role,
-                                           Error **errp)
+static BlockDriverState * no_coroutine_fn
+bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
+                  int flags, BlockDriverState *parent,
+                  const BdrvChildClass *child_class, BdrvChildRole child_role,
+                  Error **errp)
 {
     int ret;
     BlockBackend *file = NULL;
@@ -3829,6 +3830,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
     assert(!child_class || !flags);
     assert(!child_class == !parent);
     GLOBAL_STATE_CODE();
+    assert(!qemu_in_coroutine());
 
     if (reference) {
         bool options_non_empty = options ? qdict_size(options) : false;
@@ -5266,6 +5268,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
  * child.
  *
  * This function does not create any image files.
+ *
+ * The caller must hold the AioContext lock for @bs_top.
  */
 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
                 Error **errp)
@@ -5273,11 +5277,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
     int ret;
     BdrvChild *child;
     Transaction *tran = tran_new();
+    AioContext *old_context, *new_context = NULL;
 
     GLOBAL_STATE_CODE();
 
     assert(!bs_new->backing);
 
+    old_context = bdrv_get_aio_context(bs_top);
+
     child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
                                      &child_of_bds, bdrv_backing_role(bs_new),
                                      tran, errp);
@@ -5286,6 +5293,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
         goto out;
     }
 
+    /*
+     * bdrv_attach_child_noperm could change the AioContext of bs_top.
+     * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
+     * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
+     * that assumes the new lock is taken.
+     */
+    new_context = bdrv_get_aio_context(bs_top);
+
+    if (old_context != new_context) {
+        aio_context_release(old_context);
+        aio_context_acquire(new_context);
+    }
+
     ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
     if (ret < 0) {
         goto out;
@@ -5297,6 +5317,11 @@ out:
 
     bdrv_refresh_limits(bs_top, NULL, NULL);
 
+    if (new_context && old_context != new_context) {
+        aio_context_release(new_context);
+        aio_context_acquire(old_context);
+    }
+
     return ret;
 }
 
@@ -5819,6 +5844,7 @@ int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv)
         return -ENOMEDIUM;
@@ -5840,6 +5866,7 @@ int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs)
 {
     int64_t ret;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     ret = bdrv_co_nb_sectors(bs);
     if (ret < 0) {
@@ -6803,6 +6830,7 @@ bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs)
     BlockDriver *drv = bs->drv;
     BdrvChild *child;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return false;
@@ -6825,6 +6853,7 @@ void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     BlockDriver *drv = bs->drv;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (drv && drv->bdrv_co_eject) {
         drv->bdrv_co_eject(bs, eject_flag);
@@ -6839,6 +6868,7 @@ void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     BlockDriver *drv = bs->drv;
     IO_CODE();
+    assert_bdrv_graph_readable();
     trace_bdrv_lock_medium(bs, locked);
 
     if (drv && drv->bdrv_co_lock_medium) {
index 824d39acaacdf6c46b81560ba5a271388f0eb948..db3791f4d16d5f45e880959f6e065b9c13539ced 100644 (file)
@@ -22,7 +22,6 @@
 #include "block/block-copy.h"
 #include "block/dirty-bitmap.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitmap.h"
@@ -270,7 +269,10 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
                 return -ECANCELED;
             }
 
+            /* rdlock protects the subsequent call to bdrv_is_allocated() */
+            bdrv_graph_co_rdlock();
             ret = block_copy_reset_unallocated(s->bcs, offset, &count);
+            bdrv_graph_co_rdunlock();
             if (ret < 0) {
                 return ret;
             }
index 28772be73f0554460ca495d52d9efa9931923d5b..978c8cff9e334af4862c506bb84694c3adeb10d0 100644 (file)
@@ -626,7 +626,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return -error;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -647,7 +647,7 @@ blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                     QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -668,7 +668,7 @@ blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
+static int GRAPH_RDLOCK coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
 {
     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
 
@@ -679,9 +679,9 @@ static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
     return bdrv_co_flush(bs->file->bs);
 }
 
-static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int64_t bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     uint32_t align = MAX(bs->bl.request_alignment,
                          bs->bl.pwrite_zeroes_alignment);
@@ -712,8 +712,8 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     uint32_t align = bs->bl.pdiscard_alignment;
     int err;
@@ -967,7 +967,8 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
     return false;
 }
 
-static int64_t coroutine_fn blkdebug_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkdebug_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
index b00b8a6dd0de9519398592bc44312010dfd5dac0..3ea7141cb5cfc261b46de9a9ca15182c3f7103c8 100644 (file)
@@ -267,7 +267,8 @@ static void blk_log_writes_close(BlockDriverState *bs)
     s->log_file = NULL;
 }
 
-static int64_t coroutine_fn blk_log_writes_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
@@ -294,7 +295,7 @@ static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.request_alignment = s->sectorsize;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                          QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -307,7 +308,7 @@ typedef struct BlkLogWritesFileReq {
     uint64_t bytes;
     int file_flags;
     QEMUIOVector *qiov;
-    int (*func)(struct BlkLogWritesFileReq *r);
+    int GRAPH_RDLOCK_PTR (*func)(struct BlkLogWritesFileReq *r);
     int file_ret;
 } BlkLogWritesFileReq;
 
@@ -319,7 +320,8 @@ typedef struct {
     int log_ret;
 } BlkLogWritesLogReq;
 
-static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
+static void coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
 {
     BDRVBlkLogWritesState *s = lr->bs->opaque;
     uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
@@ -368,15 +370,16 @@ static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
     }
 }
 
-static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
+static void coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
 {
     fr->file_ret = fr->func(fr);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                       QEMUIOVector *qiov, int flags,
-                      int (*file_func)(BlkLogWritesFileReq *r),
+                      int /*GRAPH_RDLOCK*/ (*file_func)(BlkLogWritesFileReq *r),
                       uint64_t entry_flags, bool is_zero_write)
 {
     QEMUIOVector log_qiov;
@@ -428,32 +431,33 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return fr.file_ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes,
                            fr->qiov, fr->file_flags);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes,
                                  fr->file_flags);
 }
 
-static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_flush(fr->bs->file->bs);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_pdiscard(fr->bs->file, fr->offset, fr->bytes);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                           QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -461,7 +465,7 @@ blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                                  blk_log_writes_co_do_file_pwritev, 0, false);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                 int64_t bytes, BdrvRequestFlags flags)
 {
@@ -470,14 +474,15 @@ blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                  true);
 }
 
-static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
 {
     return blk_log_writes_co_log(bs, 0, 0, NULL, 0,
                                  blk_log_writes_co_do_file_flush,
                                  LOG_FLUSH_FLAG, false);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return blk_log_writes_co_log(bs, offset, bytes, NULL, 0,
index 16543f585ad02264c414a38c8023a24c8cd3b446..04f53eea417b3d977df2baf5d9f778cc6b901343 100644 (file)
@@ -40,7 +40,8 @@ fail:
     return ret;
 }
 
-static int64_t coroutine_fn blkreplay_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkreplay_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
@@ -69,8 +70,9 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
     replay_block_event(req->bh, reqid);
 }
 
-static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -80,8 +82,9 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -91,8 +94,9 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           BdrvRequestFlags flags)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
@@ -102,8 +106,8 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pdiscard(bs->file, offset, bytes);
@@ -113,7 +117,7 @@ static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK blkreplay_co_flush(BlockDriverState *bs)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_flush(bs->file->bs);
index edf1a550f297818fc1b86e82156761c73fc10b29..1c16f86b2e702330375adb893ead5489cfb84406 100644 (file)
@@ -155,7 +155,8 @@ static void blkverify_close(BlockDriverState *bs)
     s->test_file = NULL;
 }
 
-static int64_t coroutine_fn blkverify_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkverify_co_getlength(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
@@ -256,7 +257,7 @@ blkverify_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
 }
 
-static int coroutine_fn blkverify_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK blkverify_co_flush(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
index ef512f7c4837015ddd6177b0680ef2412cc77c92..278b04ce69ab7343cdc6f453daaf90473ef926a0 100644 (file)
@@ -1235,8 +1235,8 @@ void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
     blk->disable_request_queuing = disable;
 }
 
-static coroutine_fn int blk_check_byte_request(BlockBackend *blk,
-                                               int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
 {
     int64_t len;
 
@@ -1244,7 +1244,7 @@ static coroutine_fn int blk_check_byte_request(BlockBackend *blk,
         return -EIO;
     }
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -1289,6 +1289,7 @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
     IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
     /* Call blk_bs() only after waiting, the graph may have changed */
     bs = blk_bs(blk);
@@ -1363,6 +1364,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
     IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
     /* Call blk_bs() only after waiting, the graph may have changed */
     bs = blk_bs(blk);
@@ -1431,6 +1433,7 @@ int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
                                            BlockDriverState **file)
 {
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
     return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
                                       map, file);
 }
@@ -1441,6 +1444,7 @@ int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
                                            int64_t bytes, int64_t *pnum)
 {
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
     return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
                                       bytes, pnum);
 }
@@ -1602,8 +1606,9 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
 int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
 {
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -1623,8 +1628,9 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
 int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
 {
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -1670,8 +1676,9 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
     IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -1716,6 +1723,7 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
     IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
     ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
@@ -1759,10 +1767,11 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
 {
-    blk_wait_while_drained(blk);
     IO_CODE();
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -1989,20 +1998,22 @@ bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     return bs && bdrv_co_is_inserted(bs);
 }
 
-bool blk_is_available(BlockBackend *blk)
+bool coroutine_fn blk_co_is_available(BlockBackend *blk)
 {
     IO_CODE();
-    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
+    return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
 }
 
 void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
 {
     BlockDriverState *bs = blk_bs(blk);
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     if (bs) {
         bdrv_co_lock_medium(bs, locked);
@@ -2014,6 +2025,7 @@ void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag)
     BlockDriverState *bs = blk_bs(blk);
     char *id;
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     if (bs) {
         bdrv_co_eject(bs, eject_flag);
@@ -2321,6 +2333,7 @@ void coroutine_fn blk_co_io_plug(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     if (bs) {
         bdrv_co_io_plug(bs);
@@ -2331,6 +2344,7 @@ void coroutine_fn blk_co_io_unplug(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     if (bs) {
         bdrv_co_io_unplug(bs);
@@ -2372,7 +2386,8 @@ int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
                                  Error **errp)
 {
     IO_OR_GS_CODE();
-    if (!blk_is_available(blk)) {
+    GRAPH_RDLOCK_GUARD();
+    if (!blk_co_is_available(blk)) {
         error_setg(errp, "No medium inserted");
         return -ENOMEDIUM;
     }
@@ -2627,6 +2642,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
 {
     int r;
     IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     r = blk_check_byte_request(blk_in, off_in, bytes);
     if (r) {
@@ -2636,6 +2652,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
     if (r) {
         return r;
     }
+
     return bdrv_co_copy_range(blk_in->root, off_in,
                               blk_out->root, off_out,
                               bytes, read_flags, write_flags);
index 30a4da0f2e61cadd159cad082524d969659eb035..e13d7bc6b69fbe89049281c212f68629799fa2c7 100644 (file)
@@ -469,10 +469,9 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
  * value of @method should be used for subsequent tasks.
  * Returns 0 on success.
  */
-static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
-                                           int64_t offset, int64_t bytes,
-                                           BlockCopyMethod *method,
-                                           bool *error_is_read)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
+                   BlockCopyMethod *method, bool *error_is_read)
 {
     int ret;
     int64_t nbytes = MIN(offset + bytes, s->len) - offset;
@@ -558,8 +557,10 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
     BlockCopyMethod method = t->method;
     int ret;
 
-    ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
-                             &error_is_read);
+    WITH_GRAPH_RDLOCK_GUARD() {
+        ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
+                                 &error_is_read);
+    }
 
     WITH_QEMU_LOCK_GUARD(&s->lock) {
         if (s->method == t->method) {
@@ -581,9 +582,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
     return ret;
 }
 
-static coroutine_fn int block_copy_block_status(BlockCopyState *s,
-                                                int64_t offset,
-                                                int64_t bytes, int64_t *pnum)
+static coroutine_fn GRAPH_RDLOCK
+int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
+                            int64_t *pnum)
 {
     int64_t num;
     BlockDriverState *base;
@@ -618,9 +619,9 @@ static coroutine_fn int block_copy_block_status(BlockCopyState *s,
  * Check if the cluster starting at offset is allocated or not.
  * return via pnum the number of contiguous clusters sharing this allocation.
  */
-static int coroutine_fn block_copy_is_cluster_allocated(BlockCopyState *s,
-                                                        int64_t offset,
-                                                        int64_t *pnum)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
+                                int64_t *pnum)
 {
     BlockDriverState *bs = s->source->bs;
     int64_t count, total_count = 0;
@@ -630,6 +631,7 @@ static int coroutine_fn block_copy_is_cluster_allocated(BlockCopyState *s,
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
 
     while (true) {
+        /* protected in backup_run() */
         ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
         if (ret < 0) {
             return ret;
@@ -704,7 +706,7 @@ int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
  * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
  * clusters found and -errno on failure.
  */
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 block_copy_dirty_clusters(BlockCopyCallState *call_state)
 {
     BlockCopyState *s = call_state->s;
@@ -827,7 +829,8 @@ void block_copy_kick(BlockCopyCallState *call_state)
  * it means that some I/O operation failed in context of _this_ block_copy call,
  * not some parallel operation.
  */
-static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_common(BlockCopyCallState *call_state)
 {
     int ret;
     BlockCopyState *s = call_state->s;
@@ -892,6 +895,7 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
 
 static void coroutine_fn block_copy_async_co_entry(void *opaque)
 {
+    GRAPH_RDLOCK_GUARD();
     block_copy_common(opaque);
 }
 
index 46e79583165d700d7f068629196a37a3f4f632fc..2f5ae52c908d873770782886beb8c3fde59f645b 100644 (file)
@@ -237,7 +237,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
     return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 bochs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
index 41e3599281aa659915a611c1b775b3ab92d5aefb..2b20fd0fd4d2c56a9adc3ada7b91a0a944894c54 100644 (file)
@@ -18,7 +18,6 @@
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/memalign.h"
 #include "sysemu/block-backend.h"
@@ -207,8 +206,9 @@ static const BlockJobDriver commit_job_driver = {
     },
 };
 
-static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_commit_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
index c9fb809ba0f5c3a6dd1542cc88730f7704479df1..646d8227a4616edb896b010ee3687f19df9ce37d 100644 (file)
@@ -78,9 +78,9 @@ typedef struct BDRVCopyBeforeWriteState {
     int snapshot_error;
 } BDRVCopyBeforeWriteState;
 
-static coroutine_fn int cbw_co_preadv(
-        BlockDriverState *bs, int64_t offset, int64_t bytes,
-        QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
@@ -149,8 +149,8 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
     return 0;
 }
 
-static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
     if (ret < 0) {
@@ -160,8 +160,9 @@ static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
-static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     BdrvRequestFlags flags)
 {
     int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
     if (ret < 0) {
@@ -171,11 +172,9 @@ static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
-                                       int64_t offset,
-                                       int64_t bytes,
-                                       QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+static coroutine_fn GRAPH_RDLOCK
+int cbw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
     if (ret < 0) {
@@ -185,7 +184,7 @@ static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK cbw_co_flush(BlockDriverState *bs)
 {
     if (!bs->file) {
         return 0;
@@ -257,7 +256,7 @@ cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req)
     g_free(req);
 }
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
                        QEMUIOVector *qiov, size_t qiov_offset)
 {
@@ -289,7 +288,7 @@ cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return 0;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 cbw_co_snapshot_block_status(BlockDriverState *bs,
                              bool want_zero, int64_t offset, int64_t bytes,
                              int64_t *pnum, int64_t *map,
@@ -322,8 +321,8 @@ cbw_co_snapshot_block_status(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs,
-                                                 int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     BDRVCopyBeforeWriteState *s = bs->opaque;
 
index 3280eb2febffe4a79778cfccdc145a62da563dbd..cc0f848b0f108f0b2ca7a419bd7238bf9577b220 100644 (file)
@@ -121,17 +121,16 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
 }
 
 
-static int64_t coroutine_fn cor_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK cor_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
 
 
-static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
-                                           int64_t offset, int64_t bytes,
-                                           QEMUIOVector *qiov,
-                                           size_t qiov_offset,
-                                           BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, size_t qiov_offset,
+                   BdrvRequestFlags flags)
 {
     int64_t n;
     int local_flags;
@@ -180,50 +179,49 @@ static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
 }
 
 
-static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
-                                            int64_t offset,
-                                            int64_t bytes,
-                                            QEMUIOVector *qiov,
-                                            size_t qiov_offset,
-                                            BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, size_t qiov_offset,
+                    BdrvRequestFlags flags)
 {
     return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
                                 flags);
 }
 
 
-static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes,
-                                             BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     BdrvRequestFlags flags)
 {
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 
-static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
 
-static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
-                                                  int64_t offset,
-                                                  int64_t bytes,
-                                                  QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          QEMUIOVector *qiov)
 {
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
                            BDRV_REQ_WRITE_COMPRESSED);
 }
 
 
-static void coroutine_fn cor_co_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+cor_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     bdrv_co_eject(bs->file->bs, eject_flag);
 }
 
 
-static void coroutine_fn cor_co_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+cor_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     bdrv_co_lock_medium(bs->file->bs, locked);
 }
index 2a1e0b3c9d529e30b3068529c0f2bc8823946784..dd9f3d449bd52e2fb0f757b284e7f13bb5c59d86 100644 (file)
@@ -43,7 +43,7 @@ bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
 int coroutine_fn GRAPH_RDLOCK
 bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
 
-int coroutine_fn
+int coroutine_fn GRAPH_RDLOCK
 bdrv_co_common_block_status_above(BlockDriverState *bs,
                                   BlockDriverState *base,
                                   bool include_base,
index 4df43f11f4520052847974eaff2bbd5228aa6b5f..bf67b9947cb1697e3d761b858664cd16a8973eca 100644 (file)
@@ -43,6 +43,7 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
     int ret;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     job_progress_set_remaining(&s->common, 1);
     ret = s->drv->bdrv_co_create(s->opts, errp);
@@ -59,6 +60,12 @@ static const JobDriver blockdev_create_job_driver = {
     .run           = blockdev_create_run,
 };
 
+/* Checking whether the function is present doesn't require the graph lock */
+static inline bool TSA_NO_TSA has_bdrv_co_create(BlockDriver *drv)
+{
+    return drv->bdrv_co_create;
+}
+
 void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
                          Error **errp)
 {
@@ -79,7 +86,7 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
     }
 
     /* Error out if the driver doesn't support .bdrv_co_create */
-    if (!drv->bdrv_co_create) {
+    if (!has_bdrv_co_create(drv)) {
         error_setg(errp, "Driver does not support blockdev-create");
         return;
     }
index b70cec97c72b31e94d045b2f9462eb8997ae0b8f..ca67289187eedecad7fcd96204d475bd069853e6 100644 (file)
@@ -314,19 +314,18 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
 }
 
 
-static int block_crypto_co_create_generic(BlockDriverState *bs,
-                                          int64_t size,
-                                          QCryptoBlockCreateOptions *opts,
-                                          PreallocMode prealloc,
-                                          Error **errp)
+static int coroutine_fn
+block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
+                               QCryptoBlockCreateOptions *opts,
+                               PreallocMode prealloc, Error **errp)
 {
     int ret;
     BlockBackend *blk;
     QCryptoBlock *crypto = NULL;
     struct BlockCryptoCreateData data;
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto cleanup;
@@ -360,7 +359,7 @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
                          PreallocMode prealloc, BdrvRequestFlags flags,
                          Error **errp)
@@ -398,7 +397,7 @@ static int block_crypto_reopen_prepare(BDRVReopenState *state,
  */
 #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                        QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -460,7 +459,7 @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 }
 
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                         QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -531,7 +530,8 @@ static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp)
 }
 
 
-static int64_t coroutine_fn block_crypto_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+block_crypto_co_getlength(BlockDriverState *bs)
 {
     BlockCrypto *crypto = bs->opaque;
     int64_t len = bdrv_co_getlength(bs->file->bs);
@@ -639,7 +639,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
     assert(create_options->driver == BLOCKDEV_DRIVER_LUKS);
     luks_opts = &create_options->u.luks;
 
-    bs = bdrv_open_blockdev_ref(luks_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(luks_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
@@ -665,10 +665,9 @@ fail:
     return ret;
 }
 
-static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv,
-                                                         const char *filename,
-                                                         QemuOpts *opts,
-                                                         Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
+                                 QemuOpts *opts, Error **errp)
 {
     QCryptoBlockCreateOptions *create_opts = NULL;
     BlockDriverState *bs = NULL;
@@ -708,8 +707,8 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv,
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (!bs) {
         ret = -EINVAL;
         goto fail;
index cbada22e9e80f6fce07b75683549a6a570ac8a32..8bb39a134e4bfe668c80b16fb33afe9861822281 100644 (file)
 
 // #define DEBUG_VERBOSE
 
+/* CURL 7.85.0 switches to a string based API for specifying
+ * the desired protocols.
+ */
+#if LIBCURL_VERSION_NUM >= 0x075500
+#define PROTOCOLS "HTTP,HTTPS,FTP,FTPS"
+#else
 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
                    CURLPROTO_FTP | CURLPROTO_FTPS)
+#endif
 
 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
@@ -510,9 +517,18 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
          * obscure protocols.  For example, do not allow POP3/SMTP/IMAP see
          * CVE-2013-0249.
          *
-         * Restricting protocols is only supported from 7.19.4 upwards.
+         * Restricting protocols is only supported from 7.19.4 upwards. Note:
+         * version 7.85.0 deprecates CURLOPT_*PROTOCOLS in favour of a string
+         * based CURLOPT_*PROTOCOLS_STR API.
          */
-#if LIBCURL_VERSION_NUM >= 0x071304
+#if LIBCURL_VERSION_NUM >= 0x075500
+        if (curl_easy_setopt(state->curl,
+                             CURLOPT_PROTOCOLS_STR, PROTOCOLS) ||
+            curl_easy_setopt(state->curl,
+                             CURLOPT_REDIR_PROTOCOLS_STR, PROTOCOLS)) {
+            goto err;
+        }
+#elif LIBCURL_VERSION_NUM >= 0x071304
         if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) ||
             curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) {
             goto err;
@@ -670,7 +686,12 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     const char *file;
     const char *cookie;
     const char *cookie_secret;
-    double d;
+    /* CURL >= 7.55.0 uses curl_off_t for content length instead of a double */
+#if LIBCURL_VERSION_NUM >= 0x073700
+    curl_off_t cl;
+#else
+    double cl;
+#endif
     const char *secretid;
     const char *protocol_delimiter;
     int ret;
@@ -797,27 +818,36 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     }
     if (curl_easy_perform(state->curl))
         goto out;
-    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) {
+    /* CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
+     * the *_T version which returns a more sensible type for content length.
+     */
+#if LIBCURL_VERSION_NUM >= 0x073700
+    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl)) {
         goto out;
     }
+#else
+    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl)) {
+        goto out;
+    }
+#endif
     /* Prior CURL 7.19.4 return value of 0 could mean that the file size is not
      * know or the size is zero. From 7.19.4 CURL returns -1 if size is not
      * known and zero if it is really zero-length file. */
 #if LIBCURL_VERSION_NUM >= 0x071304
-    if (d < 0) {
+    if (cl < 0) {
         pstrcpy(state->errmsg, CURL_ERROR_SIZE,
                 "Server didn't report file size.");
         goto out;
     }
 #else
-    if (d <= 0) {
+    if (cl <= 0) {
         pstrcpy(state->errmsg, CURL_ERROR_SIZE,
                 "Unknown file size or zero-length file.");
         goto out;
     }
 #endif
 
-    s->len = d;
+    s->len = cl;
 
     if ((!strncasecmp(s->url, "http://", strlen("http://"))
         || !strncasecmp(s->url, "https://", strlen("https://")))
@@ -850,8 +880,10 @@ out_noclean:
     g_free(s->username);
     g_free(s->proxyusername);
     g_free(s->proxypassword);
-    curl_drop_all_sockets(s->sockets);
-    g_hash_table_destroy(s->sockets);
+    if (s->sockets) {
+        curl_drop_all_sockets(s->sockets);
+        g_hash_table_destroy(s->sockets);
+    }
     qemu_opts_del(opts);
     return -EINVAL;
 }
index 1e7aee40107c817a851a8db0a4d06202e80a4e78..13a1979755d8a52b2369a6f3bdd1145a7bdeae4f 100644 (file)
@@ -394,6 +394,7 @@ int coroutine_fn
 bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
                                        Error **errp)
 {
+    assert_bdrv_graph_readable();
     if (bs->drv && bs->drv->bdrv_co_remove_persistent_dirty_bitmap) {
         return bs->drv->bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
     }
@@ -415,6 +416,7 @@ bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
                                    uint32_t granularity, Error **errp)
 {
     BlockDriver *drv = bs->drv;
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         error_setg_errno(errp, ENOMEDIUM,
index d3073a7caa9e8a433bcead9a6362247fe6a4fad1..5760cf22d17d4d884615a88e60e37a5cd6d05757 100644 (file)
@@ -1738,7 +1738,7 @@ static int handle_aiocb_write_zeroes(void *opaque)
 #ifdef CONFIG_FALLOCATE
     /* Last resort: we are trying to extend the file with zeroed data. This
      * can be done via fallocate(fd, 0) */
-    len = bdrv_getlength(aiocb->bs);
+    len = raw_co_getlength(aiocb->bs);
     if (s->has_fallocate && len >= 0 && aiocb->aio_offset >= len) {
         int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
         if (ret == 0 || ret != -ENOTSUP) {
@@ -2607,10 +2607,9 @@ out:
     return result;
 }
 
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions options;
     int64_t total_size = 0;
@@ -2920,8 +2919,8 @@ static void coroutine_fn check_cache_dropped(BlockDriverState *bs, Error **errp)
 }
 #endif /* __linux__ */
 
-static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
-                                                 Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
@@ -3272,7 +3271,7 @@ static void raw_abort_perm_update(BlockDriverState *bs)
     raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
 }
 
-static int coroutine_fn raw_co_copy_range_from(
+static int coroutine_fn GRAPH_RDLOCK raw_co_copy_range_from(
         BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
         BdrvChild *dst, int64_t dst_offset, int64_t bytes,
         BdrvRequestFlags read_flags, BdrvRequestFlags write_flags)
@@ -3281,14 +3280,12 @@ static int coroutine_fn raw_co_copy_range_from(
                                  read_flags, write_flags);
 }
 
-static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
-                                             BdrvChild *src,
-                                             int64_t src_offset,
-                                             BdrvChild *dst,
-                                             int64_t dst_offset,
-                                             int64_t bytes,
-                                             BdrvRequestFlags read_flags,
-                                             BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_to(BlockDriverState *bs,
+                     BdrvChild *src, int64_t src_offset,
+                     BdrvChild *dst, int64_t dst_offset,
+                     int64_t bytes, BdrvRequestFlags read_flags,
+                     BdrvRequestFlags write_flags)
 {
     RawPosixAIOData acb;
     BDRVRawState *s = bs->opaque;
index 200d244116428076dcfba9bdbbddf5a45def7f07..c7d0b8530637f31a657f4e35d446e9b99c50f1e2 100644 (file)
@@ -613,10 +613,9 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
     return 0;
 }
 
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions options;
     int64_t total_size = 0;
index 2e2a65966cfc227d2d0d05408fa16b33d205ba1d..ac285f4b665776f195f6c7213a53f88bcccfce55 100644 (file)
@@ -55,45 +55,43 @@ static int compress_open(BlockDriverState *bs, QDict *options, int flags,
 }
 
 
-static int64_t coroutine_fn compress_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+compress_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
 
 
-static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs,
-                                                int64_t offset, int64_t bytes,
-                                                QEMUIOVector *qiov,
-                                                size_t qiov_offset,
-                                                BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, size_t qiov_offset,
+                        BdrvRequestFlags flags)
 {
     return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
                                flags);
 }
 
 
-static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs,
-                                                 int64_t offset,
-                                                 int64_t bytes,
-                                                 QEMUIOVector *qiov,
-                                                 size_t qiov_offset,
-                                                 BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                         QEMUIOVector *qiov, size_t qiov_offset,
+                         BdrvRequestFlags flags)
 {
     return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
                                 flags | BDRV_REQ_WRITE_COMPRESSED);
 }
 
 
-static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int64_t bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 
-static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
@@ -117,14 +115,14 @@ static void compress_refresh_limits(BlockDriverState *bs, Error **errp)
 }
 
 
-static void coroutine_fn
+static void coroutine_fn GRAPH_RDLOCK
 compress_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     bdrv_co_eject(bs->file->bs, eject_flag);
 }
 
 
-static void coroutine_fn
+static void coroutine_fn GRAPH_RDLOCK
 compress_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     bdrv_co_lock_medium(bs->file->bs, locked);
index 2dc0c13e41936b49651fcc0e929e93bf1f228e36..8974d46941b55b2a2bfec569bd099f40a0acf174 100644 (file)
@@ -160,6 +160,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
     bool have_limits;
 
     GLOBAL_STATE_CODE();
+    assume_graph_lock(); /* FIXME */
 
     if (tran) {
         BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
@@ -932,6 +933,7 @@ int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
 {
     int ret;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
     if (ret < 0) {
@@ -959,16 +961,16 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
     aio_co_wake(co->coroutine);
 }
 
-static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
-                                           int64_t offset, int64_t bytes,
-                                           QEMUIOVector *qiov,
-                                           size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriver *drv = bs->drv;
     int64_t sector_num;
     unsigned int nb_sectors;
     QEMUIOVector local_qiov;
     int ret;
+    assert_bdrv_graph_readable();
 
     bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
     assert(!(flags & ~bs->supported_read_flags));
@@ -1028,11 +1030,10 @@ out:
     return ret;
 }
 
-static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
-                                            int64_t offset, int64_t bytes,
-                                            QEMUIOVector *qiov,
-                                            size_t qiov_offset,
-                                            BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, size_t qiov_offset,
+                    BdrvRequestFlags flags)
 {
     BlockDriver *drv = bs->drv;
     bool emulate_fua = false;
@@ -1040,6 +1041,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
     unsigned int nb_sectors;
     QEMUIOVector local_qiov;
     int ret;
+    assert_bdrv_graph_readable();
 
     bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
 
@@ -1110,7 +1112,7 @@ emulate_flags:
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
                                int64_t bytes, QEMUIOVector *qiov,
                                size_t qiov_offset)
@@ -1118,6 +1120,7 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
     BlockDriver *drv = bs->drv;
     QEMUIOVector local_qiov;
     int ret;
+    assert_bdrv_graph_readable();
 
     bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
 
@@ -1145,9 +1148,9 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
-        size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
+                         QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriverState *bs = child->bs;
 
@@ -1309,9 +1312,10 @@ err:
  * handles copy on read, zeroing after EOF, and fragmentation of large
  * reads; any other features must be implemented by the caller.
  */
-static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
-    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
-    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
+                    int64_t offset, int64_t bytes, int64_t align,
+                    QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriverState *bs = child->bs;
     int64_t total_bytes, max_bytes;
@@ -1478,10 +1482,9 @@ static bool bdrv_init_padding(BlockDriverState *bs,
     return true;
 }
 
-static coroutine_fn int bdrv_padding_rmw_read(BdrvChild *child,
-                                              BdrvTrackedRequest *req,
-                                              BdrvRequestPadding *pad,
-                                              bool zero_middle)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_padding_rmw_read(BdrvChild *child, BdrvTrackedRequest *req,
+                      BdrvRequestPadding *pad, bool zero_middle)
 {
     QEMUIOVector local_qiov;
     BlockDriverState *bs = child->bs;
@@ -1669,8 +1672,9 @@ fail:
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                         BdrvRequestFlags flags)
 {
     BlockDriver *drv = bs->drv;
     QEMUIOVector qiov;
@@ -1686,6 +1690,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
                         bs->bl.request_alignment);
     int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
 
+    assert_bdrv_graph_readable();
     bdrv_check_request(offset, bytes, &error_abort);
 
     if (!drv) {
@@ -1889,10 +1894,11 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
  * Forwards an already correctly aligned write request to the BlockDriver,
  * after possibly fragmenting it.
  */
-static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
-    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
-    int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
-    BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req,
+                     int64_t offset, int64_t bytes, int64_t align,
+                     QEMUIOVector *qiov, size_t qiov_offset,
+                     BdrvRequestFlags flags)
 {
     BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
@@ -1926,6 +1932,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
             flags |= BDRV_REQ_MAY_UNMAP;
         }
+
+        /* Can't use optimization hint with bufferless zero write */
+        flags &= ~BDRV_REQ_REGISTERED_BUF;
     }
 
     if (ret < 0) {
@@ -1973,11 +1982,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
-                                                int64_t offset,
-                                                int64_t bytes,
-                                                BdrvRequestFlags flags,
-                                                BdrvTrackedRequest *req)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
+                        BdrvRequestFlags flags, BdrvTrackedRequest *req)
 {
     BlockDriverState *bs = child->bs;
     QEMUIOVector local_qiov;
@@ -2150,6 +2157,7 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
 {
     IO_CODE();
     trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
+    assert_bdrv_graph_readable();
 
     if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
         flags &= ~BDRV_REQ_MAY_UNMAP;
@@ -2221,11 +2229,10 @@ int bdrv_flush_all(void)
  * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
  * set to the host mapping and BDS corresponding to the guest offset.
  */
-static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
-                                             bool want_zero,
-                                             int64_t offset, int64_t bytes,
-                                             int64_t *pnum, int64_t *map,
-                                             BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
+                     int64_t offset, int64_t bytes,
+                     int64_t *pnum, int64_t *map, BlockDriverState **file)
 {
     int64_t total_size;
     int64_t n; /* bytes */
@@ -2237,6 +2244,7 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
     bool has_filtered_child;
 
     assert(pnum);
+    assert_bdrv_graph_readable();
     *pnum = 0;
     total_size = bdrv_getlength(bs);
     if (total_size < 0) {
@@ -2467,6 +2475,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
     IO_CODE();
 
     assert(!include_base || base); /* Can't include NULL base */
+    assert_bdrv_graph_readable();
 
     if (!depth) {
         depth = &dummy;
@@ -2833,6 +2842,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     int ret = 0;
     IO_CODE();
 
+    assert_bdrv_graph_readable();
     bdrv_inc_in_flight(bs);
 
     if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs) ||
@@ -2958,6 +2968,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
     int head, tail, align;
     BlockDriverState *bs = child->bs;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!bs || !bs->drv || !bdrv_co_is_inserted(bs)) {
         return -ENOMEDIUM;
@@ -3077,6 +3088,7 @@ int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
     };
     BlockAIOCB *acb;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     bdrv_inc_in_flight(bs);
     if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
@@ -3141,6 +3153,7 @@ void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs)
 {
     BdrvChild *child;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH(child, &bs->children, next) {
         bdrv_co_io_plug(child->bs);
@@ -3158,6 +3171,7 @@ void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs)
 {
     BdrvChild *child;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     assert(bs->io_plugged);
     if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
@@ -3173,13 +3187,15 @@ void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs)
 }
 
 /* Helper that undoes bdrv_register_buf() when it fails partway through */
-static void bdrv_register_buf_rollback(BlockDriverState *bs,
-                                       void *host,
-                                       size_t size,
-                                       BdrvChild *final_child)
+static void GRAPH_RDLOCK
+bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
+                           BdrvChild *final_child)
 {
     BdrvChild *child;
 
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
+
     QLIST_FOREACH(child, &bs->children, next) {
         if (child == final_child) {
             break;
@@ -3199,6 +3215,8 @@ bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
     BdrvChild *child;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (bs->drv && bs->drv->bdrv_register_buf) {
         if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
             return false;
@@ -3218,6 +3236,8 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
     BdrvChild *child;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (bs->drv && bs->drv->bdrv_unregister_buf) {
         bs->drv->bdrv_unregister_buf(bs, host, size);
     }
@@ -3226,7 +3246,7 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
     }
 }
 
-static int coroutine_fn bdrv_co_copy_range_internal(
+static int coroutine_fn GRAPH_RDLOCK bdrv_co_copy_range_internal(
         BdrvChild *src, int64_t src_offset, BdrvChild *dst,
         int64_t dst_offset, int64_t bytes,
         BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
@@ -3234,6 +3254,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
 {
     BdrvTrackedRequest req;
     int ret;
+    assert_bdrv_graph_readable();
 
     /* TODO We can support BDRV_REQ_NO_FALLBACK here */
     assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
@@ -3315,6 +3336,7 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
                                          BdrvRequestFlags write_flags)
 {
     IO_CODE();
+    assert_bdrv_graph_readable();
     trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
                                   read_flags, write_flags);
     return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3332,6 +3354,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
                                        BdrvRequestFlags write_flags)
 {
     IO_CODE();
+    assert_bdrv_graph_readable();
     trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                 read_flags, write_flags);
     return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3344,6 +3367,8 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
                                     BdrvRequestFlags write_flags)
 {
     IO_CODE();
+    assert_bdrv_graph_readable();
+
     return bdrv_co_copy_range_from(src, src_offset,
                                    dst, dst_offset,
                                    bytes, read_flags, write_flags);
@@ -3377,6 +3402,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
     int64_t old_size, new_bytes;
     int ret;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
     if (!drv) {
@@ -3514,6 +3540,7 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
     BlockDriver *drv = bs->drv;
     int ret;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -3539,6 +3566,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs,
     BlockDriver *drv = bs->drv;
     int ret;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -3562,6 +3590,7 @@ bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
     BlockDriver *drv = bs->drv;
     int ret;
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return -ENOMEDIUM;
index b3e10f40b649cf31eb1afdba339b664176c044d4..9fc0bed90b81ef1361542475174e048de53f2893 100644 (file)
@@ -269,6 +269,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                 timer_mod(&iTask->retry_timer,
                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                 iTask->do_retry = 1;
+                return;
             } else if (status == SCSI_STATUS_CHECK_CONDITION) {
                 int error = iscsi_translate_sense(&task->sense);
                 if (error == EAGAIN) {
@@ -1353,6 +1354,9 @@ static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
     } else if (!password) {
         error_setg(errp, "CHAP username specified but no password was given");
         return;
+    } else {
+        warn_report("iSCSI block driver 'password' option is deprecated, "
+                    "use 'password-secret' instead");
     }
 
     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
@@ -2186,14 +2190,12 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
     iscsi_allocmap_invalidate(iscsilun);
 }
 
-static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
-                                                 BdrvChild *src,
-                                                 int64_t src_offset,
-                                                 BdrvChild *dst,
-                                                 int64_t dst_offset,
-                                                 int64_t bytes,
-                                                 BdrvRequestFlags read_flags,
-                                                 BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+iscsi_co_copy_range_from(BlockDriverState *bs,
+                         BdrvChild *src, int64_t src_offset,
+                         BdrvChild *dst, int64_t dst_offset,
+                         int64_t bytes, BdrvRequestFlags read_flags,
+                         BdrvRequestFlags write_flags)
 {
     return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
@@ -2327,14 +2329,12 @@ static void iscsi_xcopy_data(struct iscsi_data *data,
                               src_lba, dst_lba);
 }
 
-static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
-                                               BdrvChild *src,
-                                               int64_t src_offset,
-                                               BdrvChild *dst,
-                                               int64_t dst_offset,
-                                               int64_t bytes,
-                                               BdrvRequestFlags read_flags,
-                                               BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+iscsi_co_copy_range_to(BlockDriverState *bs,
+                       BdrvChild *src, int64_t src_offset,
+                       BdrvChild *dst, int64_t dst_offset,
+                       int64_t bytes, BdrvRequestFlags read_flags,
+                       BdrvRequestFlags write_flags)
 {
     IscsiLun *dst_lun = dst->bs->opaque;
     IscsiLun *src_lun;
index 3662852dc25d1c20d48f603108c71b4b5ab69148..382bec0e7d57da9fdb44d16a34369d9b416e404f 100644 (file)
@@ -141,6 +141,7 @@ block_gen_c = custom_target('block-gen.c',
                                       '../include/block/dirty-bitmap.h',
                                       '../include/block/block_int-io.h',
                                       '../include/block/block-global-state.h',
+                                      '../include/sysemu/block-backend-global-state.h',
                                       '../include/sysemu/block-backend-io.h',
                                       'coroutines.h'
                                       ),
index ab326b67c977cf3b483804cf872493e95baaf48c..663e2b700241b637dc4b1f3d4fa1126395ef5ad5 100644 (file)
@@ -21,7 +21,6 @@
 #include "block/dirty-bitmap.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
 #include "qemu/memalign.h"
@@ -390,8 +389,10 @@ static void coroutine_fn mirror_co_read(void *opaque)
     op->is_in_flight = true;
     trace_mirror_one_iteration(s, op->offset, op->bytes);
 
-    ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
-                         &op->qiov, 0);
+    WITH_GRAPH_RDLOCK_GUARD() {
+        ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
+                             &op->qiov, 0);
+    }
     mirror_read_complete(op, ret);
 }
 
@@ -558,9 +559,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         MirrorMethod mirror_method = MIRROR_METHOD_COPY;
 
         assert(!(offset % s->granularity));
-        ret = bdrv_block_status_above(source, NULL, offset,
-                                      nb_chunks * s->granularity,
-                                      &io_bytes, NULL, NULL);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            ret = bdrv_block_status_above(source, NULL, offset,
+                                        nb_chunks * s->granularity,
+                                        &io_bytes, NULL, NULL);
+        }
         if (ret < 0) {
             io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
         } else if (ret & BDRV_BLOCK_DATA) {
@@ -863,8 +866,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             return 0;
         }
 
-        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
-                                      &count);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset,
+                                          bytes, &count);
+        }
         if (ret < 0) {
             return ret;
         }
@@ -896,6 +901,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
     BlockDriverState *bs = s->mirror_top_bs->backing->bs;
+    MirrorBDSOpaque *mirror_top_opaque = s->mirror_top_bs->opaque;
     BlockDriverState *target_bs = blk_bs(s->target);
     bool need_drain = true;
     BlockDeviceIoStatus iostatus;
@@ -910,7 +916,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         goto immediate_exit;
     }
 
+    bdrv_graph_co_rdlock();
     s->bdev_length = bdrv_co_getlength(bs);
+    bdrv_graph_co_rdunlock();
+
     if (s->bdev_length < 0) {
         ret = s->bdev_length;
         goto immediate_exit;
@@ -985,6 +994,12 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         }
     }
 
+    /*
+     * Only now the job is fully initialised and mirror_top_bs should start
+     * accessing it.
+     */
+    mirror_top_opaque->job = s;
+
     assert(!s->dbi);
     s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
     for (;;) {
@@ -1426,15 +1441,17 @@ static void coroutine_fn active_write_settle(MirrorOp *op)
     g_free(op);
 }
 
-static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
-    MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
-    int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
+                         uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+                         int flags)
 {
     MirrorOp *op = NULL;
     MirrorBDSOpaque *s = bs->opaque;
@@ -1483,8 +1500,9 @@ out:
     return ret;
 }
 
-static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     MirrorBDSOpaque *s = bs->opaque;
     QEMUIOVector bounce_qiov;
@@ -1524,7 +1542,7 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK bdrv_mirror_top_flush(BlockDriverState *bs)
 {
     if (bs->backing == NULL) {
         /* we can be here after failed bdrv_append in mirror_start_job */
@@ -1533,15 +1551,16 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
     return bdrv_co_flush(bs->backing->bs);
 }
 
-static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                              int64_t bytes, BdrvRequestFlags flags)
 {
     return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
                                     flags);
 }
 
-static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
-    int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
                                     NULL, 0);
@@ -1704,7 +1723,6 @@ static BlockJob *mirror_start_job(
     if (!s) {
         goto fail;
     }
-    bs_opaque->job = s;
 
     /* The block job now has a reference to this node */
     bdrv_unref(mirror_top_bs);
index bbea2f2221ffaf028a9a1604a82b8af113bffb3c..013684801a61af4cd7cab346fdd67a4bcea63ec2 100644 (file)
@@ -165,9 +165,9 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
     return start_off;
 }
 
-static coroutine_fn int64_t allocate_clusters(BlockDriverState *bs,
-                                              int64_t sector_num,
-                                              int nb_sectors, int *pnum)
+static int64_t coroutine_fn GRAPH_RDLOCK
+allocate_clusters(BlockDriverState *bs, int64_t sector_num,
+                  int nb_sectors, int *pnum)
 {
     int ret = 0;
     BDRVParallelsState *s = bs->opaque;
@@ -261,7 +261,8 @@ static coroutine_fn int64_t allocate_clusters(BlockDriverState *bs,
 }
 
 
-static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_flush_to_os(BlockDriverState *bs)
 {
     BDRVParallelsState *s = bs->opaque;
     unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
@@ -320,9 +321,9 @@ static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
     return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 }
 
-static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
-                                            int64_t sector_num, int nb_sectors,
-                                            QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                    QEMUIOVector *qiov, int flags)
 {
     BDRVParallelsState *s = bs->opaque;
     uint64_t bytes_done = 0;
@@ -363,8 +364,9 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
     return ret;
 }
 
-static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                   QEMUIOVector *qiov)
 {
     BDRVParallelsState *s = bs->opaque;
     uint64_t bytes_done = 0;
@@ -414,9 +416,9 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
 }
 
 
-static int coroutine_fn parallels_co_check(BlockDriverState *bs,
-                                           BdrvCheckResult *res,
-                                           BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
+                   BdrvCheckMode fix)
 {
     BDRVParallelsState *s = bs->opaque;
     int64_t size, prev_off, high_off;
@@ -565,13 +567,13 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(parallels_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(parallels_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -620,10 +622,9 @@ exit:
     goto out;
 }
 
-static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
-                                                 const char *filename,
-                                                 QemuOpts *opts,
-                                                 Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_create_opts(BlockDriver *drv, const char *filename,
+                         QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     BlockDriverState *bs = NULL;
@@ -651,8 +652,8 @@ static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
         goto done;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto done;
index c0dcf8c346119c93e3c1b75cbaf1bab2e7a99212..71c360180945b57f44cd8392d61daee570098a7a 100644 (file)
@@ -226,16 +226,17 @@ static void preallocate_reopen_abort(BDRVReopenState *state)
     state->opaque = NULL;
 }
 
-static coroutine_fn int preallocate_co_preadv_part(
-        BlockDriverState *bs, int64_t offset, int64_t bytes,
-        QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           QEMUIOVector *qiov, size_t qiov_offset,
+                           BdrvRequestFlags flags)
 {
     return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
                                flags);
 }
 
-static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
-                                               int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
@@ -269,8 +270,9 @@ static bool has_prealloc_perms(BlockDriverState *bs)
  * want_merge_zero is used to merge write-zero request with preallocation in
  * one bdrv_co_pwrite_zeroes() call.
  */
-static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes, bool want_merge_zero)
+static bool coroutine_fn GRAPH_RDLOCK
+handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,
+             bool want_merge_zero)
 {
     BDRVPreallocateState *s = bs->opaque;
     int64_t end = offset + bytes;
@@ -345,8 +347,9 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
     return want_merge_zero;
 }
 
-static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                             int64_t bytes, BdrvRequestFlags flags)
 {
     bool want_merge_zero =
         !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
@@ -357,12 +360,10 @@ static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
-                                                    int64_t offset,
-                                                    int64_t bytes,
-                                                    QEMUIOVector *qiov,
-                                                    size_t qiov_offset,
-                                                    BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                            QEMUIOVector *qiov, size_t qiov_offset,
+                            BdrvRequestFlags flags)
 {
     handle_write(bs, offset, bytes, false);
 
@@ -370,7 +371,7 @@ static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
                                 flags);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
                         bool exact, PreallocMode prealloc,
                         BdrvRequestFlags flags, Error **errp)
@@ -437,12 +438,13 @@ preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
     return 0;
 }
 
-static int coroutine_fn preallocate_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK preallocate_co_flush(BlockDriverState *bs)
 {
     return bdrv_co_flush(bs->file->bs);
 }
 
-static int64_t coroutine_fn preallocate_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+preallocate_co_getlength(BlockDriverState *bs)
 {
     int64_t ret;
     BDRVPreallocateState *s = bs->opaque;
index 5f0801f545da78908b41838b37d8fa4347fc19cc..490e4f819ed15e01a681d70b01fc5349514a7246 100644 (file)
@@ -92,8 +92,8 @@ typedef struct BDRVQcowState {
 
 static QemuOptsList qcow_create_opts;
 
-static int coroutine_fn decompress_cluster(BlockDriverState *bs,
-                                           uint64_t cluster_offset);
+static int coroutine_fn GRAPH_RDLOCK
+decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 
 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -350,11 +350,10 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
  * return 0 if not allocated, 1 if *result is assigned, and negative
  * errno on failure.
  */
-static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
-                                           uint64_t offset, int allocate,
-                                           int compressed_size,
-                                           int n_start, int n_end,
-                                           uint64_t *result)
+static int coroutine_fn GRAPH_RDLOCK
+get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
+                   int compressed_size, int n_start, int n_end,
+                   uint64_t *result)
 {
     BDRVQcowState *s = bs->opaque;
     int min_index, i, j, l1_index, l2_index, ret;
@@ -525,11 +524,10 @@ static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
     return 1;
 }
 
-static int coroutine_fn qcow_co_block_status(BlockDriverState *bs,
-                                             bool want_zero,
-                                             int64_t offset, int64_t bytes,
-                                             int64_t *pnum, int64_t *map,
-                                             BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_block_status(BlockDriverState *bs, bool want_zero,
+                     int64_t offset, int64_t bytes, int64_t *pnum,
+                     int64_t *map, BlockDriverState **file)
 {
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster, ret;
@@ -586,8 +584,8 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
     return 0;
 }
 
-static int coroutine_fn decompress_cluster(BlockDriverState *bs,
-                                           uint64_t cluster_offset)
+static int coroutine_fn GRAPH_RDLOCK
+decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
 {
     BDRVQcowState *s = bs->opaque;
     int ret, csize;
@@ -619,9 +617,9 @@ static void qcow_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.request_alignment = BDRV_SECTOR_SIZE;
 }
 
-static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
-                                       int64_t bytes, QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQcowState *s = bs->opaque;
     int offset_in_cluster;
@@ -715,9 +713,9 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
     return ret;
 }
 
-static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                        int64_t bytes, QEMUIOVector *qiov,
-                                        BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQcowState *s = bs->opaque;
     int offset_in_cluster;
@@ -833,13 +831,13 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(qcow_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(qcow_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    qcow_blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                               BLK_PERM_ALL, errp);
+    qcow_blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
+                                  BLK_PERM_ALL, errp);
     if (!qcow_blk) {
         ret = -EPERM;
         goto exit;
@@ -923,9 +921,9 @@ exit:
     return ret;
 }
 
-static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_create_opts(BlockDriver *drv, const char *filename,
+                    QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     BlockDriverState *bs = NULL;
@@ -978,8 +976,8 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
@@ -1046,7 +1044,7 @@ static int qcow_make_empty(BlockDriverState *bs)
 
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
                            QEMUIOVector *qiov)
 {
index 870be106b6cd63bf1b4d689b9256527082fe4bde..a9e6622fe300c9ca9719436a647879024f668242 100644 (file)
@@ -491,10 +491,9 @@ static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters,
     return count;
 }
 
-static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
-                                            uint64_t src_cluster_offset,
-                                            unsigned offset_in_cluster,
-                                            QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+do_perform_cow_read(BlockDriverState *bs, uint64_t src_cluster_offset,
+                    unsigned offset_in_cluster, QEMUIOVector *qiov)
 {
     int ret;
 
@@ -535,10 +534,9 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
     return 0;
 }
 
-static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
-                                             uint64_t cluster_offset,
-                                             unsigned offset_in_cluster,
-                                             QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+do_perform_cow_write(BlockDriverState *bs, uint64_t cluster_offset,
+                     unsigned offset_in_cluster, QEMUIOVector *qiov)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -886,7 +884,8 @@ int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     return 0;
 }
 
-static int coroutine_fn perform_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int coroutine_fn GRAPH_RDLOCK
+perform_cow(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2COWRegion *start = &m->cow_start;
index 21aa4c6b7af8db631eb644db8dea526e0f4274c2..30fd53fa64bcd11eaddab8a53b558548b0c532fb 100644 (file)
@@ -601,9 +601,9 @@ static void qcow2_add_check_result(BdrvCheckResult *out,
     }
 }
 
-static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
-                                              BdrvCheckResult *result,
-                                              BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_check_locked(BlockDriverState *bs, BdrvCheckResult *result,
+                      BdrvCheckMode fix)
 {
     BdrvCheckResult snapshot_res = {};
     BdrvCheckResult refcount_res = {};
@@ -640,9 +640,9 @@ static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
-                                       BdrvCheckResult *result,
-                                       BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+               BdrvCheckMode fix)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -1294,9 +1294,9 @@ static int validate_compression_type(BDRVQcow2State *s, Error **errp)
 }
 
 /* Called with s->lock held.  */
-static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
-                                      int flags, bool open_data_file,
-                                      Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
+              bool open_data_file, Error **errp)
 {
     ERRP_GUARD();
     BDRVQcow2State *s = bs->opaque;
@@ -1617,9 +1617,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 
     if (open_data_file) {
         /* Open external data file */
-        s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
-                                       &child_of_bds, BDRV_CHILD_DATA,
-                                       true, errp);
+        s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
+                                          &child_of_bds, BDRV_CHILD_DATA,
+                                          true, errp);
         if (*errp) {
             ret = -EINVAL;
             goto fail;
@@ -1627,9 +1627,10 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 
         if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
             if (!s->data_file && s->image_data_file) {
-                s->data_file = bdrv_open_child(s->image_data_file, options,
-                                               "data-file", bs, &child_of_bds,
-                                               BDRV_CHILD_DATA, false, errp);
+                s->data_file = bdrv_co_open_child(s->image_data_file, options,
+                                                  "data-file", bs,
+                                                  &child_of_bds,
+                                                  BDRV_CHILD_DATA, false, errp);
                 if (!s->data_file) {
                     ret = -EINVAL;
                     goto fail;
@@ -1889,6 +1890,8 @@ static void coroutine_fn qcow2_open_entry(void *opaque)
     QCow2OpenCo *qoc = opaque;
     BDRVQcow2State *s = qoc->bs->opaque;
 
+    assume_graph_lock(); /* FIXME */
+
     qemu_co_mutex_lock(&s->lock);
     qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
                              qoc->errp);
@@ -2136,9 +2139,8 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
     return status;
 }
 
-static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
-                                            QCowL2Meta **pl2meta,
-                                            bool link_l2)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_handle_l2meta(BlockDriverState *bs, QCowL2Meta **pl2meta, bool link_l2)
 {
     int ret = 0;
     QCowL2Meta *l2meta = *pl2meta;
@@ -2169,7 +2171,7 @@ out:
     return ret;
 }
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_preadv_encrypted(BlockDriverState *bs,
                            uint64_t host_offset,
                            uint64_t offset,
@@ -2270,12 +2272,10 @@ static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
     return 0;
 }
 
-static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
-                                             QCow2SubclusterType subc_type,
-                                             uint64_t host_offset,
-                                             uint64_t offset, uint64_t bytes,
-                                             QEMUIOVector *qiov,
-                                             size_t qiov_offset)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
+                     uint64_t host_offset, uint64_t offset, uint64_t bytes,
+                     QEMUIOVector *qiov, size_t qiov_offset)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -2314,7 +2314,11 @@ static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
     g_assert_not_reached();
 }
 
-static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because qcow2_co_preadv_part() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static int coroutine_fn GRAPH_RDLOCK qcow2_co_preadv_task_entry(AioTask *task)
 {
     Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
 
@@ -2325,11 +2329,10 @@ static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
                                 t->qiov, t->qiov_offset);
 }
 
-static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes,
-                                             QEMUIOVector *qiov,
-                                             size_t qiov_offset,
-                                             BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     QEMUIOVector *qiov, size_t qiov_offset,
+                     BdrvRequestFlags flags)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret = 0;
@@ -2449,7 +2452,8 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
  * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error.
  * Note that returning 0 does not guarantee non-zero data.
  */
-static int coroutine_fn is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int coroutine_fn GRAPH_RDLOCK
+is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
 {
     /*
      * This check is designed for optimization shortcut so it must be
@@ -2467,8 +2471,8 @@ static int coroutine_fn is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
                                 m->cow_end.nb_bytes);
 }
 
-static int coroutine_fn handle_alloc_space(BlockDriverState *bs,
-                                           QCowL2Meta *l2meta)
+static int coroutine_fn GRAPH_RDLOCK
+handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowL2Meta *m;
@@ -2531,12 +2535,10 @@ static int coroutine_fn handle_alloc_space(BlockDriverState *bs,
  * l2meta  - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
  *           not use it somehow after qcow2_co_pwritev_task() call
  */
-static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
-                                              uint64_t host_offset,
-                                              uint64_t offset, uint64_t bytes,
-                                              QEMUIOVector *qiov,
-                                              uint64_t qiov_offset,
-                                              QCowL2Meta *l2meta)
+static coroutine_fn GRAPH_RDLOCK
+int qcow2_co_pwritev_task(BlockDriverState *bs, uint64_t host_offset,
+                          uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+                          uint64_t qiov_offset, QCowL2Meta *l2meta)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -2602,7 +2604,11 @@ out_locked:
     return ret;
 }
 
-static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because qcow2_co_pwritev_part() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static coroutine_fn GRAPH_RDLOCK int qcow2_co_pwritev_task_entry(AioTask *task)
 {
     Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
 
@@ -2613,9 +2619,10 @@ static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
                                  t->l2meta);
 }
 
-static coroutine_fn int qcow2_co_pwritev_part(
-        BlockDriverState *bs, int64_t offset, int64_t bytes,
-        QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                      QEMUIOVector *qiov, size_t qiov_offset,
+                      BdrvRequestFlags flags)
 {
     BDRVQcow2State *s = bs->opaque;
     int offset_in_cluster;
@@ -2770,8 +2777,8 @@ static void qcow2_close(BlockDriverState *bs)
     qcow2_do_close(bs, true);
 }
 
-static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
-                                                   Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
     ERRP_GUARD();
     BDRVQcow2State *s = bs->opaque;
@@ -3182,9 +3189,9 @@ static int qcow2_set_up_encryption(BlockDriverState *bs,
  *
  * Returns: 0 on success, -errno on failure.
  */
-static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t new_length, PreallocMode mode,
-                                       Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co(BlockDriverState *bs, uint64_t offset, uint64_t new_length,
+               PreallocMode mode, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t bytes;
@@ -3454,7 +3461,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
     qcow2_opts = &create_options->u.qcow2;
 
-    bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(qcow2_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
@@ -3596,7 +3603,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
             ret = -EINVAL;
             goto out;
         }
-        data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
+        data_bs = bdrv_co_open_blockdev_ref(qcow2_opts->data_file, errp);
         if (data_bs == NULL) {
             ret = -EIO;
             goto out;
@@ -3629,8 +3636,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     }
 
     /* Create BlockBackend to write to the image */
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -3712,9 +3719,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     if (data_bs) {
         qdict_put_str(options, "data-file", data_bs->node_name);
     }
-    blk = blk_new_open(NULL, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
-                       errp);
+    blk = blk_co_new_open(NULL, NULL, options,
+                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+                          errp);
     if (blk == NULL) {
         ret = -EIO;
         goto out;
@@ -3793,9 +3800,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     if (data_bs) {
         qdict_put_str(options, "data-file", data_bs->node_name);
     }
-    blk = blk_new_open(NULL, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
-                       errp);
+    blk = blk_co_new_open(NULL, NULL, options,
+                          BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
+                          errp);
     if (blk == NULL) {
         ret = -EIO;
         goto out;
@@ -3809,10 +3816,9 @@ out:
     return ret;
 }
 
-static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
-                                             const char *filename,
-                                             QemuOpts *opts,
-                                             Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
+                     Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -3877,8 +3883,8 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
         goto finish;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto finish;
@@ -3892,9 +3898,9 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
             goto finish;
         }
 
-        data_bs = bdrv_open(val, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                            errp);
+        data_bs = bdrv_co_open(val, NULL, NULL,
+                               BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                               errp);
         if (data_bs == NULL) {
             ret = -EIO;
             goto finish;
@@ -3973,8 +3979,9 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
     return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
 }
 
-static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       BdrvRequestFlags flags)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -4057,7 +4064,7 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_copy_range_from(BlockDriverState *bs,
                          BdrvChild *src, int64_t src_offset,
                          BdrvChild *dst, int64_t dst_offset,
@@ -4140,7 +4147,7 @@ out:
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_copy_range_to(BlockDriverState *bs,
                        BdrvChild *src, int64_t src_offset,
                        BdrvChild *dst, int64_t dst_offset,
@@ -4208,9 +4215,9 @@ fail:
     return ret;
 }
 
-static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
-                                          bool exact, PreallocMode prealloc,
-                                          BdrvRequestFlags flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+                  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t old_length;
@@ -4584,7 +4591,7 @@ fail:
     return ret;
 }
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
                                  uint64_t offset, uint64_t bytes,
                                  QEMUIOVector *qiov, size_t qiov_offset)
@@ -4648,7 +4655,13 @@ fail:
     return ret;
 }
 
-static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because
+ * qcow2_co_pwritev_compressed_part() holds the graph lock and keeps it until
+ * this coroutine has terminated.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwritev_compressed_task_entry(AioTask *task)
 {
     Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
 
@@ -4662,7 +4675,7 @@ static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
  * XXX: put compressed sectors first, then all the cluster aligned
  * tables to avoid losing bytes in alignment
  */
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
                                  int64_t offset, int64_t bytes,
                                  QEMUIOVector *qiov, size_t qiov_offset)
@@ -4725,7 +4738,7 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_preadv_compressed(BlockDriverState *bs,
                            uint64_t l2_entry,
                            uint64_t offset,
@@ -5287,8 +5300,8 @@ static int64_t qcow2_check_vmstate_request(BlockDriverState *bs,
     return pos;
 }
 
-static coroutine_fn int qcow2_co_save_vmstate(BlockDriverState *bs,
-                                              QEMUIOVector *qiov, int64_t pos)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
     int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
     if (offset < 0) {
@@ -5299,8 +5312,8 @@ static coroutine_fn int qcow2_co_save_vmstate(BlockDriverState *bs,
     return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
 }
 
-static coroutine_fn int qcow2_co_load_vmstate(BlockDriverState *bs,
-                                              QEMUIOVector *qiov, int64_t pos)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
     int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
     if (offset < 0) {
index 2285f18a734b17814a77c3bf14e49a25dc4179bf..c59e33c01cc91e583b277e302eab62f357dd6192 100644 (file)
@@ -846,7 +846,7 @@ int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
                          Error **errp);
 
 /* qcow2-refcount.c functions */
-int coroutine_fn qcow2_refcount_init(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
 
 int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
@@ -893,14 +893,17 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
 int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                 BlockDriverAmendStatusCB *status_cb,
                                 void *cb_opaque, Error **errp);
-int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs);
 int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs);
 
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size);
-int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
+
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                           uint8_t *buf, int nb_sectors, bool enc, Error **errp);
@@ -918,14 +921,17 @@ int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
                                      uint64_t *coffset, int *csize);
 
-int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
-                                             QCowL2Meta *m);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+
 void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
                           uint64_t bytes, enum qcow2_discard_type type,
                           bool full_discard);
-int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                                          uint64_t bytes, int flags);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                         int flags);
 
 int qcow2_expand_zero_clusters(BlockDriverState *bs,
                                BlockDriverAmendStatusCB *status_cb,
@@ -948,9 +954,10 @@ void qcow2_free_snapshots(BlockDriverState *bs);
 int qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
 int qcow2_write_snapshots(BlockDriverState *bs);
 
-int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
-                                                 BdrvCheckResult *result,
-                                                 BdrvCheckMode fix);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_read_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result,
+                                BdrvCheckMode fix);
+
 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
                                                 BdrvCheckResult *result,
                                                 BdrvCheckMode fix);
index a6612be00f578012c773ca539c1e17af05ffe64a..8fd94f405ed7e5d268c947777246b6346d6ccaf5 100644 (file)
@@ -107,7 +107,8 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
 /**
  * Descend tables and check each cluster is referenced once only
  */
-static int coroutine_fn qed_check_l1_table(QEDCheck *check, QEDTable *table)
+static int coroutine_fn GRAPH_RDLOCK
+qed_check_l1_table(QEDCheck *check, QEDTable *table)
 {
     BDRVQEDState *s = check->s;
     unsigned int i, num_invalid_l1 = 0;
index e41c87a157d079302d9ccc0f70fccf11c8708b5d..3b331ce70986125481d0739a86720129bea00ea8 100644 (file)
@@ -21,8 +21,8 @@
 #include "qemu/memalign.h"
 
 /* Called with table_lock held.  */
-static int coroutine_fn qed_read_table(BDRVQEDState *s, uint64_t offset,
-                                       QEDTable *table)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
     unsigned int bytes = s->header.cluster_size * s->header.table_size;
 
@@ -63,9 +63,9 @@ out:
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_write_table(BDRVQEDState *s, uint64_t offset,
-                                        QEDTable *table, unsigned int index,
-                                        unsigned int n, bool flush)
+static int coroutine_fn GRAPH_RDLOCK
+qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                unsigned int index, unsigned int n, bool flush)
 {
     unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
     unsigned int start, end, i;
index 4473465bba8c549d1fdca8b77cfa0cd6bc62ba69..ed94bb61cab3af8abe7a17ba3a0ce4af38e18c04 100644 (file)
@@ -100,7 +100,7 @@ int qed_write_header_sync(BDRVQEDState *s)
  *
  * No new allocating reqs can start while this function runs.
  */
-static int coroutine_fn qed_write_header(BDRVQEDState *s)
+static int coroutine_fn GRAPH_RDLOCK qed_write_header(BDRVQEDState *s)
 {
     /* We must write full sectors for O_DIRECT but cannot necessarily generate
      * the data following the header if an unrecognized compat feature is
@@ -282,11 +282,12 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s)
     qemu_co_mutex_unlock(&s->table_lock);
 }
 
-static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
+static void coroutine_fn GRAPH_RDLOCK qed_need_check_timer(BDRVQEDState *s)
 {
     int ret;
 
     trace_qed_need_check_timer_cb(s);
+    assert_bdrv_graph_readable();
 
     if (!qed_plug_allocating_write_reqs(s)) {
         return;
@@ -312,6 +313,7 @@ static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
 static void coroutine_fn qed_need_check_timer_entry(void *opaque)
 {
     BDRVQEDState *s = opaque;
+    GRAPH_RDLOCK_GUARD();
 
     qed_need_check_timer(opaque);
     bdrv_dec_in_flight(s->bs);
@@ -393,8 +395,8 @@ static void bdrv_qed_init_state(BlockDriverState *bs)
 }
 
 /* Called with table_lock held.  */
-static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
-                                         int flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
     QEDHeader le_header;
@@ -555,7 +557,7 @@ typedef struct QEDOpenCo {
     int ret;
 } QEDOpenCo;
 
-static void coroutine_fn bdrv_qed_open_entry(void *opaque)
+static void coroutine_fn GRAPH_RDLOCK bdrv_qed_open_entry(void *opaque)
 {
     QEDOpenCo *qoc = opaque;
     BDRVQEDState *s = qoc->bs->opaque;
@@ -577,6 +579,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
     };
     int ret;
 
+    assume_graph_lock(); /* FIXME */
+
     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
     if (ret < 0) {
         return ret;
@@ -676,13 +680,13 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(qed_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(qed_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -750,10 +754,9 @@ out:
     return ret;
 }
 
-static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
-                                                const char *filename,
-                                                QemuOpts *opts,
-                                                Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
+                        QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -783,8 +786,8 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
@@ -822,11 +825,10 @@ fail:
     return ret;
 }
 
-static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs,
-                                                 bool want_zero,
-                                                 int64_t pos, int64_t bytes,
-                                                 int64_t *pnum, int64_t *map,
-                                                 BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos,
+                         int64_t bytes, int64_t *pnum, int64_t *map,
+                         BlockDriverState **file)
 {
     BDRVQEDState *s = bs->opaque;
     size_t len = MIN(bytes, SIZE_MAX);
@@ -879,8 +881,8 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
  * This function reads qiov->size bytes starting at pos from the backing file.
  * If there is no backing file then zeroes are read.
  */
-static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
-                                              QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov)
 {
     if (s->bs->backing) {
         BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
@@ -898,9 +900,9 @@ static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
  * @len:        Number of bytes
  * @offset:     Byte offset in image file
  */
-static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
-                                                   uint64_t pos, uint64_t len,
-                                                   uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, uint64_t len,
+                           uint64_t offset)
 {
     QEMUIOVector qiov;
     int ret;
@@ -993,7 +995,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_l1_update(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     CachedL2Table *l2_table = acb->request.l2_table;
@@ -1023,7 +1025,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
     BDRVQEDState *s = acb_to_s(acb);
     bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
@@ -1061,7 +1064,7 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
  *
  * Called with table_lock *not* held.
  */
-static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_main(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset = acb->cur_cluster +
@@ -1079,7 +1082,7 @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_cow(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t start, len, offset;
@@ -1157,7 +1160,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
     int ret;
@@ -1220,8 +1224,8 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
-                                              size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
     int r;
@@ -1263,8 +1267,8 @@ out:
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
-                                           uint64_t offset, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_data(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDAIOCB *acb = opaque;
 
@@ -1296,8 +1300,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
-                                          uint64_t offset, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
@@ -1334,7 +1338,7 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
 /**
  * Begin next I/O or complete the request
  */
-static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_next_io(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset;
@@ -1379,9 +1383,9 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
     return ret;
 }
 
-static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qed_co_request(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov,
+               int nb_sectors, int flags)
 {
     QEDAIOCB acb = {
         .bs         = bs,
@@ -1398,24 +1402,23 @@ static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
     return qed_aio_next_io(&acb);
 }
 
-static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
-                                          int64_t sector_num, int nb_sectors,
-                                          QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                  QEMUIOVector *qiov)
 {
     return qed_co_request(bs, sector_num, qiov, nb_sectors, 0);
 }
 
-static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
-                                           int64_t sector_num, int nb_sectors,
-                                           QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                   QEMUIOVector *qiov, int flags)
 {
     return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
 }
 
-static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset,
-                                                  int64_t bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -1569,8 +1572,8 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
     return ret;
 }
 
-static void coroutine_fn bdrv_qed_co_invalidate_cache(BlockDriverState *bs,
-                                                      Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
     int ret;
@@ -1586,9 +1589,9 @@ static void coroutine_fn bdrv_qed_co_invalidate_cache(BlockDriverState *bs,
     }
 }
 
-static int coroutine_fn bdrv_qed_co_check(BlockDriverState *bs,
-                                          BdrvCheckResult *result,
-                                          BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+                  BdrvCheckMode fix)
 {
     BDRVQEDState *s = bs->opaque;
     int ret;
index 3d12bf78d4128356e62b9772ab8695f41f997d9b..988654cb860263c0f5325cc978ecbca907e36715 100644 (file)
@@ -200,33 +200,40 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 /**
  * Table I/O functions
  */
-int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s);
-int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index,
-                                    unsigned int n);
-int coroutine_fn qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
-                                         unsigned int n);
-int coroutine_fn qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                                        uint64_t offset);
-int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request,
-                                   uint64_t offset);
-int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                                    unsigned int index, unsigned int n,
-                                    bool flush);
-int coroutine_fn qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                                         unsigned int index, unsigned int n,
-                                         bool flush);
+int coroutine_fn GRAPH_RDLOCK qed_read_l1_table_sync(BDRVQEDState *s);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, unsigned int n);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, unsigned int index,
+                   unsigned int n, bool flush);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+                        unsigned int index, unsigned int n, bool flush);
 
 /**
  * Cluster functions
  */
-int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
-                                  uint64_t pos, size_t *len,
-                                  uint64_t *img_offset);
+int coroutine_fn GRAPH_RDLOCK
+qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                 size_t *len, uint64_t *img_offset);
 
 /**
  * Consistency check
  */
-int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
+int coroutine_fn GRAPH_RDLOCK
+qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
 
 QEDTable *qed_alloc_table(BDRVQEDState *s);
 
index d1dcf2eabac3849c90d70764eca86007b9d76cc9..ff5a0a2da3e1ad593b34042d122d90a05d679589 100644 (file)
@@ -270,7 +270,11 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
     }
 }
 
-static void coroutine_fn quorum_rewrite_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK quorum_rewrite_entry(void *opaque)
 {
     QuorumCo *co = opaque;
     QuorumAIOCB *acb = co->acb;
@@ -290,8 +294,8 @@ static void coroutine_fn quorum_rewrite_entry(void *opaque)
     }
 }
 
-static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
-                                        QuorumVoteValue *value)
+static bool coroutine_fn GRAPH_RDLOCK
+quorum_rewrite_bad_versions(QuorumAIOCB *acb, QuorumVoteValue *value)
 {
     QuorumVoteVersion *version;
     QuorumVoteItem *item;
@@ -491,7 +495,7 @@ static int quorum_vote_error(QuorumAIOCB *acb)
     return ret;
 }
 
-static void quorum_vote(QuorumAIOCB *acb)
+static void coroutine_fn GRAPH_RDLOCK quorum_vote(QuorumAIOCB *acb)
 {
     bool quorum = true;
     int i, j, ret;
@@ -571,7 +575,11 @@ free_exit:
     quorum_free_vote_list(&acb->votes);
 }
 
-static void coroutine_fn read_quorum_children_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK read_quorum_children_entry(void *opaque)
 {
     QuorumCo *co = opaque;
     QuorumAIOCB *acb = co->acb;
@@ -599,7 +607,7 @@ static void coroutine_fn read_quorum_children_entry(void *opaque)
     }
 }
 
-static int coroutine_fn read_quorum_children(QuorumAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK read_quorum_children(QuorumAIOCB *acb)
 {
     BDRVQuorumState *s = acb->bs->opaque;
     int i;
@@ -640,7 +648,7 @@ static int coroutine_fn read_quorum_children(QuorumAIOCB *acb)
     return acb->vote_ret;
 }
 
-static int coroutine_fn read_fifo_child(QuorumAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK read_fifo_child(QuorumAIOCB *acb)
 {
     BDRVQuorumState *s = acb->bs->opaque;
     int n, ret;
@@ -661,10 +669,9 @@ static int coroutine_fn read_fifo_child(QuorumAIOCB *acb)
     return ret;
 }
 
-static int coroutine_fn quorum_co_preadv(BlockDriverState *bs,
-                                         int64_t offset, int64_t bytes,
-                                         QEMUIOVector *qiov,
-                                         BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
@@ -683,7 +690,11 @@ static int coroutine_fn quorum_co_preadv(BlockDriverState *bs,
     return ret;
 }
 
-static void coroutine_fn write_quorum_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because quorum_co_pwritev() holds the
+ * graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK write_quorum_entry(void *opaque)
 {
     QuorumCo *co = opaque;
     QuorumAIOCB *acb = co->acb;
@@ -714,9 +725,9 @@ static void coroutine_fn write_quorum_entry(void *opaque)
     }
 }
 
-static int coroutine_fn quorum_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                          int64_t bytes, QEMUIOVector *qiov,
-                                          BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                  QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
@@ -745,16 +756,16 @@ static int coroutine_fn quorum_co_pwritev(BlockDriverState *bs, int64_t offset,
     return ret;
 }
 
-static int coroutine_fn quorum_co_pwrite_zeroes(BlockDriverState *bs,
-                                                int64_t offset, int64_t bytes,
-                                                BdrvRequestFlags flags)
-
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        BdrvRequestFlags flags)
 {
     return quorum_co_pwritev(bs, offset, bytes, NULL,
                              flags | BDRV_REQ_ZERO_WRITE);
 }
 
-static int64_t coroutine_fn quorum_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+quorum_co_getlength(BlockDriverState *bs)
 {
     BDRVQuorumState *s = bs->opaque;
     int64_t result;
@@ -778,7 +789,7 @@ static int64_t coroutine_fn quorum_co_getlength(BlockDriverState *bs)
     return result;
 }
 
-static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK quorum_co_flush(BlockDriverState *bs)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumVoteVersion *winner = NULL;
@@ -1217,11 +1228,10 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
  * return BDRV_BLOCK_ZERO if *all* children agree that a certain
  * region contains zeroes, and BDRV_BLOCK_DATA otherwise.
  */
-static int coroutine_fn quorum_co_block_status(BlockDriverState *bs,
-                                               bool want_zero,
-                                               int64_t offset, int64_t count,
-                                               int64_t *pnum, int64_t *map,
-                                               BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_block_status(BlockDriverState *bs, bool want_zero,
+                       int64_t offset, int64_t count,
+                       int64_t *pnum, int64_t *map, BlockDriverState **file)
 {
     BDRVQuorumState *s = bs->opaque;
     int i, ret;
index 0dc469b629bcadca423c8e7cb6ddd34601ec20c9..66783ed8e77bbc93079f88e18307020ae7c45216 100644 (file)
@@ -203,9 +203,9 @@ static inline int raw_adjust_offset(BlockDriverState *bs, int64_t *offset,
     return 0;
 }
 
-static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret;
 
@@ -218,9 +218,9 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                       int64_t bytes, QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     void *buf = NULL;
     BlockDriver *drv;
@@ -292,9 +292,9 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 }
 
-static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes,
-                                             BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     BdrvRequestFlags flags)
 {
     int ret;
 
@@ -305,8 +305,8 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     int ret;
 
@@ -317,7 +317,8 @@ static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
-static int64_t coroutine_fn raw_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+raw_co_getlength(BlockDriverState *bs)
 {
     int64_t len;
     BDRVRawState *s = bs->opaque;
@@ -384,9 +385,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
     }
 }
 
-static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
-                                        bool exact, PreallocMode prealloc,
-                                        BdrvRequestFlags flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+                PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -405,18 +406,20 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
     return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
 }
 
-static void coroutine_fn raw_co_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     bdrv_co_eject(bs->file->bs, eject_flag);
 }
 
-static void coroutine_fn raw_co_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     bdrv_co_lock_medium(bs->file->bs, locked);
 }
 
-static int coroutine_fn raw_co_ioctl(BlockDriverState *bs,
-                                     unsigned long int req, void *buf)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
     if (s->offset || s->has_size) {
@@ -430,10 +433,9 @@ static int raw_has_zero_init(BlockDriverState *bs)
     return bdrv_has_zero_init(bs->file->bs);
 }
 
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK /* drv unused; only creates the file */
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     return bdrv_co_create_file(filename, opts, errp);
 }
@@ -536,14 +538,12 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
     return bdrv_probe_geometry(bs->file->bs, geo);
 }
 
-static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
-                                               BdrvChild *src,
-                                               int64_t src_offset,
-                                               BdrvChild *dst,
-                                               int64_t dst_offset,
-                                               int64_t bytes,
-                                               BdrvRequestFlags read_flags,
-                                               BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_from(BlockDriverState *bs,
+                       BdrvChild *src, int64_t src_offset,
+                       BdrvChild *dst, int64_t dst_offset,
+                       int64_t bytes, BdrvRequestFlags read_flags,
+                       BdrvRequestFlags write_flags)
 {
     int ret;
 
@@ -555,14 +555,12 @@ static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
                                    bytes, read_flags, write_flags);
 }
 
-static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
-                                             BdrvChild *src,
-                                             int64_t src_offset,
-                                             BdrvChild *dst,
-                                             int64_t dst_offset,
-                                             int64_t bytes,
-                                             BdrvRequestFlags read_flags,
-                                             BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_to(BlockDriverState *bs,
+                     BdrvChild *src, int64_t src_offset,
+                     BdrvChild *dst, int64_t dst_offset,
+                     int64_t bytes, BdrvRequestFlags read_flags,
+                     BdrvRequestFlags write_flags)
 {
     int ret;
 
index 5e102fea0d8637dc71f005109fba471c644eb191..978671411ec77796d8a2c05bf4cf3306e709609a 100644 (file)
@@ -72,6 +72,16 @@ static const char rbd_luks2_header_verification[
     'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 2
 };
 
+static const char rbd_layered_luks_header_verification[
+        RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+    'R', 'B', 'D', 'L', 0xBA, 0xBE, 0, 1 /* get_specific_info: LUKS */
+};
+
+static const char rbd_layered_luks2_header_verification[
+        RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+    'R', 'B', 'D', 'L', 0xBA, 0xBE, 0, 2 /* get_specific_info: LUKS2 */
+};
+
 typedef enum {
     RBD_AIO_READ,
     RBD_AIO_WRITE,
@@ -386,7 +396,6 @@ static int qemu_rbd_encryption_format(rbd_image_t image,
 {
     int r = 0;
     g_autofree char *passphrase = NULL;
-    size_t passphrase_len;
     rbd_encryption_format_t format;
     rbd_encryption_options_t opts;
     rbd_encryption_luks1_format_options_t luks_opts;
@@ -408,12 +417,12 @@ static int qemu_rbd_encryption_format(rbd_image_t image,
             opts_size = sizeof(luks_opts);
             r = qemu_rbd_convert_luks_create_options(
                     qapi_RbdEncryptionCreateOptionsLUKS_base(&encrypt->u.luks),
-                    &luks_opts.alg, &passphrase, &passphrase_len, errp);
+                    &luks_opts.alg, &passphrase, &luks_opts.passphrase_size,
+                    errp);
             if (r < 0) {
                 return r;
             }
             luks_opts.passphrase = passphrase;
-            luks_opts.passphrase_size = passphrase_len;
             break;
         }
         case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
@@ -424,12 +433,12 @@ static int qemu_rbd_encryption_format(rbd_image_t image,
             r = qemu_rbd_convert_luks_create_options(
                     qapi_RbdEncryptionCreateOptionsLUKS2_base(
                             &encrypt->u.luks2),
-                    &luks2_opts.alg, &passphrase, &passphrase_len, errp);
+                    &luks2_opts.alg, &passphrase, &luks2_opts.passphrase_size,
+                    errp);
             if (r < 0) {
                 return r;
             }
             luks2_opts.passphrase = passphrase;
-            luks2_opts.passphrase_size = passphrase_len;
             break;
         }
         default: {
@@ -468,9 +477,11 @@ static int qemu_rbd_encryption_load(rbd_image_t image,
 {
     int r = 0;
     g_autofree char *passphrase = NULL;
-    size_t passphrase_len;
     rbd_encryption_luks1_format_options_t luks_opts;
     rbd_encryption_luks2_format_options_t luks2_opts;
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+    rbd_encryption_luks_format_options_t luks_any_opts;
+#endif
     rbd_encryption_format_t format;
     rbd_encryption_options_t opts;
     size_t opts_size;
@@ -483,12 +494,11 @@ static int qemu_rbd_encryption_load(rbd_image_t image,
             opts_size = sizeof(luks_opts);
             r = qemu_rbd_convert_luks_options(
                     qapi_RbdEncryptionOptionsLUKS_base(&encrypt->u.luks),
-                    &passphrase, &passphrase_len, errp);
+                    &passphrase, &luks_opts.passphrase_size, errp);
             if (r < 0) {
                 return r;
             }
             luks_opts.passphrase = passphrase;
-            luks_opts.passphrase_size = passphrase_len;
             break;
         }
         case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
@@ -498,14 +508,29 @@ static int qemu_rbd_encryption_load(rbd_image_t image,
             opts_size = sizeof(luks2_opts);
             r = qemu_rbd_convert_luks_options(
                     qapi_RbdEncryptionOptionsLUKS2_base(&encrypt->u.luks2),
-                    &passphrase, &passphrase_len, errp);
+                    &passphrase, &luks2_opts.passphrase_size, errp);
             if (r < 0) {
                 return r;
             }
             luks2_opts.passphrase = passphrase;
-            luks2_opts.passphrase_size = passphrase_len;
             break;
         }
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+        case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS_ANY: {
+            memset(&luks_any_opts, 0, sizeof(luks_any_opts));
+            format = RBD_ENCRYPTION_FORMAT_LUKS;
+            opts = &luks_any_opts;
+            opts_size = sizeof(luks_any_opts);
+            r = qemu_rbd_convert_luks_options(
+                    qapi_RbdEncryptionOptionsLUKSAny_base(&encrypt->u.luks_any),
+                    &passphrase, &luks_any_opts.passphrase_size, errp);
+            if (r < 0) {
+                return r;
+            }
+            luks_any_opts.passphrase = passphrase;
+            break;
+        }
+#endif
         default: {
             r = -ENOTSUP;
             error_setg_errno(
@@ -523,6 +548,128 @@ static int qemu_rbd_encryption_load(rbd_image_t image,
 
     return 0;
 }
+
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+static int qemu_rbd_encryption_load2(rbd_image_t image,
+                                     RbdEncryptionOptions *encrypt,
+                                     Error **errp)
+{ /* one rbd_encryption_spec_t per layer: encrypt, then each ->parent */
+    int r = 0;
+    int encrypt_count = 1; /* 'encrypt' itself; ancestors are added below */
+    int i;
+    RbdEncryptionOptions *curr_encrypt;
+    rbd_encryption_spec_t *specs;
+    rbd_encryption_luks1_format_options_t *luks_opts;
+    rbd_encryption_luks2_format_options_t *luks2_opts;
+    rbd_encryption_luks_format_options_t *luks_any_opts;
+
+    /* count encryption options */
+    for (curr_encrypt = encrypt->parent; curr_encrypt;
+         curr_encrypt = curr_encrypt->parent) {
+        ++encrypt_count;
+    }
+
+    specs = g_new0(rbd_encryption_spec_t, encrypt_count); /* zero-filled */
+
+    curr_encrypt = encrypt;
+    for (i = 0; i < encrypt_count; ++i) { /* specs[0] is 'encrypt' itself */
+        switch (curr_encrypt->format) {
+            case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: {
+                specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS1;
+
+                luks_opts = g_new0(rbd_encryption_luks1_format_options_t, 1);
+                specs[i].opts = luks_opts;
+                specs[i].opts_size = sizeof(*luks_opts);
+
+                r = qemu_rbd_convert_luks_options(
+                        qapi_RbdEncryptionOptionsLUKS_base(
+                                &curr_encrypt->u.luks),
+                        (char **)&luks_opts->passphrase, /* g_free()d at exit */
+                        &luks_opts->passphrase_size,
+                        errp);
+                break;
+            }
+            case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
+                specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS2;
+
+                luks2_opts = g_new0(rbd_encryption_luks2_format_options_t, 1);
+                specs[i].opts = luks2_opts;
+                specs[i].opts_size = sizeof(*luks2_opts);
+
+                r = qemu_rbd_convert_luks_options(
+                        qapi_RbdEncryptionOptionsLUKS2_base(
+                                &curr_encrypt->u.luks2),
+                        (char **)&luks2_opts->passphrase, /* g_free()d at exit */
+                        &luks2_opts->passphrase_size,
+                        errp);
+                break;
+            }
+            case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS_ANY: {
+                specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS;
+
+                luks_any_opts = g_new0(rbd_encryption_luks_format_options_t, 1);
+                specs[i].opts = luks_any_opts;
+                specs[i].opts_size = sizeof(*luks_any_opts);
+
+                r = qemu_rbd_convert_luks_options(
+                        qapi_RbdEncryptionOptionsLUKSAny_base(
+                                &curr_encrypt->u.luks_any),
+                        (char **)&luks_any_opts->passphrase, /* freed at exit */
+                        &luks_any_opts->passphrase_size,
+                        errp);
+                break;
+            }
+            default: { /* last case, so no break; specs[i].opts stays NULL */
+                r = -ENOTSUP;
+                error_setg_errno(
+                        errp, -r, "unknown image encryption format: %u",
+                        curr_encrypt->format);
+            }
+        }
+
+        if (r < 0) { /* conversion failed or format unknown */
+            goto exit;
+        }
+
+        curr_encrypt = curr_encrypt->parent; /* next layer */
+    }
+
+    r = rbd_encryption_load2(image, specs, encrypt_count);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "layered encryption load fail");
+        goto exit; /* exit: follows directly anyway */
+    }
+
+exit: /* reached on success as well as on failure */
+    for (i = 0; i < encrypt_count; ++i) {
+        if (!specs[i].opts) { /* this and later layers were never set up */
+            break;
+        }
+
+        switch (specs[i].format) { /* pick the struct type to find passphrase */
+            case RBD_ENCRYPTION_FORMAT_LUKS1: {
+                luks_opts = specs[i].opts;
+                g_free((void *)luks_opts->passphrase); /* NULL is fine here */
+                break;
+            }
+            case RBD_ENCRYPTION_FORMAT_LUKS2: {
+                luks2_opts = specs[i].opts;
+                g_free((void *)luks2_opts->passphrase);
+                break;
+            }
+            case RBD_ENCRYPTION_FORMAT_LUKS: {
+                luks_any_opts = specs[i].opts;
+                g_free((void *)luks_any_opts->passphrase);
+                break;
+            }
+        }
+
+        g_free(specs[i].opts); /* the per-layer options struct itself */
+    }
+    g_free(specs);
+    return r; /* last status; < 0 means failure, with errp set above */
+}
+#endif
 #endif
 
 /* FIXME Deprecate and remove keypairs or make it available in QMP. */
@@ -989,7 +1136,16 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 
     if (opts->encrypt) {
 #ifdef LIBRBD_SUPPORTS_ENCRYPTION
-        r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp);
+        if (opts->encrypt->parent) {
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+            r = qemu_rbd_encryption_load2(s->image, opts->encrypt, errp);
+#else
+            r = -ENOTSUP;
+            error_setg(errp, "RBD library does not support layered encryption");
+#endif
+        } else {
+            r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp);
+        }
         if (r < 0) {
             goto failed_post_open;
         }
@@ -1281,6 +1437,16 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
         spec_info->u.rbd.data->encryption_format =
                 RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
         spec_info->u.rbd.data->has_encryption_format = true;
+    } else if (memcmp(buf, rbd_layered_luks_header_verification,
+               RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+        spec_info->u.rbd.data->encryption_format =
+                RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
+        spec_info->u.rbd.data->has_encryption_format = true;
+    } else if (memcmp(buf, rbd_layered_luks2_header_verification,
+               RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+        spec_info->u.rbd.data->encryption_format =
+                RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
+        spec_info->u.rbd.data->has_encryption_format = true;
     } else {
         spec_info->u.rbd.data->has_encryption_format = false;
     }
index a27417d310802d09c25ef6e9902daa594f02efed..de01f9618467158130d6db712e1cbff3f49b0d3a 100644 (file)
@@ -179,7 +179,8 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
     return;
 }
 
-static int64_t coroutine_fn replication_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK /* length of bs->file->bs */
+replication_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
@@ -220,10 +221,9 @@ static int replication_return_value(BDRVReplicationState *s, int ret)
     return ret;
 }
 
-static coroutine_fn int replication_co_readv(BlockDriverState *bs,
-                                             int64_t sector_num,
-                                             int remaining_sectors,
-                                             QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+replication_co_readv(BlockDriverState *bs, int64_t sector_num,
+                     int remaining_sectors, QEMUIOVector *qiov)
 {
     BDRVReplicationState *s = bs->opaque;
     int ret;
@@ -244,11 +244,9 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
     return replication_return_value(s, ret);
 }
 
-static coroutine_fn int replication_co_writev(BlockDriverState *bs,
-                                              int64_t sector_num,
-                                              int remaining_sectors,
-                                              QEMUIOVector *qiov,
-                                              int flags)
+static int coroutine_fn GRAPH_RDLOCK
+replication_co_writev(BlockDriverState *bs, int64_t sector_num,
+                      int remaining_sectors, QEMUIOVector *qiov, int flags)
 {
     BDRVReplicationState *s = bs->opaque;
     QEMUIOVector hd_qiov;
index 0a30ec6cd9aab4eb744a2376508efab79c2b7206..67ea339da992835dcc46f3384945a48117d3da82 100644 (file)
@@ -26,7 +26,7 @@
 #include "qemu/cutils.h"
 #include "block/block_int.h"
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 snapshot_access_co_preadv_part(BlockDriverState *bs,
                                int64_t offset, int64_t bytes,
                                QEMUIOVector *qiov, size_t qiov_offset,
@@ -39,7 +39,7 @@ snapshot_access_co_preadv_part(BlockDriverState *bs,
     return bdrv_co_preadv_snapshot(bs->file, offset, bytes, qiov, qiov_offset);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 snapshot_access_co_block_status(BlockDriverState *bs,
                                 bool want_zero, int64_t offset,
                                 int64_t bytes, int64_t *pnum,
@@ -49,8 +49,8 @@ snapshot_access_co_block_status(BlockDriverState *bs,
                                          bytes, pnum, map, file);
 }
 
-static int coroutine_fn snapshot_access_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK /* snapshot discard on bs->file->bs */
+snapshot_access_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard_snapshot(bs->file->bs, offset, bytes);
 }
index 8744ad103f71ebd130ff3ac2aa906b9a7cad0a8e..7f9e1ecdbb41b8d4ab6231bea498a48b52a7a8c6 100644 (file)
@@ -16,7 +16,6 @@
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
@@ -141,9 +140,11 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         return 0;
     }
 
-    len = bdrv_getlength(s->target_bs);
-    if (len < 0) {
-        return len;
+    WITH_GRAPH_RDLOCK_GUARD() {
+        len = bdrv_co_getlength(s->target_bs);
+        if (len < 0) {
+            return len;
+        }
     }
     job_progress_set_remaining(&s->common.job, len);
 
@@ -161,21 +162,25 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 
         copy = false;
 
-        ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
-        if (ret == 1) {
-            /* Allocated in the top, no need to copy.  */
-        } else if (ret >= 0) {
-            /* Copy if allocated in the intermediate images.  Limit to the
-             * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).  */
-            ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
-                                          s->base_overlay, true,
-                                          offset, n, &n);
-            /* Finish early if end of backing file has been reached */
-            if (ret == 0 && n == 0) {
-                n = len - offset;
+        WITH_GRAPH_RDLOCK_GUARD() {
+            ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
+            if (ret == 1) {
+                /* Allocated in the top, no need to copy.  */
+            } else if (ret >= 0) {
+                /*
+                 * Copy if allocated in the intermediate images.  Limit to the
+                 * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).
+                 */
+                ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+                                            s->base_overlay, true,
+                                            offset, n, &n);
+                /* Finish early if end of backing file has been reached */
+                if (ret == 0 && n == 0) {
+                    n = len - offset;
+                }
+
+                copy = (ret > 0);
             }
-
-            copy = (ret > 0);
         }
         trace_stream_one_iteration(s, offset, n, ret);
         if (copy) {
index 64fa0f5acc104d4d01bad8a0bcd68a8264f6e21c..3aaef18d4ed4212ef7ee5c67fa14df78706ab263 100644 (file)
@@ -106,15 +106,15 @@ static void throttle_close(BlockDriverState *bs)
 }
 
 
-static int64_t coroutine_fn throttle_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK /* length of bs->file->bs */
+throttle_co_getlength(BlockDriverState *bs)
 {
     return bdrv_co_getlength(bs->file->bs);
 }
 
-static int coroutine_fn throttle_co_preadv(BlockDriverState *bs,
-                                           int64_t offset, int64_t bytes,
-                                           QEMUIOVector *qiov,
-                                           BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
 
     ThrottleGroupMember *tgm = bs->opaque;
@@ -123,10 +123,9 @@ static int coroutine_fn throttle_co_preadv(BlockDriverState *bs,
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs,
-                                            int64_t offset, int64_t bytes,
-                                            QEMUIOVector *qiov,
-                                            BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     ThrottleGroupMember *tgm = bs->opaque;
     throttle_group_co_io_limits_intercept(tgm, bytes, true);
@@ -134,9 +133,9 @@ static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs,
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int64_t bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     ThrottleGroupMember *tgm = bs->opaque;
     throttle_group_co_io_limits_intercept(tgm, bytes, true);
@@ -144,8 +143,8 @@ static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     ThrottleGroupMember *tgm = bs->opaque;
     throttle_group_co_io_limits_intercept(tgm, bytes, true);
@@ -153,16 +152,15 @@ static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
-static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs,
-                                                       int64_t offset,
-                                                       int64_t bytes,
-                                                       QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK /* pwritev with the compressed flag */
+throttle_co_pwritev_compressed(BlockDriverState *bs, int64_t offset,
+                               int64_t bytes, QEMUIOVector *qiov)
 {
     return throttle_co_pwritev(bs, offset, bytes, qiov,
                                BDRV_REQ_WRITE_COMPRESSED);
 }
 
-static int coroutine_fn throttle_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK throttle_co_flush(BlockDriverState *bs) /* flushes bs->file->bs */
 {
     return bdrv_co_flush(bs->file->bs);
 }
index 9c8736b26fb5e0346b94f7dc6c3b7a3e99042cb2..f2434d6153e1a9b8bc947f131a8c488c1a24ef46 100644 (file)
@@ -544,7 +544,7 @@ static int coroutine_fn vdi_co_block_status(BlockDriverState *bs,
         (s->header.image_type == VDI_TYPE_STATIC ? BDRV_BLOCK_RECURSE : 0);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vdi_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -600,7 +600,7 @@ vdi_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vdi_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -800,14 +800,14 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
     }
 
     /* Create BlockBackend to write to the image */
-    bs_file = bdrv_open_blockdev_ref(vdi_opts->file, errp);
+    bs_file = bdrv_co_open_blockdev_ref(vdi_opts->file, errp);
     if (!bs_file) {
         ret = -EIO;
         goto exit;
     }
 
-    blk = blk_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                          BLK_PERM_ALL, errp);
+    blk = blk_co_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE,
+                             BLK_PERM_ALL, errp);
     if (!blk) {
         ret = -EPERM;
         goto exit;
@@ -898,10 +898,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options,
     return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp);
 }
 
-static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vdi_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     QDict *qdict = NULL;
     BlockdevCreateOptions *create_options = NULL;
@@ -940,8 +939,8 @@ static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
         goto done;
     }
 
-    bs_file = bdrv_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs_file = bdrv_co_open(filename, NULL, NULL,
+                           BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (!bs_file) {
         ret = -EIO;
         goto done;
index ef1f65d917a7fda9a9f459b643e03dbf5cfb9d16..81420722a188e1dfe1b6385e1fbc0c2b0d7dfb13 100644 (file)
@@ -1172,8 +1172,9 @@ vhdx_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 }
 
 
-static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+              QEMUIOVector *qiov)
 {
     BDRVVHDXState *s = bs->opaque;
     int ret = 0;
@@ -1324,9 +1325,9 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
     return ret;
 }
 
-static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
-                                       int nb_sectors, QEMUIOVector *qiov,
-                                       int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+               QEMUIOVector *qiov, int flags)
 {
     int ret = -ENOTSUP;
     BDRVVHDXState *s = bs->opaque;
@@ -1991,13 +1992,13 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(vhdx_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(vhdx_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto delete_and_exit;
@@ -2058,10 +2059,9 @@ delete_and_exit:
     return ret;
 }
 
-static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_create_opts(BlockDriver *drv, const char *filename,
+                    QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -2090,8 +2090,8 @@ static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
index 5b0eae877ea44530d8c50b960ef69f8d76ac8bc7..f5f49018fe4a795bd38347840cd4c8328c9a5a8c 100644 (file)
@@ -1403,13 +1403,11 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
  * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
  * it for call to write user data in the request.
  */
-static int coroutine_fn get_whole_cluster(BlockDriverState *bs,
-                                          VmdkExtent *extent,
-                                          uint64_t cluster_offset,
-                                          uint64_t offset,
-                                          uint64_t skip_start_bytes,
-                                          uint64_t skip_end_bytes,
-                                          bool zeroed)
+static int coroutine_fn GRAPH_RDLOCK
+get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
+                  uint64_t cluster_offset, uint64_t offset,
+                  uint64_t skip_start_bytes, uint64_t skip_end_bytes,
+                  bool zeroed)
 {
     int ret = VMDK_OK;
     int64_t cluster_bytes;
@@ -1484,8 +1482,8 @@ exit:
     return ret;
 }
 
-static int coroutine_fn vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
-                                      uint32_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, uint32_t offset)
 {
     offset = cpu_to_le32(offset);
     /* update L2 table */
@@ -1536,14 +1534,11 @@ static int coroutine_fn vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
  *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
  *          VMDK_ERROR if failed.
  */
-static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
-                                           VmdkExtent *extent,
-                                           VmdkMetaData *m_data,
-                                           uint64_t offset,
-                                           bool allocate,
-                                           uint64_t *cluster_offset,
-                                           uint64_t skip_start_bytes,
-                                           uint64_t skip_end_bytes)
+static int coroutine_fn GRAPH_RDLOCK
+get_cluster_offset(BlockDriverState *bs, VmdkExtent *extent,
+                   VmdkMetaData *m_data, uint64_t offset, bool allocate,
+                   uint64_t *cluster_offset, uint64_t skip_start_bytes,
+                   uint64_t skip_end_bytes)
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
@@ -1736,11 +1731,10 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
     return extent_relative_offset % cluster_size;
 }
 
-static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
-                                             bool want_zero,
-                                             int64_t offset, int64_t bytes,
-                                             int64_t *pnum, int64_t *map,
-                                             BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
+                     int64_t offset, int64_t bytes, int64_t *pnum,
+                     int64_t *map, BlockDriverState **file)
 {
     BDRVVmdkState *s = bs->opaque;
     int64_t index_in_cluster, n, ret;
@@ -1785,7 +1779,7 @@ static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
                   int64_t offset_in_cluster, QEMUIOVector *qiov,
                   uint64_t qiov_offset, uint64_t n_bytes,
@@ -1867,10 +1861,9 @@ vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
-                 int64_t offset_in_cluster, QEMUIOVector *qiov,
-                 int bytes)
+                 int64_t offset_in_cluster, QEMUIOVector *qiov, int bytes)
 {
     int ret;
     int cluster_bytes, buf_bytes;
@@ -1934,7 +1927,7 @@ vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -2016,9 +2009,9 @@ fail:
  *
  * Returns: error code with 0 for success.
  */
-static int coroutine_fn vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
-                                     uint64_t bytes, QEMUIOVector *qiov,
-                                     bool zeroed, bool zero_dry_run)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+             QEMUIOVector *qiov, bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
@@ -2114,7 +2107,7 @@ static int coroutine_fn vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
     return 0;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -2126,7 +2119,7 @@ vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
                            QEMUIOVector *qiov)
 {
@@ -2154,10 +2147,9 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
 }
 
-static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
-                                              int64_t offset,
-                                              int64_t bytes,
-                                              BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                      BdrvRequestFlags flags)
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
@@ -2285,11 +2277,10 @@ exit:
     return ret;
 }
 
-static int coroutine_fn vmdk_create_extent(const char *filename,
-                                           int64_t filesize, bool flat,
-                                           bool compress, bool zeroed_grain,
-                                           BlockBackend **pbb,
-                                           QemuOpts *opts, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
+                   bool compress, bool zeroed_grain, BlockBackend **pbb,
+                   QemuOpts *opts, Error **errp)
 {
     int ret;
     BlockBackend *blk = NULL;
@@ -2299,9 +2290,9 @@ static int coroutine_fn vmdk_create_extent(const char *filename,
         goto exit;
     }
 
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       errp);
+    blk = blk_co_new_open(filename, NULL, NULL,
+                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                          errp);
     if (blk == NULL) {
         ret = -EIO;
         goto exit;
@@ -2367,14 +2358,10 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
  *           non-split format.
  * idx >= 1: get the n-th extent if in a split subformat
  */
-typedef BlockBackend * coroutine_fn (*vmdk_create_extent_fn)(int64_t size,
-                                                             int idx,
-                                                             bool flat,
-                                                             bool split,
-                                                             bool compress,
-                                                             bool zeroed_grain,
-                                                             void *opaque,
-                                                             Error **errp);
+typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */
+    (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
+                             bool compress, bool zeroed_grain, void *opaque,
+                             Error **errp);
 
 static void vmdk_desc_add_extent(GString *desc,
                                  const char *extent_line_fmt,
@@ -2387,17 +2374,18 @@ static void vmdk_desc_add_extent(GString *desc,
     g_free(basename);
 }
 
-static int coroutine_fn vmdk_co_do_create(int64_t size,
-                                          BlockdevVmdkSubformat subformat,
-                                          BlockdevVmdkAdapterType adapter_type,
-                                          const char *backing_file,
-                                          const char *hw_version,
-                                          const char *toolsversion,
-                                          bool compat6,
-                                          bool zeroed_grain,
-                                          vmdk_create_extent_fn extent_fn,
-                                          void *opaque,
-                                          Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_do_create(int64_t size,
+                  BlockdevVmdkSubformat subformat,
+                  BlockdevVmdkAdapterType adapter_type,
+                  const char *backing_file,
+                  const char *hw_version,
+                  const char *toolsversion,
+                  bool compat6,
+                  bool zeroed_grain,
+                  vmdk_create_extent_fn extent_fn,
+                  void *opaque,
+                  Error **errp)
 {
     int extent_idx;
     BlockBackend *blk = NULL;
@@ -2518,8 +2506,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
         }
         assert(full_backing);
 
-        backing = blk_new_open(full_backing, NULL, NULL,
-                               BDRV_O_NO_BACKING, errp);
+        backing = blk_co_new_open(full_backing, NULL, NULL,
+                                  BDRV_O_NO_BACKING, errp);
         g_free(full_backing);
         if (backing == NULL) {
             ret = -EIO;
@@ -2617,10 +2605,10 @@ typedef struct {
     QemuOpts *opts;
 } VMDKCreateOptsData;
 
-static BlockBackend * coroutine_fn vmdk_co_create_opts_cb(int64_t size, int idx,
-                                            bool flat, bool split, bool compress,
-                                            bool zeroed_grain, void *opaque,
-                                            Error **errp)
+static BlockBackend * coroutine_fn GRAPH_RDLOCK
+vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
+                       bool compress, bool zeroed_grain, void *opaque,
+                       Error **errp)
 {
     BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
@@ -2659,10 +2647,9 @@ exit:
     return blk;
 }
 
-static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_create_opts(BlockDriver *drv, const char *filename,
+                    QemuOpts *opts, Error **errp)
 {
     Error *local_err = NULL;
     char *desc = NULL;
@@ -2781,7 +2768,7 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
     BlockdevCreateOptionsVmdk *opts = opaque;
 
     if (idx == 0) {
-        bs = bdrv_open_blockdev_ref(opts->file, errp);
+        bs = bdrv_co_open_blockdev_ref(opts->file, errp);
     } else {
         int i;
         BlockdevRefList *list = opts->extents;
@@ -2796,14 +2783,16 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
             error_setg(errp, "Extent [%d] not specified", idx - 1);
             return NULL;
         }
-        bs = bdrv_open_blockdev_ref(list->value, errp);
+        bs = bdrv_co_open_blockdev_ref(list->value, errp);
     }
     if (!bs) {
         return NULL;
     }
-    blk = blk_new_with_bs(bs,
-                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                          BLK_PERM_ALL, errp);
+    blk = blk_co_new_with_bs(bs,
+                             BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+                                BLK_PERM_RESIZE,
+                             BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         return NULL;
     }
@@ -2820,8 +2809,8 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
     return blk;
 }
 
-static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
-                                       Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsVmdk *opts;
 
@@ -2916,9 +2905,8 @@ static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent)
     return info;
 }
 
-static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
-                                      BdrvCheckResult *result,
-                                      BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
index cfdea7db80a87bedc233f51e5f6fca64ea7ed2ea..b89b0ff8e27568da4aeb81a0e2e16ea70603a9c1 100644 (file)
@@ -610,7 +610,7 @@ vpc_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return 0;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -660,7 +660,7 @@ fail:
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -1005,13 +1005,13 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(vpc_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(vpc_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -1087,10 +1087,9 @@ out:
     return ret;
 }
 
-static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vpc_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -1117,8 +1116,8 @@ static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
index 0ceecfb6dfa82f63c661280e682008e12ebbe71a..4e7b8b1c06a2c4a57663c3b00216ceb67dfd1a46 100644 (file)
@@ -36,6 +36,7 @@ extern char **environ;
 #include "target_os_signal.h"
 #include "target.h"
 #include "exec/gdbstub.h"
+#include "qemu/clang-tsa.h"
 
 /*
  * This struct is used to hold certain information about the image.  Basically,
@@ -234,8 +235,8 @@ int target_msync(abi_ulong start, abi_ulong len, int flags);
 extern unsigned long last_brk;
 extern abi_ulong mmap_next_start;
 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size);
-void mmap_fork_start(void);
-void mmap_fork_end(int child);
+void TSA_NO_TSA mmap_fork_start(void);
+void TSA_NO_TSA mmap_fork_end(int child);
 
 /* main.c */
 extern char qemu_proc_pathname[];
diff --git a/configs/devices/x86_64-softmmu/x86_64-quintela-devices.mak b/configs/devices/x86_64-softmmu/x86_64-quintela-devices.mak
deleted file mode 100644 (file)
index ee2bb8c..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# Boards:
-#
-CONFIG_ISAPC=n
-CONFIG_I440FX=n
-CONFIG_Q35=n
-CONFIG_MICROVM=y
-
diff --git a/configs/devices/x86_64-softmmu/x86_64-quintela2-devices.mak b/configs/devices/x86_64-softmmu/x86_64-quintela2-devices.mak
deleted file mode 100644 (file)
index f7e4dae..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Boards:
-#
-CONFIG_ISAPC=y
-CONFIG_I440FX=y
-CONFIG_Q35=y
-CONFIG_MICROVM=y
index 4249a37f65283af275a6900de52228dcd1186ce2..0a2322c249bccaebc42dbc2b47e6fdc7dce4d206 100644 (file)
@@ -3,3 +3,4 @@ TARGET_SYSTBL_ABI=common
 TARGET_SYSTBL=syscall.tbl
 TARGET_BIG_ENDIAN=y
 TARGET_HAS_BFLT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
index 8385e2d33363b109482880c0a1f4e73357352bfb..e84c0cc7283bc6470c9c75f4336ffe3e4106b599 100644 (file)
@@ -2,3 +2,4 @@ TARGET_ARCH=microblaze
 TARGET_BIG_ENDIAN=y
 TARGET_SUPPORTS_MTTCG=y
 TARGET_NEED_FDT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
index d0e775d8402bf6bbb98ecff1c93815b2df18bda5..270743156a9e4480f2bf3df91ee955ae151c9a9c 100644 (file)
@@ -2,3 +2,4 @@ TARGET_ARCH=microblaze
 TARGET_SYSTBL_ABI=common
 TARGET_SYSTBL=syscall.tbl
 TARGET_HAS_BFLT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
index af40391f2f3d2d829aa1a53893335ab85b117975..9b688036bd3de4ecd2261828435103ba6078b0b7 100644 (file)
@@ -1,3 +1,4 @@
 TARGET_ARCH=microblaze
 TARGET_SUPPORTS_MTTCG=y
 TARGET_NEED_FDT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
index 64960c6000f5613e00712c6203f1e7117163a916..cf6db3d5518c2f01cc4222a6c1f93c4c6eabd715 100755 (executable)
--- a/configure
+++ b/configure
@@ -1018,7 +1018,7 @@ cat << EOF
   debug-tcg       TCG debugging (default is disabled)
   debug-info      debugging information
   safe-stack      SafeStack Stack Smash Protection. Depends on
-                  clang/llvm >= 3.7 and requires coroutine backend ucontext.
+                  clang/llvm and requires coroutine backend ucontext.
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -1138,12 +1138,12 @@ fi
 cat > $TMPC << EOF
 #if defined(__clang_major__) && defined(__clang_minor__)
 # ifdef __apple_build_version__
-#  if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0)
-#   error You need at least XCode Clang v10.0 to compile QEMU
+#  if __clang_major__ < 12 || (__clang_major__ == 12 && __clang_minor__ < 0)
+#   error You need at least XCode Clang v12.0 to compile QEMU
 #  endif
 # else
-#  if __clang_major__ < 6 || (__clang_major__ == 6 && __clang_minor__ < 0)
-#   error You need at least Clang v6.0 to compile QEMU
+#  if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0)
+#   error You need at least Clang v10.0 to compile QEMU
 #  endif
 # endif
 #elif defined(__GNUC__) && defined(__GNUC_MINOR__)
@@ -1156,7 +1156,7 @@ cat > $TMPC << EOF
 int main (void) { return 0; }
 EOF
 if ! compile_prog "" "" ; then
-    error_exit "You need at least GCC v7.4 or Clang v6.0 (or XCode Clang v10.0)"
+    error_exit "You need at least GCC v7.4 or Clang v10.0 (or XCode Clang v12.0)"
 fi
 
 # Accumulate -Wfoo and -Wno-bar separately.
@@ -1184,6 +1184,7 @@ add_to warn_flags -Wendif-labels
 add_to warn_flags -Wexpansion-to-defined
 add_to warn_flags -Wimplicit-fallthrough=2
 add_to warn_flags -Wmissing-format-attribute
+add_to warn_flags -Wthread-safety
 
 nowarn_flags=
 add_to nowarn_flags -Wno-initializer-overrides
@@ -1261,19 +1262,6 @@ EOF
   fi
 fi
 
-# Disable -Wmissing-braces on older compilers that warn even for
-# the "universal" C zero initializer {0}.
-cat > $TMPC << EOF
-struct {
-  int a[2];
-} x = {0};
-EOF
-if compile_object "-Werror" "" ; then
-  :
-else
-  QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces"
-fi
-
 # Our module code doesn't support Windows
 if test "$modules" = "yes" && test "$mingw32" = "yes" ; then
   error_exit "Modules are not available for Windows"
index 793364dc0ed529e92f73db077ce61f7006e8cf80..39f355de989f6192608d410768a9a9e464bcadd0 100644 (file)
@@ -192,6 +192,11 @@ void start_exclusive(void)
     CPUState *other_cpu;
     int running_cpus;
 
+    if (current_cpu->exclusive_context_count) {
+        current_cpu->exclusive_context_count++;
+        return;
+    }
+
     qemu_mutex_lock(&qemu_cpu_list_lock);
     exclusive_idle();
 
@@ -219,13 +224,16 @@ void start_exclusive(void)
      */
     qemu_mutex_unlock(&qemu_cpu_list_lock);
 
-    current_cpu->in_exclusive_context = true;
+    current_cpu->exclusive_context_count = 1;
 }
 
 /* Finish an exclusive operation.  */
 void end_exclusive(void)
 {
-    current_cpu->in_exclusive_context = false;
+    current_cpu->exclusive_context_count--;
+    if (current_cpu->exclusive_context_count) {
+        return;
+    }
 
     qemu_mutex_lock(&qemu_cpu_list_lock);
     qatomic_set(&pending_cpus, 0);
index b302d835d215b90ecfe5646b47822c59bbf93e70..1e98f44e0d172fefa57e8d5f128b88f2f02eb5c7 100644 (file)
@@ -493,6 +493,13 @@ qcrypto_tls_session_read(QCryptoTLSSession *session,
 }
 
 
+size_t
+qcrypto_tls_session_check_pending(QCryptoTLSSession *session)
+{
+    return gnutls_record_check_pending(session->handle);
+}
+
+
 int
 qcrypto_tls_session_handshake(QCryptoTLSSession *session,
                               Error **errp)
@@ -615,6 +622,13 @@ qcrypto_tls_session_read(QCryptoTLSSession *sess,
 }
 
 
+size_t
+qcrypto_tls_session_check_pending(QCryptoTLSSession *session)
+{
+    return 0;
+}
+
+
 int
 qcrypto_tls_session_handshake(QCryptoTLSSession *sess,
                               Error **errp)
index 1c1e7b9e11c3ef1b46a4b9bef93a1951ea49c01f..20b97c33109d25aee28af3599a898ce9acf4a117 100644 (file)
@@ -86,6 +86,38 @@ respective ports repository, while NetBSD will use the pkgsrc repository.
 For macOS, `Homebrew`_ will be used, although `MacPorts`_ is expected to carry
 similar versions.
 
+Some build dependencies may follow less conservative rules:
+
+Python runtime
+  Distributions with long-term support often provide multiple versions
+  of the Python runtime.  While QEMU will initially aim to support the
+  distribution's default runtime, it may later increase its minimum version
+  to any newer Python that is available as an option from the vendor.
+  In this case, it will be necessary to use the ``--python`` command line
+  option of the ``configure`` script to point QEMU to a supported
+  version of the Python runtime.
+
+  As of QEMU |version|, the minimum supported version of Python is 3.6.
+
+Python build dependencies
+  Some of QEMU's build dependencies are written in Python.  Usually these
+  are only packaged by distributions for the default Python runtime.
+  If QEMU bumps its minimum Python version and a non-default runtime is
+  required, it may be necessary to fetch Python modules from the Python
+  Package Index (PyPI) via ``pip``, in order to build QEMU.
+
+Optional build dependencies
+  Build components whose absence does not affect the ability to build
+  QEMU may not be available in distros, or may be too old for QEMU's
+  requirements.  Many of these, such as the Avocado testing framework
+  or various linters, are written in Python and therefore can also
+  be installed using ``pip``.  Cross compilers are another example
+  of an optional build-time dependency; in this case it is possible to
+  download them from repositories such as EPEL, to use container-based
+  cross compilation using ``docker`` or ``podman``, or to use pre-built
+  binaries distributed with QEMU.
+
+
 Windows
 -------
 
index da2e6fe63d2e6a44a43d426410f5112416eca9ac..ee95bcb1a633f444ef8fa7dbdd85bae82ac55a61 100644 (file)
@@ -20,6 +20,20 @@ they were first deprecated in the 2.10.0 release.
 What follows is a list of all features currently marked as
 deprecated.
 
+Build options
+-------------
+
+``gprof`` builds (since 8.0)
+''''''''''''''''''''''''''''
+
+The ``--enable-gprof`` configure setting relies on compiler
+instrumentation to gather its data, which can distort the generated
+profile. As other non-instrumenting tools are available that give a
+more holistic view of the system with non-instrumented binaries, we are
+deprecating the build option and no longer defend it in CI. The
+``--enable-gcov`` build option remains for test case coverage
+analysis.
+
 System emulator command line arguments
 --------------------------------------
 
@@ -52,14 +66,6 @@ and will cause a warning.
 The replacement for the ``nodelay`` short-form boolean option is ``nodelay=on``
 rather than ``delay=off``.
 
-``-spice password=string`` (since 6.0)
-''''''''''''''''''''''''''''''''''''''
-
-This option is insecure because the SPICE password remains visible in
-the process listing. This is replaced by the new ``password-secret``
-option which lets the password be securely provided on the command
-line using a ``secret`` object instance.
-
 ``-smp`` ("parameter=0" SMP configurations) (since 6.2)
 '''''''''''''''''''''''''''''''''''''''''''''''''''''''
 
@@ -246,15 +252,6 @@ full SCSI support.  Use virtio-scsi instead when SCSI passthrough is required.
 Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an
 alias.
 
-``-device sga`` (since 6.2)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The ``sga`` device loads an option ROM for x86 targets which enables
-SeaBIOS to send messages to the serial console. SeaBIOS 1.11.0 onwards
-contains native support for this feature and thus use of the option
-ROM approach is obsolete. The native SeaBIOS support can be activated
-by using ``-machine graphics=off``.
-
 ``-device nvme-ns,eui64-default=on|off`` (since 7.1)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -296,6 +293,14 @@ The above, converted to the current supported format::
 
   json:{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"}
 
+``iscsi,password=xxx`` (since 8.0)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Specifying the iSCSI password in plain text on the command line using the
+``password`` option is insecure. The ``password-secret`` option should be
+used instead, to refer to a ``--object secret...`` instance that provides
+a password via a file, or encrypted.
+
 Backwards compatibility
 -----------------------
 
@@ -325,24 +330,6 @@ versions, aliases will point to newer CPU model versions
 depending on the machine type, so management software must
 resolve CPU model aliases before starting a virtual machine.
 
-Tools
------
-
-virtiofsd
-'''''''''
-
-There is a new Rust implementation of ``virtiofsd`` at
-``https://gitlab.com/virtio-fs/virtiofsd``;
-since this is now marked stable, new development should be done on that
-rather than the existing C version in the QEMU tree.
-The C version will still accept fixes and patches that
-are already in development for the moment, but will eventually
-be deleted from this tree.
-New deployments should use the Rust version, and existing systems
-should consider moving to it.  The command line and feature set
-is very close and moving should be simple.
-
-
 QEMU guest agent
 ----------------
 
index a17d0554d66cf5b0ea9d70330b9ab926cd30f25f..5b258b446b611832bbe4a77a15ce310d7c8bc169 100644 (file)
@@ -428,6 +428,13 @@ respectively. The actual backend names should be used instead.
 Use ``-drive if=pflash`` to configure the OTP device of the sifive_u
 RISC-V machine instead.
 
+``-spice password=string`` (removed in 8.0)
+'''''''''''''''''''''''''''''''''''''''''''
+
+This option was insecure because the SPICE password remained visible in
+the process listing. This was replaced by the new ``password-secret``
+option which lets the password be securely provided on the command
+line using a ``secret`` object instance.
 
 QEMU Machine Protocol (QMP) commands
 ------------------------------------
@@ -789,6 +796,16 @@ The 'ide-drive' device has been removed. Users should use 'ide-hd' or
 The 'scsi-disk' device has been removed. Users should use 'scsi-hd' or
 'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed.
 
+``sga`` (removed in 8.0)
+''''''''''''''''''''''''
+
+The ``sga`` device loaded an option ROM for x86 targets which enabled
+SeaBIOS to send messages to the serial console. SeaBIOS 1.11.0 onwards
+contains native support for this feature and thus use of the option
+ROM approach was obsolete. The native SeaBIOS support can be activated
+by using ``-machine graphics=off``.
+
+
 Related binaries
 ----------------
 
@@ -872,3 +889,16 @@ The VXHS code did not compile since v2.12.0. It was removed in 5.1.
 The corresponding upstream server project is no longer maintained.
 Users are recommended to switch to an alternative distributed block
 device driver such as RBD.
+
+Tools
+-----
+
+virtiofsd (removed in 8.0)
+''''''''''''''''''''''''''
+
+There is a newer Rust implementation of ``virtiofsd`` at
+``https://gitlab.com/virtio-fs/virtiofsd``; this has been
+stable for some time and is now widely used.
+The command line and feature set are very close to those of the
+removed C implementation.
+
index 73a287a4f27cb450a5976891b89d9b0c32124072..00767b0e24fbc1c5ffa0b0f34aa9f36dc26f9bc5 100644 (file)
@@ -290,10 +290,6 @@ man_pages = [
     ('tools/virtfs-proxy-helper', 'virtfs-proxy-helper',
      'QEMU 9p virtfs proxy filesystem helper',
      ['M. Mohan Kumar'], 1),
-    ('tools/virtiofsd', 'virtiofsd',
-     'QEMU virtio-fs shared file system daemon',
-     ['Stefan Hajnoczi <stefanha@redhat.com>',
-      'Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>'], 1),
 ]
 man_make_section_directory = False
 
index 715330c85613ff9ccab657814d9aca607556f8d4..3bfcb33fc4b56b619e6bd53c6fe877da9b098b18 100644 (file)
@@ -19,11 +19,6 @@ responsibility to ensure that state is reset between fuzzing-runs.
 Building the fuzzers
 --------------------
 
-*NOTE*: If possible, build a 32-bit binary. When forking, the 32-bit fuzzer is
-much faster, since the page-map has a smaller size. This is due to the fact that
-AddressSanitizer maps ~20TB of memory, as part of its detection. This results
-in a large page-map, and a much slower ``fork()``.
-
 To build the fuzzers, install a recent version of clang:
 Configure with (substitute the clang binaries with the version you installed).
 Here, enable-sanitizers, is optional but it allows us to reliably detect bugs
@@ -296,10 +291,9 @@ input. It is also responsible for manually calling ``main_loop_wait`` to ensure
 that bottom halves are executed and any cleanup required before the next input.
 
 Since the same process is reused for many fuzzing runs, QEMU state needs to
-be reset at the end of each run. There are currently two implemented
-options for resetting state:
+be reset at the end of each run. For example, this can be done by rebooting the
+VM after each run.
 
-- Reboot the guest between runs.
   - *Pros*: Straightforward and fast for simple fuzz targets.
 
   - *Cons*: Depending on the device, does not reset all device state. If the
@@ -308,15 +302,3 @@ options for resetting state:
     reboot.
 
   - *Example target*: ``i440fx-qtest-reboot-fuzz``
-
-- Run each test case in a separate forked process and copy the coverage
-   information back to the parent. This is fairly similar to AFL's "deferred"
-   fork-server mode [3]
-
-  - *Pros*: Relatively fast. Devices only need to be initialized once. No need to
-    do slow reboots or vmloads.
-
-  - *Cons*: Not officially supported by libfuzzer. Does not work well for
-     devices that rely on dedicated threads.
-
-  - *Example target*: ``virtio-net-fork-fuzz``
index 5edc49aa7497386894a8a32f10401ce1b4059bbb..23e7f2fb1c372fd91c3a4e70a130b535c3486b45 100644 (file)
@@ -685,9 +685,10 @@ change in the QMP syntax (usually by allowing values or operations
 that previously resulted in an error).  QMP clients may still need to
 know whether the extension is available.
 
-For this purpose, a list of features can be specified for a command or
-struct type.  Each list member can either be ``{ 'name': STRING, '*if':
-COND }``, or STRING, which is shorthand for ``{ 'name': STRING }``.
+For this purpose, a list of features can be specified for definitions,
+enumeration values, and struct members.  Each feature list member can
+either be ``{ 'name': STRING, '*if': COND }``, or STRING, which is
+shorthand for ``{ 'name': STRING }``.
 
 The optional 'if' member specifies a conditional.  See `Configuring
 the schema`_ below for more on this.
@@ -817,8 +818,8 @@ member 'bar' ::
 
 A union's discriminator may not be conditional.
 
-Likewise, individual enumeration values be conditional.  This requires
-the longhand form of ENUM-VALUE_.
+Likewise, individual enumeration values may be conditional.  This
+requires the longhand form of ENUM-VALUE_.
 
 Example: an enum type with unconditional value 'foo' and conditional
 value 'bar' ::
@@ -1157,9 +1158,8 @@ Example: the SchemaInfo for EVENT_C from section Events_ ::
     Type "q_obj-EVENT_C-arg" is an implicitly defined object type with
     the two members from the event's definition.
 
-The SchemaInfo for struct and union types has meta-type "object".
-
-The SchemaInfo for a struct type has variant member "members".
+The SchemaInfo for struct and union types has meta-type "object" and
+variant member "members".
 
 The SchemaInfo for a union type additionally has variant members "tag"
 and "variants".
index 673057c90d8914ea4a4b4566c32f7d4f088a9abb..c214c73e2818464447a746dd4dd4378edf4d8cb9 100644 (file)
@@ -7,46 +7,43 @@ the guest is running on source host and restoring this saved state on the
 destination host. This document details how saving and restoring of VFIO
 devices is done in QEMU.
 
-Migration of VFIO devices consists of two phases: the optional pre-copy phase,
-and the stop-and-copy phase. The pre-copy phase is iterative and allows to
-accommodate VFIO devices that have a large amount of data that needs to be
-transferred. The iterative pre-copy phase of migration allows for the guest to
-continue whilst the VFIO device state is transferred to the destination, this
-helps to reduce the total downtime of the VM. VFIO devices can choose to skip
-the pre-copy phase of migration by returning pending_bytes as zero during the
-pre-copy phase.
+Migration of VFIO devices currently consists of a single stop-and-copy phase.
+During the stop-and-copy phase the guest is stopped and the entire VFIO device
+data is transferred to the destination.
+
+The pre-copy phase of migration is currently not supported for VFIO devices.
+Support for VFIO pre-copy will be added later on.
+
+Note that currently VFIO migration is supported only for a single device. This
+is due to VFIO migration's lack of P2P support. However, P2P support is planned
+to be added later on.
 
 A detailed description of the UAPI for VFIO device migration can be found in
-the comment for the ``vfio_device_migration_info`` structure in the header
-file linux-headers/linux/vfio.h.
+the comment for the ``vfio_device_mig_state`` structure in the header file
+linux-headers/linux/vfio.h.
 
 VFIO implements the device hooks for the iterative approach as follows:
 
-* A ``save_setup`` function that sets up the migration region and sets _SAVING
-  flag in the VFIO device state.
+* A ``save_setup`` function that sets up migration on the source.
 
-* A ``load_setup`` function that sets up the migration region on the
-  destination and sets _RESUMING flag in the VFIO device state.
+* A ``load_setup`` function that sets the VFIO device on the destination in
+  _RESUMING state.
 
 * A ``state_pending_exact`` function that reads pending_bytes from the vendor
   driver, which indicates the amount of data that the vendor driver has yet to
   save for the VFIO device.
 
-* A ``save_live_iterate`` function that reads the VFIO device's data from the
-  vendor driver through the migration region during iterative phase.
-
 * A ``save_state`` function to save the device config space if it is present.
 
-* A ``save_live_complete_precopy`` function that resets _RUNNING flag from the
-  VFIO device state and iteratively copies the remaining data for the VFIO
-  device until the vendor driver indicates that no data remains (pending bytes
-  is zero).
+* A ``save_live_complete_precopy`` function that sets the VFIO device in
+  _STOP_COPY state and iteratively copies the data for the VFIO device until
+  the vendor driver indicates that no data remains.
 
 * A ``load_state`` function that loads the config section and the data
-  sections that are generated by the save functions above
+  sections that are generated by the save functions above.
 
 * ``cleanup`` functions for both save and load that perform any migration
-  related cleanup, including unmapping the migration region
+  related cleanup.
 
 
 The VFIO migration code uses a VM state change handler to change the VFIO
@@ -71,13 +68,13 @@ tracking can identify dirtied pages, but any page pinned by the vendor driver
 can also be written by the device. There is currently no device or IOMMU
 support for dirty page tracking in hardware.
 
-By default, dirty pages are tracked when the device is in pre-copy as well as
-stop-and-copy phase. So, a page pinned by the vendor driver will be copied to
-the destination in both phases. Copying dirty pages in pre-copy phase helps
-QEMU to predict if it can achieve its downtime tolerances. If QEMU during
-pre-copy phase keeps finding dirty pages continuously, then it understands
-that even in stop-and-copy phase, it is likely to find dirty pages and can
-predict the downtime accordingly.
+By default, dirty pages are tracked during pre-copy as well as stop-and-copy
+phase. So, a page pinned by the vendor driver will be copied to the destination
+in both phases. Copying dirty pages in pre-copy phase helps QEMU to predict if
+it can achieve its downtime tolerances. If QEMU during pre-copy phase keeps
+finding dirty pages continuously, then it understands that even in stop-and-copy
+phase, it is likely to find dirty pages and can predict the downtime
+accordingly.
 
 QEMU also provides a per device opt-out option ``pre-copy-dirty-page-tracking``
 which disables querying the dirty bitmap during pre-copy phase. If it is set to
@@ -111,23 +108,22 @@ Live migration save path
                                   |
                      migrate_init spawns migration_thread
                 Migration thread then calls each device's .save_setup()
-                    (RUNNING, _SETUP, _RUNNING|_SAVING)
+                       (RUNNING, _SETUP, _RUNNING)
                                   |
-                    (RUNNING, _ACTIVE, _RUNNING|_SAVING)
+                      (RUNNING, _ACTIVE, _RUNNING)
              If device is active, get pending_bytes by .state_pending_exact()
           If total pending_bytes >= threshold_size, call .save_live_iterate()
-                  Data of VFIO device for pre-copy phase is copied
         Iterate till total pending bytes converge and are less than threshold
                                   |
   On migration completion, vCPU stops and calls .save_live_complete_precopy for
-   each active device. The VFIO device is then transitioned into _SAVING state
-                   (FINISH_MIGRATE, _DEVICE, _SAVING)
+  each active device. The VFIO device is then transitioned into _STOP_COPY state
+                  (FINISH_MIGRATE, _DEVICE, _STOP_COPY)
                                   |
      For the VFIO device, iterate in .save_live_complete_precopy until
                          pending data is 0
-                   (FINISH_MIGRATE, _DEVICE, _STOPPED)
+                   (FINISH_MIGRATE, _DEVICE, _STOP)
                                   |
-                 (FINISH_MIGRATE, _COMPLETED, _STOPPED)
+                 (FINISH_MIGRATE, _COMPLETED, _STOP)
              Migration thread schedules cleanup bottom half and exits
 
 Live migration resume path
@@ -136,7 +132,7 @@ Live migration resume path
 ::
 
               Incoming migration calls .load_setup for each device
-                       (RESTORE_VM, _ACTIVE, _STOPPED)
+                       (RESTORE_VM, _ACTIVE, _STOP)
                                  |
        For each device, .load_state is called for that device section data
                        (RESTORE_VM, _ACTIVE, _RESUMING)
index 9136fed3b7303c914d01d9d2c4994b7df33b353f..bbcdccce68a8dcbda552cf96aa0443d294da77b6 100644 (file)
@@ -48,7 +48,6 @@ if build_docs
         'qemu-storage-daemon.1': (have_tools ? 'man1' : ''),
         'qemu-trace-stap.1': (stap.found() ? 'man1' : ''),
         'virtfs-proxy-helper.1': (have_virtfs_proxy_helper ? 'man1' : ''),
-        'virtiofsd.1': (have_virtiofsd ? 'man1' : ''),
         'qemu.1': 'man1',
         'qemu-block-drivers.7': 'man7',
         'qemu-cpu-models.7': 'man7'
index c38df32bde076c68bf123876cf1560188a3f4dc1..0424cae4b01ce1ba9ed61286481b36173d536bca 100644 (file)
@@ -49,6 +49,7 @@ Supported devices
  * SMBus controller (SMBF)
  * Ethernet controller (EMC)
  * Tachometer
+ * Peripheral SPI controller (PSPI)
 
 Missing devices
 ---------------
@@ -64,7 +65,6 @@ Missing devices
 
  * Ethernet controller (GMAC)
  * USB device (USBD)
- * Peripheral SPI controller (PSPI)
  * SD/MMC host
  * PECI interface
  * PCI and PCIe root complex and bridges
index 2151adcf78441a20eb8f1c5bce2768d7db9f3133..8e65ce0dfc7b56ab932863cb68c9789f48d13593 100644 (file)
@@ -16,4 +16,3 @@ command line utilities and other standalone programs.
    qemu-pr-helper
    qemu-trace-stap
    virtfs-proxy-helper
-   virtiofsd
diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst
deleted file mode 100644 (file)
index 995a754..0000000
+++ /dev/null
@@ -1,403 +0,0 @@
-QEMU virtio-fs shared file system daemon
-========================================
-
-Synopsis
---------
-
-**virtiofsd** [*OPTIONS*]
-
-Description
------------
-
-Share a host directory tree with a guest through a virtio-fs device.  This
-program is a vhost-user backend that implements the virtio-fs device.  Each
-virtio-fs device instance requires its own virtiofsd process.
-
-This program is designed to work with QEMU's ``--device vhost-user-fs-pci``
-but should work with any virtual machine monitor (VMM) that supports
-vhost-user.  See the Examples section below.
-
-This program must be run as the root user.  The program drops privileges where
-possible during startup although it must be able to create and access files
-with any uid/gid:
-
-* The ability to invoke syscalls is limited using seccomp(2).
-* Linux capabilities(7) are dropped.
-
-In "namespace" sandbox mode the program switches into a new file system
-namespace and invokes pivot_root(2) to make the shared directory tree its root.
-A new pid and net namespace is also created to isolate the process.
-
-In "chroot" sandbox mode the program invokes chroot(2) to make the shared
-directory tree its root. This mode is intended for container environments where
-the container runtime has already set up the namespaces and the program does
-not have permission to create namespaces itself.
-
-Both sandbox modes prevent "file system escapes" due to symlinks and other file
-system objects that might lead to files outside the shared directory.
-
-Options
--------
-
-.. program:: virtiofsd
-
-.. option:: -h, --help
-
-  Print help.
-
-.. option:: -V, --version
-
-  Print version.
-
-.. option:: -d
-
-  Enable debug output.
-
-.. option:: --syslog
-
-  Print log messages to syslog instead of stderr.
-
-.. option:: -o OPTION
-
-  * debug -
-    Enable debug output.
-
-  * flock|no_flock -
-    Enable/disable flock.  The default is ``no_flock``.
-
-  * modcaps=CAPLIST
-    Modify the list of capabilities allowed; CAPLIST is a colon separated
-    list of capabilities, each preceded by either + or -, e.g.
-    ''+sys_admin:-chown''.
-
-  * log_level=LEVEL -
-    Print only log messages matching LEVEL or more severe.  LEVEL is one of
-    ``err``, ``warn``, ``info``, or ``debug``.  The default is ``info``.
-
-  * posix_lock|no_posix_lock -
-    Enable/disable remote POSIX locks.  The default is ``no_posix_lock``.
-
-  * readdirplus|no_readdirplus -
-    Enable/disable readdirplus.  The default is ``readdirplus``.
-
-  * sandbox=namespace|chroot -
-    Sandbox mode:
-    - namespace: Create mount, pid, and net namespaces and pivot_root(2) into
-    the shared directory.
-    - chroot: chroot(2) into shared directory (use in containers).
-    The default is "namespace".
-
-  * source=PATH -
-    Share host directory tree located at PATH.  This option is required.
-
-  * timeout=TIMEOUT -
-    I/O timeout in seconds.  The default depends on cache= option.
-
-  * writeback|no_writeback -
-    Enable/disable writeback cache. The cache allows the FUSE client to buffer
-    and merge write requests.  The default is ``no_writeback``.
-
-  * xattr|no_xattr -
-    Enable/disable extended attributes (xattr) on files and directories.  The
-    default is ``no_xattr``.
-
-  * posix_acl|no_posix_acl -
-    Enable/disable posix acl support.  Posix ACLs are disabled by default.
-
-  * security_label|no_security_label -
-    Enable/disable security label support. Security labels are disabled by
-    default. This will allow client to send a MAC label of file during
-    file creation. Typically this is expected to be SELinux security
-    label. Server will try to set that label on newly created file
-    atomically wherever possible.
-
-  * killpriv_v2|no_killpriv_v2 -
-    Enable/disable ``FUSE_HANDLE_KILLPRIV_V2`` support. KILLPRIV_V2 is enabled
-    by default as long as the client supports it. Enabling this option helps
-    with performance in write path.
-
-.. option:: --socket-path=PATH
-
-  Listen on vhost-user UNIX domain socket at PATH.
-
-.. option:: --socket-group=GROUP
-
-  Set the vhost-user UNIX domain socket gid to GROUP.
-
-.. option:: --fd=FDNUM
-
-  Accept connections from vhost-user UNIX domain socket file descriptor FDNUM.
-  The file descriptor must already be listening for connections.
-
-.. option:: --thread-pool-size=NUM
-
-  Restrict the number of worker threads per request queue to NUM.  The default
-  is 0.
-
-.. option:: --cache=none|auto|always
-
-  Select the desired trade-off between coherency and performance.  ``none``
-  forbids the FUSE client from caching to achieve best coherency at the cost of
-  performance.  ``auto`` acts similar to NFS with a 1 second metadata cache
-  timeout.  ``always`` sets a long cache lifetime at the expense of coherency.
-  The default is ``auto``.
-
-Extended attribute (xattr) mapping
-----------------------------------
-
-By default the name of xattr's used by the client are passed through to the server
-file system.  This can be a problem where either those xattr names are used
-by something on the server (e.g. selinux client/server confusion) or if the
-``virtiofsd`` is running in a container with restricted privileges where it
-cannot access some attributes.
-
-Mapping syntax
-~~~~~~~~~~~~~~
-
-A mapping of xattr names can be made using -o xattrmap=mapping where the ``mapping``
-string consists of a series of rules.
-
-The first matching rule terminates the mapping.
-The set of rules must include a terminating rule to match any remaining attributes
-at the end.
-
-Each rule consists of a number of fields separated with a separator that is the
-first non-white space character in the rule.  This separator must then be used
-for the whole rule.
-White space may be added before and after each rule.
-
-Using ':' as the separator a rule is of the form:
-
-``:type:scope:key:prepend:``
-
-**scope** is:
-
-- 'client' - match 'key' against a xattr name from the client for
-             setxattr/getxattr/removexattr
-- 'server' - match 'prepend' against a xattr name from the server
-             for listxattr
-- 'all' - can be used to make a single rule where both the server
-          and client matches are triggered.
-
-**type** is one of:
-
-- 'prefix' - is designed to prepend and strip a prefix;  the modified
-  attributes then being passed on to the client/server.
-
-- 'ok' - Causes the rule set to be terminated when a match is found
-  while allowing matching xattr's through unchanged.
-  It is intended both as a way of explicitly terminating
-  the list of rules, and to allow some xattr's to skip following rules.
-
-- 'bad' - If a client tries to use a name matching 'key' it's
-  denied using EPERM; when the server passes an attribute
-  name matching 'prepend' it's hidden.  In many ways it's use is very like
-  'ok' as either an explicit terminator or for special handling of certain
-  patterns.
-
-- 'unsupported' - If a client tries to use a name matching 'key' it's
-  denied using ENOTSUP; when the server passes an attribute
-  name matching 'prepend' it's hidden.  In many ways it's use is very like
-  'ok' as either an explicit terminator or for special handling of certain
-  patterns.
-
-**key** is a string tested as a prefix on an attribute name originating
-on the client.  It maybe empty in which case a 'client' rule
-will always match on client names.
-
-**prepend** is a string tested as a prefix on an attribute name originating
-on the server, and used as a new prefix.  It may be empty
-in which case a 'server' rule will always match on all names from
-the server.
-
-e.g.:
-
-  ``:prefix:client:trusted.:user.virtiofs.:``
-
-  will match 'trusted.' attributes in client calls and prefix them before
-  passing them to the server.
-
-  ``:prefix:server::user.virtiofs.:``
-
-  will strip 'user.virtiofs.' from all server replies.
-
-  ``:prefix:all:trusted.:user.virtiofs.:``
-
-  combines the previous two cases into a single rule.
-
-  ``:ok:client:user.::``
-
-  will allow get/set xattr for 'user.' xattr's and ignore
-  following rules.
-
-  ``:ok:server::security.:``
-
-  will pass 'security.' xattr's in listxattr from the server
-  and ignore following rules.
-
-  ``:ok:all:::``
-
-  will terminate the rule search passing any remaining attributes
-  in both directions.
-
-  ``:bad:server::security.:``
-
-  would hide 'security.' xattr's in listxattr from the server.
-
-A simpler 'map' type provides a shorter syntax for the common case:
-
-``:map:key:prepend:``
-
-The 'map' type adds a number of separate rules to add **prepend** as a prefix
-to the matched **key** (or all attributes if **key** is empty).
-There may be at most one 'map' rule and it must be the last rule in the set.
-
-Note: When the 'security.capability' xattr is remapped, the daemon has to do
-extra work to remove it during many operations, which the host kernel normally
-does itself.
-
-Security considerations
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Operating systems typically partition the xattr namespace using
-well defined name prefixes. Each partition may have different
-access controls applied. For example, on Linux there are multiple
-partitions
-
- * ``system.*`` - access varies depending on attribute & filesystem
- * ``security.*`` - only processes with CAP_SYS_ADMIN
- * ``trusted.*`` - only processes with CAP_SYS_ADMIN
- * ``user.*`` - any process granted by file permissions / ownership
-
-While other OS such as FreeBSD have different name prefixes
-and access control rules.
-
-When remapping attributes on the host, it is important to
-ensure that the remapping does not allow a guest user to
-evade the guest access control rules.
-
-Consider if ``trusted.*`` from the guest was remapped to
-``user.virtiofs.trusted*`` in the host. An unprivileged
-user in a Linux guest has the ability to write to xattrs
-under ``user.*``. Thus the user can evade the access
-control restriction on ``trusted.*`` by instead writing
-to ``user.virtiofs.trusted.*``.
-
-As noted above, the partitions used and access controls
-applied, will vary across guest OS, so it is not wise to
-try to predict what the guest OS will use.
-
-The simplest way to avoid an insecure configuration is
-to remap all xattrs at once, to a given fixed prefix.
-This is shown in example (1) below.
-
-If selectively mapping only a subset of xattr prefixes,
-then rules must be added to explicitly block direct
-access to the target of the remapping. This is shown
-in example (2) below.
-
-Mapping examples
-~~~~~~~~~~~~~~~~
-
-1) Prefix all attributes with 'user.virtiofs.'
-
-::
-
- -o xattrmap=":prefix:all::user.virtiofs.::bad:all:::"
-
-
-This uses two rules, using : as the field separator;
-the first rule prefixes and strips 'user.virtiofs.',
-the second rule hides any non-prefixed attributes that
-the host set.
-
-This is equivalent to the 'map' rule:
-
-::
-
- -o xattrmap=":map::user.virtiofs.:"
-
-2) Prefix 'trusted.' attributes, allow others through
-
-::
-
-   "/prefix/all/trusted./user.virtiofs./
-    /bad/server//trusted./
-    /bad/client/user.virtiofs.//
-    /ok/all///"
-
-
-Here there are four rules, using / as the field
-separator, and also demonstrating that new lines can
-be included between rules.
-The first rule is the prefixing of 'trusted.' and
-stripping of 'user.virtiofs.'.
-The second rule hides unprefixed 'trusted.' attributes
-on the host.
-The third rule stops a guest from explicitly setting
-the 'user.virtiofs.' path directly to prevent access
-control bypass on the target of the earlier prefix
-remapping.
-Finally, the fourth rule lets all remaining attributes
-through.
-
-This is equivalent to the 'map' rule:
-
-::
-
- -o xattrmap="/map/trusted./user.virtiofs./"
-
-3) Hide 'security.' attributes, and allow everything else
-
-::
-
-    "/bad/all/security./security./
-     /ok/all///'
-
-The first rule combines what could be separate client and server
-rules into a single 'all' rule, matching 'security.' in either
-client arguments or lists returned from the host.  This stops
-the client seeing any 'security.' attributes on the server and
-stops it setting any.
-
-SELinux support
----------------
-One can enable support for SELinux by running virtiofsd with option
-"-o security_label". But this will try to save guest's security context
-in xattr security.selinux on host and it might fail if host's SELinux
-policy does not permit virtiofsd to do this operation.
-
-Hence, it is preferred to remap guest's "security.selinux" xattr to say
-"trusted.virtiofs.security.selinux" on host.
-
-"-o xattrmap=:map:security.selinux:trusted.virtiofs.:"
-
-This will make sure that guest and host's SELinux xattrs on same file
-remain separate and not interfere with each other. And will allow both
-host and guest to implement their own separate SELinux policies.
-
-Setting trusted xattr on host requires CAP_SYS_ADMIN. So one will need
-add this capability to daemon.
-
-"-o modcaps=+sys_admin"
-
-Giving CAP_SYS_ADMIN increases the risk on system. Now virtiofsd is more
-powerful and if gets compromised, it can do lot of damage to host system.
-So keep this trade-off in my mind while making a decision.
-
-Examples
---------
-
-Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket
-``/var/run/vm001-vhost-fs.sock``:
-
-.. parsed-literal::
-
-  host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001
-  host# |qemu_system| \\
-        -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \\
-        -device vhost-user-fs-pci,chardev=char0,tag=myfs \\
-        -object memory-backend-memfd,id=mem,size=4G,share=on \\
-        -numa node,memdev=mem \\
-        ...
-  guest# mount -t virtiofs myfs /mnt
index 279b07f09b184da5f86142bebc634db3b9b353f2..1362810991c968e112f75717ca0f09be9198c2bb 100644 (file)
@@ -1854,7 +1854,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
      */
     ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
     if (ret < 0) {
-        error_setg(errp, QERR_UNSUPPORTED);
+        error_setg(errp,
+                   "dumping guest memory is not supported on this target");
         goto cleanup;
     }
 
@@ -1864,10 +1865,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
 
     s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                      s->dump_info.d_machine, nr_cpus);
-    if (s->note_size < 0) {
-        error_setg(errp, QERR_UNSUPPORTED);
-        goto cleanup;
-    }
+    assert(s->note_size >= 0);
 
     /*
      * The goal of this block is to (a) update the previously guessed
diff --git a/gdb-xml/microblaze-core.xml b/gdb-xml/microblaze-core.xml
new file mode 100644 (file)
index 0000000..becf77c
--- /dev/null
@@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2008 Free Software Foundation, Inc.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.microblaze.core">
+  <reg name="r0" bitsize="32" regnum="0"/>
+  <reg name="r1" bitsize="32" type="data_ptr"/>
+  <reg name="r2" bitsize="32"/>
+  <reg name="r3" bitsize="32"/>
+  <reg name="r4" bitsize="32"/>
+  <reg name="r5" bitsize="32"/>
+  <reg name="r6" bitsize="32"/>
+  <reg name="r7" bitsize="32"/>
+  <reg name="r8" bitsize="32"/>
+  <reg name="r9" bitsize="32"/>
+  <reg name="r10" bitsize="32"/>
+  <reg name="r11" bitsize="32"/>
+  <reg name="r12" bitsize="32"/>
+  <reg name="r13" bitsize="32"/>
+  <reg name="r14" bitsize="32"/>
+  <reg name="r15" bitsize="32"/>
+  <reg name="r16" bitsize="32"/>
+  <reg name="r17" bitsize="32"/>
+  <reg name="r18" bitsize="32"/>
+  <reg name="r19" bitsize="32"/>
+  <reg name="r20" bitsize="32"/>
+  <reg name="r21" bitsize="32"/>
+  <reg name="r22" bitsize="32"/>
+  <reg name="r23" bitsize="32"/>
+  <reg name="r24" bitsize="32"/>
+  <reg name="r25" bitsize="32"/>
+  <reg name="r26" bitsize="32"/>
+  <reg name="r27" bitsize="32"/>
+  <reg name="r28" bitsize="32"/>
+  <reg name="r29" bitsize="32"/>
+  <reg name="r30" bitsize="32"/>
+  <reg name="r31" bitsize="32"/>
+  <reg name="rpc" bitsize="32" type="code_ptr"/>
+  <reg name="rmsr" bitsize="32"/>
+  <reg name="rear" bitsize="32"/>
+  <reg name="resr" bitsize="32"/>
+  <reg name="rfsr" bitsize="32"/>
+  <reg name="rbtr" bitsize="32"/>
+  <reg name="rpvr0" bitsize="32"/>
+  <reg name="rpvr1" bitsize="32"/>
+  <reg name="rpvr2" bitsize="32"/>
+  <reg name="rpvr3" bitsize="32"/>
+  <reg name="rpvr4" bitsize="32"/>
+  <reg name="rpvr5" bitsize="32"/>
+  <reg name="rpvr6" bitsize="32"/>
+  <reg name="rpvr7" bitsize="32"/>
+  <reg name="rpvr8" bitsize="32"/>
+  <reg name="rpvr9" bitsize="32"/>
+  <reg name="rpvr10" bitsize="32"/>
+  <reg name="rpvr11" bitsize="32"/>
+  <reg name="redr" bitsize="32"/>
+  <reg name="rpid" bitsize="32"/>
+  <reg name="rzpr" bitsize="32"/>
+  <reg name="rtlbx" bitsize="32"/>
+  <reg name="rtlbsx" bitsize="32"/>
+  <reg name="rtlblo" bitsize="32"/>
+  <reg name="rtlbhi" bitsize="32"/>
+</feature>
diff --git a/gdb-xml/microblaze-stack-protect.xml b/gdb-xml/microblaze-stack-protect.xml
new file mode 100644 (file)
index 0000000..997301e
--- /dev/null
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2008 Free Software Foundation, Inc.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.microblaze.stack-protect">
+  <reg name="rslr" bitsize="32"/>
+  <reg name="rshr" bitsize="32"/>
+</feature>
index 4c9d081ed48e10fdf6a56d81a442ed66acd44253..e268ce9b1a9c7d9199ba776f084b8f199cd4f5c5 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "hw/acpi/acpi.h"
 
 void acpi_table_add(const QemuOpts *opts, Error **errp)
 {
-    error_setg(errp, QERR_UNSUPPORTED);
+    g_assert_not_reached();
 }
index 0c9f158ac9e15eaee5ba82354c9a63f7de9f55bd..a39315c1b353ebcfe81452650de9b92c3c7d9784 100644 (file)
@@ -12,7 +12,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
 #include "qemu/module.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
@@ -244,20 +243,3 @@ static void vmgenid_register_types(void)
 }
 
 type_init(vmgenid_register_types)
-
-GuidInfo *qmp_query_vm_generation_id(Error **errp)
-{
-    GuidInfo *info;
-    VmGenIdState *vms;
-    Object *obj = find_vmgenid_dev();
-
-    if (!obj) {
-        error_setg(errp, "VM Generation ID device not found");
-        return NULL;
-    }
-    vms = VMGENID(obj);
-
-    info = g_malloc0(sizeof(*info));
-    info->guid = qemu_uuid_unparse_strdup(&vms->guid);
-    return info;
-}
index 2d157de9b8b42694b034c43401e00695f50b1826..b5aed4aff565308d4e0af5b26815a4aa70bf4c4f 100644 (file)
@@ -389,6 +389,7 @@ config XLNX_ZYNQMP_ARM
     select XLNX_CSU_DMA
     select XLNX_ZYNQMP
     select XLNX_ZDMA
+    select USB_DWC3
 
 config XLNX_VERSAL
     bool
index d85cc0276511b4ecbeae987fb49dc2d2c6b4738e..15ff21d04722b1be4f8707ab61ea320442b72494 100644 (file)
@@ -86,6 +86,8 @@ enum NPCM7xxInterrupt {
     NPCM7XX_EMC1RX_IRQ          = 15,
     NPCM7XX_EMC1TX_IRQ,
     NPCM7XX_MMC_IRQ             = 26,
+    NPCM7XX_PSPI2_IRQ           = 28,
+    NPCM7XX_PSPI1_IRQ           = 31,
     NPCM7XX_TIMER0_IRQ          = 32,   /* Timer Module 0 */
     NPCM7XX_TIMER1_IRQ,
     NPCM7XX_TIMER2_IRQ,
@@ -220,6 +222,12 @@ static const hwaddr npcm7xx_emc_addr[] = {
     0xf0826000,
 };
 
+/* Register base address for each PSPI Module */
+static const hwaddr npcm7xx_pspi_addr[] = {
+    0xf0200000,
+    0xf0201000,
+};
+
 static const struct {
     hwaddr regs_addr;
     uint32_t unconnected_pins;
@@ -444,6 +452,10 @@ static void npcm7xx_init(Object *obj)
         object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC);
     }
 
+    for (i = 0; i < ARRAY_SIZE(s->pspi); i++) {
+        object_initialize_child(obj, "pspi[*]", &s->pspi[i], TYPE_NPCM_PSPI);
+    }
+
     object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI);
 }
 
@@ -715,6 +727,17 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc), 0,
             npcm7xx_irq(s, NPCM7XX_MMC_IRQ));
 
+    /* PSPI */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm7xx_pspi_addr) != ARRAY_SIZE(s->pspi));
+    for (i = 0; i < ARRAY_SIZE(s->pspi); i++) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->pspi[i]);
+        int irq = (i == 0) ? NPCM7XX_PSPI1_IRQ : NPCM7XX_PSPI2_IRQ;
+
+        sysbus_realize(sbd, &error_abort);
+        sysbus_mmio_map(sbd, 0, npcm7xx_pspi_addr[i]);
+        sysbus_connect_irq(sbd, 0, npcm7xx_irq(s, irq));
+    }
+
     create_unimplemented_device("npcm7xx.shm",          0xc0001000,   4 * KiB);
     create_unimplemented_device("npcm7xx.vdmx",         0xe0800000,   4 * KiB);
     create_unimplemented_device("npcm7xx.pcierc",       0xe1000000,  64 * KiB);
@@ -724,8 +747,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
     create_unimplemented_device("npcm7xx.peci",         0xf0100000,   4 * KiB);
     create_unimplemented_device("npcm7xx.siox[1]",      0xf0101000,   4 * KiB);
     create_unimplemented_device("npcm7xx.siox[2]",      0xf0102000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.pspi1",        0xf0200000,   4 * KiB);
-    create_unimplemented_device("npcm7xx.pspi2",        0xf0201000,   4 * KiB);
     create_unimplemented_device("npcm7xx.ahbpci",       0xf0400000,   1 * MiB);
     create_unimplemented_device("npcm7xx.mcphy",        0xf05f0000,  64 * KiB);
     create_unimplemented_device("npcm7xx.gmac1",        0xf0802000,   8 * KiB);
index 733c9647784770114c87c5f6c5029b7a94262094..0a5a60ca1e91ccc368a17ede2da32e09874c5f8f 100644 (file)
@@ -249,7 +249,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
         /* there is a ttbr0 region and we are in it (high bits all zero) */
         return &cfg->tt[0];
     } else if (cfg->tt[1].tsz &&
-           !extract64(iova, 64 - cfg->tt[1].tsz, cfg->tt[1].tsz - tbi_byte)) {
+        sextract64(iova, 64 - cfg->tt[1].tsz, cfg->tt[1].tsz - tbi_byte) == -1) {
         /* there is a ttbr1 region and we are in it (high bits all one) */
         return &cfg->tt[1];
     } else if (!cfg->tt[0].tsz) {
@@ -439,7 +439,7 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
 
         memory_region_init_iommu(&sdev->iommu, sizeof(sdev->iommu),
                                  s->mrtypename,
-                                 OBJECT(s), name, 1ULL << SMMU_MAX_VA_BITS);
+                                 OBJECT(s), name, UINT64_MAX);
         address_space_init(&sdev->as,
                            MEMORY_REGION(&sdev->iommu), name);
         trace_smmu_add_mr(name);
index bce161870f697961546912d37a436125e573c4e8..e8f0ebf25e31e9404eef4b466bf6696b95776876 100644 (file)
@@ -79,6 +79,13 @@ REG32(CR0ACK,              0x24)
 REG32(CR1,                 0x28)
 REG32(CR2,                 0x2c)
 REG32(STATUSR,             0x40)
+REG32(GBPA,                0x44)
+    FIELD(GBPA, ABORT,        20, 1)
+    FIELD(GBPA, UPDATE,       31, 1)
+
+/* Use incoming. */
+#define SMMU_GBPA_RESET_VAL 0x1000
+
 REG32(IRQ_CTRL,            0x50)
     FIELD(IRQ_CTRL, GERROR_IRQEN,        0, 1)
     FIELD(IRQ_CTRL, PRI_IRQEN,           1, 1)
index 955b89c8d5921d542e3402f3831af4d835348fe8..270c80b665fcc4518898bf264d284a8156bb121a 100644 (file)
@@ -285,6 +285,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
     s->gerror = 0;
     s->gerrorn = 0;
     s->statusr = 0;
+    s->gbpa = SMMU_GBPA_RESET_VAL;
 }
 
 static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
@@ -659,7 +660,11 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
     qemu_mutex_lock(&s->mutex);
 
     if (!smmu_enabled(s)) {
-        status = SMMU_TRANS_DISABLE;
+        if (FIELD_EX32(s->gbpa, GBPA, ABORT)) {
+            status = SMMU_TRANS_ABORT;
+        } else {
+            status = SMMU_TRANS_DISABLE;
+        }
         goto epilogue;
     }
 
@@ -1170,6 +1175,16 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
     case A_GERROR_IRQ_CFG2:
         s->gerror_irq_cfg2 = data;
         return MEMTX_OK;
+    case A_GBPA:
+        /*
+         * If UPDATE is not set, the write is ignored. This is the only
+         * permitted behavior in SMMUv3.2 and later.
+         */
+        if (data & R_GBPA_UPDATE_MASK) {
+            /* Ignore update bit as write is synchronous. */
+            s->gbpa = data & ~R_GBPA_UPDATE_MASK;
+        }
+        return MEMTX_OK;
     case A_STRTAB_BASE: /* 64b */
         s->strtab_base = deposit64(s->strtab_base, 0, 32, data);
         return MEMTX_OK;
@@ -1318,6 +1333,9 @@ static MemTxResult smmu_readl(SMMUv3State *s, hwaddr offset,
     case A_STATUSR:
         *data = s->statusr;
         return MEMTX_OK;
+    case A_GBPA:
+        *data = s->gbpa;
+        return MEMTX_OK;
     case A_IRQ_CTRL:
     case A_IRQ_CTRL_ACK:
         *data = s->irq_ctrl;
@@ -1482,6 +1500,25 @@ static const VMStateDescription vmstate_smmuv3_queue = {
     },
 };
 
+static bool smmuv3_gbpa_needed(void *opaque)
+{
+    SMMUv3State *s = opaque;
+
+    /* Only migrate GBPA if it differs from its reset value. */
+    return s->gbpa != SMMU_GBPA_RESET_VAL;
+}
+
+static const VMStateDescription vmstate_gbpa = {
+    .name = "smmuv3/gbpa",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = smmuv3_gbpa_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(gbpa, SMMUv3State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_smmuv3 = {
     .name = "smmuv3",
     .version_id = 1,
@@ -1512,6 +1549,10 @@ static const VMStateDescription vmstate_smmuv3 = {
 
         VMSTATE_END_OF_LIST(),
     },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_gbpa,
+        NULL
+    }
 };
 
 static void smmuv3_instance_init(Object *obj)
index 75f28947de07f3c8173a7333d090b73ad3bcc64a..ac626b3bef74f22703119c1c23fd17a744e2abc7 100644 (file)
@@ -2133,21 +2133,21 @@ static void machvirt_init(MachineState *machine)
     if (vms->secure && (kvm_enabled() || hvf_enabled())) {
         error_report("mach-virt: %s does not support providing "
                      "Security extensions (TrustZone) to the guest CPU",
-                     kvm_enabled() ? "KVM" : "HVF");
+                     current_accel_name());
         exit(1);
     }
 
     if (vms->virt && (kvm_enabled() || hvf_enabled())) {
         error_report("mach-virt: %s does not support providing "
                      "Virtualization extensions to the guest CPU",
-                     kvm_enabled() ? "KVM" : "HVF");
+                     current_accel_name());
         exit(1);
     }
 
     if (vms->mte && (kvm_enabled() || hvf_enabled())) {
         error_report("mach-virt: %s does not support providing "
                      "MTE to the guest CPU",
-                     kvm_enabled() ? "KVM" : "HVF");
+                     current_accel_name());
         exit(1);
     }
 
@@ -3013,7 +3013,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     mc->minimum_page_bits = 12;
     mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
     mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
+#ifdef CONFIG_TCG
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
+#else
+    mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
+#endif
     mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
     mc->kvm_type = virt_kvm_type;
     assert(!mc->get_hotplug_handler);
index 176251787836a42bac66466720fef8165eb14fec..cefca93b312b4e4781a6e8216d8da13dea1645bf 100644 (file)
@@ -894,6 +894,10 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
     uint64_t capacity;
     int64_t length;
     int blk_size = conf->logical_block_size;
+    AioContext *ctx;
+
+    ctx = blk_get_aio_context(s->blk);
+    aio_context_acquire(ctx);
 
     blk_get_geometry(s->blk, &capacity);
     memset(&blkcfg, 0, sizeof(blkcfg));
@@ -917,6 +921,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
      * per track (cylinder).
      */
     length = blk_getlength(s->blk);
+    aio_context_release(ctx);
     if (length > 0 && length / conf->heads / conf->secs % blk_size) {
         blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
     } else {
index e58181fcf429323d6b2b444864cb59ca9c9bc1e0..f70adb530819bc30dbf701cbcd8eddf208ceebd1 100644 (file)
@@ -31,6 +31,7 @@
 #include "hw/qdev-clock.h"
 #include "hw/qdev-properties.h"
 #include "hw/qdev-properties-system.h"
+#include "hw/registerfields.h"
 #include "migration/vmstate.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
index 44b5da888024fc44637336370bc539aadebbd0cf..2d904747c080f2ba9a33cc6730278f4079ea80bf 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/acpi/vmgenid.h"
 #include "hw/boards.h"
 #include "hw/intc/intc.h"
 #include "hw/mem/memory-device.h"
@@ -15,7 +16,6 @@
 #include "qapi/error.h"
 #include "qapi/qapi-builtin-visit.h"
 #include "qapi/qapi-commands-machine.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qobject.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/type-helpers.h"
@@ -140,7 +140,7 @@ HotpluggableCPUList *qmp_query_hotpluggable_cpus(Error **errp)
     MachineClass *mc = MACHINE_GET_CLASS(ms);
 
     if (!mc->has_hotpluggable_cpus) {
-        error_setg(errp, QERR_FEATURE_DISABLED, "query-hotpluggable-cpus");
+        error_setg(errp, "machine does not support hot-plugging CPUs");
         return NULL;
     }
 
@@ -383,3 +383,20 @@ HumanReadableText *qmp_x_query_irq(Error **errp)
 
     return human_readable_text_from_str(buf);
 }
+
+GuidInfo *qmp_query_vm_generation_id(Error **errp)
+{
+    GuidInfo *info;
+    VmGenIdState *vms;
+    Object *obj = find_vmgenid_dev();
+
+    if (!obj) {
+        error_setg(errp, "VM Generation ID device not found");
+        return NULL;
+    }
+    vms = VMGENID(obj);
+
+    info = g_malloc0(sizeof(*info));
+    info->guid = qemu_uuid_unparse_strdup(&vms->guid);
+    return info;
+}
index f73fc4c45c41eead1e2819f726b2c771629bc560..f29e700ee4d0bff2cf7a281887fdbeeb5f8c8d77 100644 (file)
@@ -13,7 +13,6 @@
 #include "qemu/osdep.h"
 #include "qemu/option.h"
 #include "qemu/accel.h"
-#include "qapi/qmp/qerror.h"
 #include "sysemu/replay.h"
 #include "qemu/units.h"
 #include "hw/boards.h"
index 481c4b3c7e53a5dc500c1f3d92c72aa9f04baa39..a7bce8a04a121dcbe06478f79365009f33c23aec 100644 (file)
@@ -22,7 +22,6 @@
 #include "qemu/osdep.h"
 #include "hw/nmi.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/module.h"
 #include "monitor/monitor.h"
 
@@ -70,7 +69,7 @@ void nmi_monitor_handle(int cpu_index, Error **errp)
     if (ns.handled) {
         error_propagate(errp, ns.err);
     } else {
-        error_setg(errp, QERR_UNSUPPORTED);
+        error_setg(errp, "machine does not provide NMIs");
     }
 }
 
index 4380a5e672645b1866f1bf09775e0876c8c22179..71dfd956b802ab990e2740fe6f861ad4f12ba61b 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "hw/qdev-properties.h"
 #include "hw/virtio/virtio-gpu.h"
index 847fa4c0cc3214a4b1ba68325d12d45e4eefda2a..69e2cf0bd65d1c5ab746dc1f12dfc98e916698d4 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/units.h"
 #include "qemu/iov.h"
 #include "ui/console.h"
index 73cb92c8d5c6f00726cb27199b3e93da04754a4c..1c47603d40a21f9b4d04742a4a0e73eb92a70f31 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/iov.h"
 #include "trace.h"
 #include "hw/virtio/virtio.h"
index 1bf47b0b0b4437251442b69c30eed1f031f0280b..9fbfe748b57dddb0892630dc129fb989c66f0612 100644 (file)
@@ -26,7 +26,6 @@ config PC
     imply QXL
     imply SEV
     imply SGX
-    imply SGA
     imply TEST_DEVICES
     imply TPM_CRB
     imply TPM_TIS_ISA
index 6e592bd969aa478b756e2fb3a383640ba5d6c2c4..a7a2ededf95bcd63675d74e2c3790b18eac0d3a4 100644 (file)
@@ -92,7 +92,6 @@
 #include "hw/mem/memory-device.h"
 #include "sysemu/replay.h"
 #include "target/i386/cpu.h"
-#include "qapi/qmp/qerror.h"
 #include "e820_memory_layout.h"
 #include "fw_cfg.h"
 #include "trace.h"
index eaff4227bd68b5362c5edf6825df5c16c29a5601..48be7a1c23797028262a589e77d533217667a71e 100644 (file)
@@ -28,7 +28,6 @@
 #include "qemu/datadir.h"
 #include "qemu/guest-random.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qapi-visit-common.h"
 #include "qapi/clone-visitor.h"
 #include "qapi/qapi-visit-machine.h"
index 1f7763964c37c374d8fced6997a7ce0992b7b1f1..e54553283f4282e8ebd72b4f39339e204ffd7676 100644 (file)
@@ -389,7 +389,7 @@ static inline int nvic_exec_prio(NVICState *s)
     return MIN(running, s->exception_prio);
 }
 
-bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
+bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure)
 {
     /* Return true if the requested execution priority is negative
      * for the specified security state, ie that security state
@@ -399,8 +399,6 @@ bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
      * mean we don't allow FAULTMASK_NS to actually make the execution
      * priority negative). Compare pseudocode IsReqExcPriNeg().
      */
-    NVICState *s = opaque;
-
     if (s->cpu->env.v7m.faultmask[secure]) {
         return true;
     }
@@ -418,17 +416,13 @@ bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
     return false;
 }
 
-bool armv7m_nvic_can_take_pending_exception(void *opaque)
+bool armv7m_nvic_can_take_pending_exception(NVICState *s)
 {
-    NVICState *s = opaque;
-
     return nvic_exec_prio(s) > nvic_pending_prio(s);
 }
 
-int armv7m_nvic_raw_execution_priority(void *opaque)
+int armv7m_nvic_raw_execution_priority(NVICState *s)
 {
-    NVICState *s = opaque;
-
     return s->exception_prio;
 }
 
@@ -506,9 +500,8 @@ static void nvic_irq_update(NVICState *s)
  * if @secure is true and @irq does not specify one of the fixed set
  * of architecturally banked exceptions.
  */
-static void armv7m_nvic_clear_pending(void *opaque, int irq, bool secure)
+static void armv7m_nvic_clear_pending(NVICState *s, int irq, bool secure)
 {
-    NVICState *s = (NVICState *)opaque;
     VecInfo *vec;
 
     assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
@@ -666,17 +659,17 @@ static void do_armv7m_nvic_set_pending(void *opaque, int irq, bool secure,
     }
 }
 
-void armv7m_nvic_set_pending(void *opaque, int irq, bool secure)
+void armv7m_nvic_set_pending(NVICState *s, int irq, bool secure)
 {
-    do_armv7m_nvic_set_pending(opaque, irq, secure, false);
+    do_armv7m_nvic_set_pending(s, irq, secure, false);
 }
 
-void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure)
+void armv7m_nvic_set_pending_derived(NVICState *s, int irq, bool secure)
 {
-    do_armv7m_nvic_set_pending(opaque, irq, secure, true);
+    do_armv7m_nvic_set_pending(s, irq, secure, true);
 }
 
-void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure)
+void armv7m_nvic_set_pending_lazyfp(NVICState *s, int irq, bool secure)
 {
     /*
      * Pend an exception during lazy FP stacking. This differs
@@ -684,7 +677,6 @@ void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure)
      * whether we should escalate depends on the saved context
      * in the FPCCR register, not on the current state of the CPU/NVIC.
      */
-    NVICState *s = (NVICState *)opaque;
     bool banked = exc_is_banked(irq);
     VecInfo *vec;
     bool targets_secure;
@@ -773,9 +765,8 @@ void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure)
 }
 
 /* Make pending IRQ active.  */
-void armv7m_nvic_acknowledge_irq(void *opaque)
+void armv7m_nvic_acknowledge_irq(NVICState *s)
 {
-    NVICState *s = (NVICState *)opaque;
     CPUARMState *env = &s->cpu->env;
     const int pending = s->vectpending;
     const int running = nvic_exec_prio(s);
@@ -814,10 +805,9 @@ static bool vectpending_targets_secure(NVICState *s)
         exc_targets_secure(s, s->vectpending);
 }
 
-void armv7m_nvic_get_pending_irq_info(void *opaque,
+void armv7m_nvic_get_pending_irq_info(NVICState *s,
                                       int *pirq, bool *ptargets_secure)
 {
-    NVICState *s = (NVICState *)opaque;
     const int pending = s->vectpending;
     bool targets_secure;
 
@@ -831,9 +821,8 @@ void armv7m_nvic_get_pending_irq_info(void *opaque,
     *pirq = pending;
 }
 
-int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure)
+int armv7m_nvic_complete_irq(NVICState *s, int irq, bool secure)
 {
-    NVICState *s = (NVICState *)opaque;
     VecInfo *vec = NULL;
     int ret = 0;
 
@@ -915,7 +904,7 @@ int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure)
     return ret;
 }
 
-bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure)
+bool armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure)
 {
     /*
      * Return whether an exception is "ready", i.e. it is enabled and is
@@ -926,7 +915,6 @@ bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure)
      * for non-banked exceptions secure is always false; for banked exceptions
      * it indicates which of the exceptions is required.
      */
-    NVICState *s = (NVICState *)opaque;
     bool banked = exc_is_banked(irq);
     VecInfo *vec;
     int running = nvic_exec_prio(s);
index e6640eb8e722ac2f4964c94c90397b56a35af4e8..72f038d47daa09e3052dfa723f3026a223429c56 100644 (file)
@@ -77,6 +77,13 @@ static void sparse_mem_write(void *opaque, hwaddr addr, uint64_t v,
 
 }
 
+static void sparse_mem_enter_reset(Object *obj, ResetType type)
+{
+    SparseMemState *s = SPARSE_MEM(obj);
+    g_hash_table_remove_all(s->mapped);
+    return;
+}
+
 static const MemoryRegionOps sparse_mem_ops = {
     .read = sparse_mem_read,
     .write = sparse_mem_write,
@@ -123,7 +130,8 @@ static void sparse_mem_realize(DeviceState *dev, Error **errp)
 
     assert(s->baseaddr + s->length > s->baseaddr);
 
-    s->mapped = g_hash_table_new(NULL, NULL);
+    s->mapped = g_hash_table_new_full(NULL, NULL, NULL,
+                                      (GDestroyNotify)g_free);
     memory_region_init_io(&s->mmio, OBJECT(s), &sparse_mem_ops, s,
                           "sparse-mem", s->length);
     sysbus_init_mmio(sbd, &s->mmio);
@@ -131,12 +139,15 @@ static void sparse_mem_realize(DeviceState *dev, Error **errp)
 
 static void sparse_mem_class_init(ObjectClass *klass, void *data)
 {
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     device_class_set_props(dc, sparse_mem_properties);
 
     dc->desc = "Sparse Memory Device";
     dc->realize = sparse_mem_realize;
+
+    rc->phases.enter = sparse_mem_enter_reset;
 }
 
 static const TypeInfo sparse_mem_types[] = {
index eaeddca277b545f7028b474cb378f05c60ffc034..2ef5781ef87b7fc07ceb069ff1981c5576d8b716 100644 (file)
@@ -15,10 +15,6 @@ config ISA_DEBUG
     bool
     depends on ISA_BUS
 
-config SGA
-    bool
-    depends on ISA_BUS
-
 config ISA_TESTDEV
     bool
     default y if TEST_DEVICES
index 5f9c742e504b5ca39423a338fdb633ffb9b98a99..72300d0cbc38b66cd530582684f1a0c5023639b4 100644 (file)
@@ -34,6 +34,7 @@
 #include "hw/isa/isa.h"
 #include "hw/qdev-properties.h"
 #include "ui/console.h"
+#include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/timer.h"
 #include "qom/object.h"
index 448e14b531a36e275ba522fb42e559fc8836bef5..fe869b98ca4ddf53dd6518843bbac59d9f99352f 100644 (file)
@@ -5,7 +5,6 @@ softmmu_ss.add(when: 'CONFIG_ISA_DEBUG', if_true: files('debugexit.c'))
 softmmu_ss.add(when: 'CONFIG_ISA_TESTDEV', if_true: files('pc-testdev.c'))
 softmmu_ss.add(when: 'CONFIG_PCA9552', if_true: files('pca9552.c'))
 softmmu_ss.add(when: 'CONFIG_PCI_TESTDEV', if_true: files('pci-testdev.c'))
-softmmu_ss.add(when: 'CONFIG_SGA', if_true: files('sga.c'))
 softmmu_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c'))
 softmmu_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c'))
 softmmu_ss.add(when: 'CONFIG_LED', if_true: files('led.c'))
diff --git a/hw/misc/sga.c b/hw/misc/sga.c
deleted file mode 100644 (file)
index 1d04672..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * QEMU dummy ISA device for loading sgabios option rom.
- *
- * Copyright (c) 2011 Glauber Costa, Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- * sgabios code originally available at code.google.com/p/sgabios
- *
- */
-
-#include "qemu/osdep.h"
-#include "hw/isa/isa.h"
-#include "hw/loader.h"
-#include "qemu/module.h"
-#include "qom/object.h"
-#include "qemu/error-report.h"
-
-#define SGABIOS_FILENAME "sgabios.bin"
-
-#define TYPE_SGA "sga"
-OBJECT_DECLARE_SIMPLE_TYPE(ISASGAState, SGA)
-
-struct ISASGAState {
-    ISADevice parent_obj;
-};
-
-static void sga_realizefn(DeviceState *dev, Error **errp)
-{
-    warn_report("-device sga is deprecated, use -machine graphics=off");
-    rom_add_vga(SGABIOS_FILENAME);
-}
-
-static void sga_class_initfn(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-
-    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->realize = sga_realizefn;
-    dc->desc = "Serial Graphics Adapter";
-}
-
-static const TypeInfo sga_info = {
-    .name          = TYPE_SGA,
-    .parent        = TYPE_ISA_DEVICE,
-    .instance_size = sizeof(ISASGAState),
-    .class_init    = sga_class_initfn,
-};
-
-static void sga_register_types(void)
-{
-    type_register_static(&sga_info);
-}
-
-type_init(sga_register_types)
index 20de23cf671b78904190eb84c094f231ea6eb124..3d2be95e6db244a7d98ad96e382cedb13f7fef58 100644 (file)
@@ -18,7 +18,6 @@
 #include "hw/register.h"
 
 #include "qemu/bitops.h"
-#include "qapi/qmp/qerror.h"
 
 #include "hw/misc/xlnx-zynqmp-apu-ctrl.h"
 
index f1cba55967247e46b59023a60b62631343a0b6a0..e5c4af182dfa79e8396aab33e4ecfa9414d513ec 100644 (file)
@@ -15,7 +15,6 @@
 #include "migration/vmstate.h"
 #include "net/net.h"
 #include "net/eth.h"
-#include "hw/hw.h"
 #include "hw/irq.h"
 #include "hw/net/lan9118.h"
 #include "hw/ptimer.h"
 #ifdef DEBUG_LAN9118
 #define DPRINTF(fmt, ...) \
 do { printf("lan9118: " fmt , ## __VA_ARGS__); } while (0)
-#define BADF(fmt, ...) \
-do { hw_error("lan9118: error: " fmt , ## __VA_ARGS__);} while (0)
 #else
 #define DPRINTF(fmt, ...) do {} while(0)
-#define BADF(fmt, ...) \
-do { fprintf(stderr, "lan9118: error: " fmt , ## __VA_ARGS__);} while (0)
 #endif
 
 /* The tx and rx fifo ports are a range of aliased 32-bit registers */
@@ -848,7 +843,8 @@ static uint32_t do_phy_read(lan9118_state *s, int reg)
     case 30: /* Interrupt mask */
         return s->phy_int_mask;
     default:
-        BADF("PHY read reg %d\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "do_phy_read: PHY read reg %d\n", reg);
         return 0;
     }
 }
@@ -876,7 +872,8 @@ static void do_phy_write(lan9118_state *s, int reg, uint32_t val)
         phy_update_irq(s);
         break;
     default:
-        BADF("PHY write reg %d = 0x%04x\n", reg, val);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "do_phy_write: PHY write reg %d = 0x%04x\n", reg, val);
     }
 }
 
@@ -1209,7 +1206,8 @@ static void lan9118_16bit_mode_write(void *opaque, hwaddr offset,
         return;
     }
 
-    hw_error("lan9118_write: Bad size 0x%x\n", size);
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "lan9118_16bit_mode_write: Bad size 0x%x\n", size);
 }
 
 static uint64_t lan9118_readl(void *opaque, hwaddr offset,
@@ -1324,7 +1322,8 @@ static uint64_t lan9118_16bit_mode_read(void *opaque, hwaddr offset,
         return lan9118_readl(opaque, offset, size);
     }
 
-    hw_error("lan9118_read: Bad size 0x%x\n", size);
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "lan9118_16bit_mode_read: Bad size 0x%x\n", size);
     return 0;
 }
 
index 5ef4f9324c36e88e2a1336c0dc98d8a871b8c360..f6c1196a24433d6674fac7eff1e544c1d5e735c6 100644 (file)
@@ -1,6 +1,5 @@
 /*
- * QMP Target options - Commands handled based on a target config
- *                      versus a host config
+ * QMP command stubs
  *
  * Copyright (c) 2015 David Ahern <dsahern@gmail.com>
  *
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-rocker.h"
-#include "qapi/qmp/qerror.h"
 
 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 {
-    error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+    error_setg(errp, "rocker %s not found", name);
     return NULL;
 };
 
 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
 {
-    error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+    error_setg(errp, "rocker %s not found", name);
     return NULL;
 };
 
@@ -37,7 +35,7 @@ RockerOfDpaFlowList *qmp_query_rocker_of_dpa_flows(const char *name,
                                                    uint32_t tbl_id,
                                                    Error **errp)
 {
-    error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+    error_setg(errp, "rocker %s not found", name);
     return NULL;
 };
 
@@ -46,6 +44,6 @@ RockerOfDpaGroupList *qmp_query_rocker_of_dpa_groups(const char *name,
                                                      uint8_t type,
                                                      Error **errp)
 {
-    error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+    error_setg(errp, "rocker %s not found", name);
     return NULL;
 };
index d2ab527ef457642d2a1e1561dec3e3a52210f594..56559cda24af36f3ec9264c765871537bdd887cd 100644 (file)
@@ -1441,7 +1441,7 @@ static void vmxnet3_activate_device(VMXNET3State *s)
     vmxnet3_setup_rx_filtering(s);
     /* Cache fields from shared memory */
     s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
-    assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU);
+    assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu <= VMXNET3_MAX_MTU);
     VMW_CFPRN("MTU is %u", s->mtu);
 
     s->max_rx_frags =
index 208c16f450a94bdd3e1a90129b145b9173585ec1..cc51f98593e03f09eeec77b35a743ec271faec55 100644 (file)
@@ -1789,7 +1789,6 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
                                const char *default_devaddr)
 {
     const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr;
-    GSList *list;
     GPtrArray *pci_nic_models;
     PCIBus *bus;
     PCIDevice *pci_dev;
@@ -1804,33 +1803,7 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
         nd->model = g_strdup("virtio-net-pci");
     }
 
-    list = object_class_get_list_sorted(TYPE_PCI_DEVICE, false);
-    pci_nic_models = g_ptr_array_new();
-    while (list) {
-        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data,
-                                             TYPE_DEVICE);
-        GSList *next;
-        if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) &&
-            dc->user_creatable) {
-            const char *name = object_class_get_name(list->data);
-            /*
-             * A network device might also be something else than a NIC, see
-             * e.g. the "rocker" device. Thus we have to look for the "netdev"
-             * property, too. Unfortunately, some devices like virtio-net only
-             * create this property during instance_init, so we have to create
-             * a temporary instance here to be able to check it.
-             */
-            Object *obj = object_new_with_class(OBJECT_CLASS(dc));
-            if (object_property_find(obj, "netdev")) {
-                g_ptr_array_add(pci_nic_models, (gpointer)name);
-            }
-            object_unref(obj);
-        }
-        next = list->next;
-        g_slist_free_1(list);
-        list = next;
-    }
-    g_ptr_array_add(pci_nic_models, NULL);
+    pci_nic_models = qemu_get_nic_models(TYPE_PCI_DEVICE);
 
     if (qemu_show_nic_models(nd->model, (const char **)pci_nic_models->pdata)) {
         exit(0);
index faa51aa4c70d8a95c52a50ee341bab9900513baa..6891e3cd73b466d8ef816b07b2e2e593c4341818 100644 (file)
@@ -64,8 +64,7 @@ static bool event_pending(SCLPEventFacility *ef)
     SCLPEventClass *event_class;
 
     QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
-        DeviceState *qdev = kid->child;
-        event = DO_UPCAST(SCLPEvent, qdev, qdev);
+        event = SCLP_EVENT(kid->child);
         event_class = SCLP_EVENT_GET_CLASS(event);
         if (event->event_pending &&
             event_class->get_send_mask() & ef->receive_mask) {
index 3e32002eab86fd34b7b93d90bf1faee106904c3e..aed919ad7df77952f81805ef077509b737260145 100644 (file)
@@ -182,17 +182,15 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
     return 0;
 }
 
-static void cmma_state_pending(void *opaque,
-                               uint64_t *res_precopy_only,
-                               uint64_t *res_compatible,
-                               uint64_t *res_postcopy_only)
+static void cmma_state_pending(void *opaque, uint64_t *must_precopy,
+                               uint64_t *can_postcopy)
 {
     S390StAttribState *sas = S390_STATTRIB(opaque);
     S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
     long long res = sac->get_dirtycount(sas);
 
     if (res >= 0) {
-        *res_precopy_only += res;
+        *must_precopy += res;
     }
 }
 
index d4e360850fefa74df23e6ecbd102910b7795a3af..97c9b1c8cd99c840c8a6679a199077ae94e9e69e 100644 (file)
@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     if (scsi_disk_req_check_error(r, ret, true)) {
         goto done;
     }
@@ -352,6 +354,7 @@ done:
     scsi_req_unref(&r->req);
 }
 
+/* Called with AioContext lock held */
 static void scsi_dma_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -360,14 +363,12 @@ static void scsi_dma_complete(void *opaque, int ret)
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
     } else {
         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
     }
     scsi_dma_complete_noio(r, ret);
-    aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
 }
 
 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
@@ -393,10 +394,11 @@ static void scsi_read_complete(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
     } else {
@@ -446,10 +448,11 @@ static void scsi_do_read_cb(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     assert (r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
     } else {
@@ -530,10 +533,11 @@ static void scsi_write_complete(void * opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     assert (r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
     } else {
@@ -1737,10 +1741,11 @@ static void scsi_unmap_complete(void *opaque, int ret)
     SCSIDiskReq *r = data->r;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
     if (scsi_disk_req_check_error(r, ret, true)) {
         scsi_req_unref(&r->req);
         g_free(data);
@@ -1816,9 +1821,11 @@ static void scsi_write_same_complete(void *opaque, int ret)
     SCSIDiskReq *r = data->r;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
+    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
-    aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
     if (scsi_disk_req_check_error(r, ret, true)) {
         goto done;
     }
index 92cce20a4dbec0e721695d33a599e80624719778..ac9fa662b4e314990653f439c3c64663b8e9361c 100644 (file)
@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret)
     SCSIGenericReq *r = (SCSIGenericReq *)opaque;
     SCSIDevice *s = r->req.dev;
 
+    aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->conf.blk));
     scsi_command_complete_noio(r, ret);
     aio_context_release(blk_get_aio_context(s->conf.blk));
 }
@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret)
     SCSIDevice *s = r->req.dev;
     int len;
 
+    aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
     if (ret || r->req.io_canceled) {
         scsi_command_complete_noio(r, ret);
         goto done;
@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret)
 
     trace_scsi_generic_write_complete(ret);
 
+    aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
-    aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
     if (ret || r->req.io_canceled) {
         scsi_command_complete_noio(r, ret);
         goto done;
index 2b649ca9762fe0b544ab6d5d1c617b1bde4cd2e2..612c525d9da369a97822fe96e0c67925c514b4a0 100644 (file)
@@ -43,13 +43,11 @@ typedef struct VirtIOSCSIReq {
     QEMUSGList qsgl;
     QEMUIOVector resp_iov;
 
-    union {
-        /* Used for two-stage request submission */
-        QTAILQ_ENTRY(VirtIOSCSIReq) next;
+    /* Used for two-stage request submission and TMFs deferred to BH */
+    QTAILQ_ENTRY(VirtIOSCSIReq) next;
 
-        /* Used for cancellation of request during TMFs */
-        int remaining;
-    };
+    /* Used for cancellation of request during TMFs */
+    int remaining;
 
     SCSIRequest *sreq;
     size_t resp_size;
@@ -294,6 +292,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
     }
 }
 
+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
+{
+    VirtIOSCSI *s = req->dev;
+    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
+    BusChild *kid;
+    int target;
+
+    switch (req->req.tmf.subtype) {
+    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+        if (!d) {
+            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+            goto out;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
+            goto out;
+        }
+        qatomic_inc(&s->resetting);
+        device_cold_reset(&d->qdev);
+        qatomic_dec(&s->resetting);
+        break;
+
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+        target = req->req.tmf.lun[1];
+        qatomic_inc(&s->resetting);
+
+        rcu_read_lock();
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
+            if (d1->channel == 0 && d1->id == target) {
+                device_cold_reset(&d1->qdev);
+            }
+        }
+        rcu_read_unlock();
+
+        qatomic_dec(&s->resetting);
+        break;
+
+    default:
+        g_assert_not_reached();
+        break;
+    }
+
+out:
+    object_unref(OBJECT(d));
+
+    virtio_scsi_acquire(s);
+    virtio_scsi_complete_req(req);
+    virtio_scsi_release(s);
+}
+
+/* Some TMFs must be processed from the main loop thread */
+static void virtio_scsi_do_tmf_bh(void *opaque)
+{
+    VirtIOSCSI *s = opaque;
+    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
+    VirtIOSCSIReq *req;
+    VirtIOSCSIReq *tmp;
+
+    GLOBAL_STATE_CODE();
+
+    virtio_scsi_acquire(s);
+
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
+        QTAILQ_INSERT_TAIL(&reqs, req, next);
+    }
+
+    qemu_bh_delete(s->tmf_bh);
+    s->tmf_bh = NULL;
+
+    virtio_scsi_release(s);
+
+    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
+        QTAILQ_REMOVE(&reqs, req, next);
+        virtio_scsi_do_one_tmf_bh(req);
+    }
+}
+
+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
+{
+    VirtIOSCSIReq *req;
+    VirtIOSCSIReq *tmp;
+
+    GLOBAL_STATE_CODE();
+
+    virtio_scsi_acquire(s);
+
+    if (s->tmf_bh) {
+        qemu_bh_delete(s->tmf_bh);
+        s->tmf_bh = NULL;
+    }
+
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
+
+        /* SAM-6 6.3.2 Hard reset */
+        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
+        virtio_scsi_complete_req(req);
+    }
+
+    virtio_scsi_release(s);
+}
+
+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+{
+    VirtIOSCSI *s = req->dev;
+
+    QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
+
+    if (!s->tmf_bh) {
+        s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
+        qemu_bh_schedule(s->tmf_bh);
+    }
+}
+
 /* Return 0 if the request is ready to be completed and return to guest;
  * -EINPROGRESS if the request is submitted and will be completed later, in the
  *  case of async cancellation. */
@@ -301,8 +415,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 {
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
     SCSIRequest *r, *next;
-    BusChild *kid;
-    int target;
     int ret = 0;
 
     virtio_scsi_ctx_check(s, d);
@@ -359,15 +471,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         break;
 
     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-        if (!d) {
-            goto fail;
-        }
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
-            goto incorrect_lun;
-        }
-        s->resetting++;
-        device_cold_reset(&d->qdev);
-        s->resetting--;
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+        virtio_scsi_defer_tmf_to_bh(req);
+        ret = -EINPROGRESS;
         break;
 
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
@@ -410,22 +516,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         }
         break;
 
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        target = req->req.tmf.lun[1];
-        s->resetting++;
-
-        rcu_read_lock();
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
-            if (d1->channel == 0 && d1->id == target) {
-                device_cold_reset(&d1->qdev);
-            }
-        }
-        rcu_read_unlock();
-
-        s->resetting--;
-        break;
-
     case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
     default:
         req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
@@ -655,7 +745,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r)
     if (!req) {
         return;
     }
-    if (req->dev->resetting) {
+    if (qatomic_read(&req->dev->resetting)) {
         req->resp.cmd.response = VIRTIO_SCSI_S_RESET;
     } else {
         req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED;
@@ -831,9 +921,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
 
     assert(!s->dataplane_started);
-    s->resetting++;
+
+    virtio_scsi_reset_tmf_bh(s);
+
+    qatomic_inc(&s->resetting);
     bus_cold_reset(BUS(&s->bus));
-    s->resetting--;
+    qatomic_dec(&s->resetting);
 
     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
@@ -1053,6 +1146,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
     Error *err = NULL;
 
+    QTAILQ_INIT(&s->tmf_bh_list);
+
     virtio_scsi_common_realize(dev,
                                virtio_scsi_handle_ctrl,
                                virtio_scsi_handle_event,
@@ -1090,6 +1185,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
 
+    virtio_scsi_reset_tmf_bh(s);
+
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
 }
index 64e5ba93ec563d02a5c34d56cd52a52e7f113379..e8808adfdad6f36df981d7a56cfc16276122f6a2 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "hw/firmware/smbios.h"
 
 void smbios_entry_add(QemuOpts *opts, Error **errp)
 {
-    error_setg(errp, QERR_UNSUPPORTED);
+    g_assert_not_reached();
 }
index 57df462e3ca63c3ca8381c67a35915455d4f7408..1ee7d88c22678e9b096d2849e0fc1586b690f276 100644 (file)
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
+#include "hw/registerfields.h"
 #include "hw/ssi/ibex_spi_host.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
index 702aa5e4dfe628fb7c1e7c89eb9f1225bbe46840..904a47161a457dc13d042a32e905de38b6bbd410 100644 (file)
@@ -1,6 +1,6 @@
 softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_smc.c'))
 softmmu_ss.add(when: 'CONFIG_MSF2', if_true: files('mss-spi.c'))
-softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_fiu.c'))
+softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_fiu.c', 'npcm_pspi.c'))
 softmmu_ss.add(when: 'CONFIG_PL022', if_true: files('pl022.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_SPI', if_true: files('sifive_spi.c'))
 softmmu_ss.add(when: 'CONFIG_SSI', if_true: files('ssi.c'))
diff --git a/hw/ssi/npcm_pspi.c b/hw/ssi/npcm_pspi.c
new file mode 100644 (file)
index 0000000..3fb9350
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Nuvoton NPCM Peripheral SPI Module (PSPI)
+ *
+ * Copyright 2023 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/irq.h"
+#include "hw/registerfields.h"
+#include "hw/ssi/npcm_pspi.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+
+#include "trace.h"
+
+/*
+ * Register map: three 16-bit registers at byte offsets 0x0, 0x2 and 0x4.
+ * The A_PSPI_*, R_PSPI_* and R_PSPI_*_MASK names used by the handlers in this
+ * file come from these REG16()/FIELD() declarations (hw/registerfields.h).
+ */
+REG16(PSPI_DATA, 0x0)
+REG16(PSPI_CTL1, 0x2)
+    FIELD(PSPI_CTL1, SPIEN, 0,  1)
+    FIELD(PSPI_CTL1, MOD,   2,  1)
+    FIELD(PSPI_CTL1, EIR,   5,  1)
+    FIELD(PSPI_CTL1, EIW,   6,  1)
+    FIELD(PSPI_CTL1, SCM,   7,  1)
+    FIELD(PSPI_CTL1, SCIDL, 8,  1)
+    FIELD(PSPI_CTL1, SCDV,  9,  7)
+REG16(PSPI_STAT, 0x4)
+    FIELD(PSPI_STAT, BSY,  0,  1)
+    FIELD(PSPI_STAT, RBF,  1,  1)
+
+/*
+ * Recompute the level of the interrupt line from CTL1 and STAT.
+ *
+ * The line is high only while CTL1.SPIEN is set and at least one of these
+ * holds: STAT.BSY is clear with CTL1.EIW set, or STAT.RBF is set with
+ * CTL1.EIR set. Otherwise it is driven low.
+ */
+static void npcm_pspi_update_irq(NPCMPSPIState *s)
+{
+    uint16_t ctl1 = s->regs[R_PSPI_CTL1];
+    uint16_t stat = s->regs[R_PSPI_STAT];
+    bool enabled = FIELD_EX16(ctl1, PSPI_CTL1, SPIEN);
+    bool idle_irq = !FIELD_EX16(stat, PSPI_STAT, BSY) &&
+                    FIELD_EX16(ctl1, PSPI_CTL1, EIW);
+    bool rbf_irq = FIELD_EX16(stat, PSPI_STAT, RBF) &&
+                   FIELD_EX16(ctl1, PSPI_CTL1, EIR);
+
+    qemu_set_irq(s->irq, enabled && (idle_irq || rbf_irq));
+}
+
+/*
+ * Return the current contents of PSPI_DATA.
+ *
+ * Reading the data register has a side effect: the whole PSPI_STAT register
+ * is cleared.
+ */
+static uint16_t npcm_pspi_read_data(NPCMPSPIState *s)
+{
+    const uint16_t data = s->regs[R_PSPI_DATA];
+
+    s->regs[R_PSPI_STAT] = 0;
+    return data;
+}
+
+/*
+ * Send @data over the SSI bus and latch the reply in PSPI_DATA.
+ *
+ * When CTL1.MOD is set, bits 15..8 of @data are transferred first and the
+ * reply is shifted left by 8; bits 7..0 are always transferred, and that
+ * reply is OR-ed in. PSPI_STAT is then set to BSY | RBF.
+ */
+static void npcm_pspi_write_data(NPCMPSPIState *s, uint16_t data)
+{
+    uint16_t rx_hi = 0;
+    uint16_t rx_lo;
+
+    if (FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, MOD)) {
+        rx_hi = ssi_transfer(s->spi, extract16(data, 8, 8)) << 8;
+    }
+    rx_lo = ssi_transfer(s->spi, extract16(data, 0, 8));
+    s->regs[R_PSPI_DATA] = rx_hi | rx_lo;
+
+    /* Mark data as available */
+    s->regs[R_PSPI_STAT] = R_PSPI_STAT_RBF_MASK | R_PSPI_STAT_BSY_MASK;
+}
+
+/*
+ * Control register read handler.
+ *
+ * Returns the value of the register at byte offset @addr. Reading PSPI_DATA
+ * also clears PSPI_STAT (see npcm_pspi_read_data()). An unknown offset is
+ * logged as a guest error and reads as 0; in that case neither the trace
+ * event nor the IRQ update happens. @size is not used.
+ */
+static uint64_t npcm_pspi_ctrl_read(void *opaque, hwaddr addr,
+                                    unsigned int size)
+{
+    NPCMPSPIState *s = opaque;
+    uint16_t value;
+
+    switch (addr) {
+    case A_PSPI_DATA:
+        value = npcm_pspi_read_data(s);
+        break;
+
+    case A_PSPI_CTL1:
+        value = s->regs[R_PSPI_CTL1];
+        break;
+
+    case A_PSPI_STAT:
+        value = s->regs[R_PSPI_STAT];
+        break;
+
+    default:
+        /* This is the read path, so report the bad access as a read. */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: read from invalid offset 0x%" PRIx64 "\n",
+                      DEVICE(s)->canonical_path, addr);
+        return 0;
+    }
+    trace_npcm_pspi_ctrl_read(DEVICE(s)->canonical_path, addr, value);
+    /* A DATA read clears STAT, which can change the IRQ level. */
+    npcm_pspi_update_irq(s);
+
+    return value;
+}
+
+/*
+ * Control register write handler.
+ *
+ * Only the low 16 bits of @v are used. A write to PSPI_DATA starts a
+ * transfer (npcm_pspi_write_data()), a write to PSPI_CTL1 stores the value
+ * as-is, and a write to PSPI_STAT is rejected with a guest-error log. An
+ * unknown offset is logged and ignored without updating the IRQ line.
+ * @size is not used.
+ */
+static void npcm_pspi_ctrl_write(void *opaque, hwaddr addr, uint64_t v,
+                                 unsigned int size)
+{
+    NPCMPSPIState *s = opaque;
+    uint16_t value = v;
+
+    /* Traced before the offset is validated, so invalid writes show up too */
+    trace_npcm_pspi_ctrl_write(DEVICE(s)->canonical_path, addr, value);
+
+    switch (addr) {
+    case A_PSPI_DATA:
+        npcm_pspi_write_data(s, value);
+        break;
+
+    case A_PSPI_CTL1:
+        s->regs[R_PSPI_CTL1] = value;
+        break;
+
+    case A_PSPI_STAT:
+        /* STAT is left unchanged; the IRQ is still re-evaluated below */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: write to read-only register PSPI_STAT: 0x%08"
+                      PRIx64 "\n", DEVICE(s)->canonical_path, v);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: write to invalid offset 0x%" PRIx64 "\n",
+                      DEVICE(s)->canonical_path, addr);
+        return;
+    }
+    npcm_pspi_update_irq(s);
+}
+
+/*
+ * MMIO callbacks for the register block (little-endian).
+ *
+ * .valid accepts aligned guest accesses of 1 or 2 bytes, while .impl declares
+ * that the handlers implement 2-byte accesses only.
+ * NOTE(review): presumably the memory core adapts 1-byte accesses to the
+ * 2-byte implementation -- confirm against the memory API documentation.
+ */
+static const MemoryRegionOps npcm_pspi_ctrl_ops = {
+    .read = npcm_pspi_ctrl_read,
+    .write = npcm_pspi_ctrl_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 2,
+        .unaligned = false,
+    },
+    .impl = {
+        .min_access_size = 2,
+        .max_access_size = 2,
+        .unaligned = false,
+    },
+};
+
+/*
+ * Reset "enter" phase (installed in npcm_pspi_class_init()): emit a trace
+ * event with the reset type and zero every register.
+ */
+static void npcm_pspi_enter_reset(Object *obj, ResetType type)
+{
+    NPCMPSPIState *s = NPCM_PSPI(obj);
+
+    trace_npcm_pspi_enter_reset(DEVICE(obj)->canonical_path, type);
+    memset(s->regs, 0, sizeof(s->regs));
+}
+
+/*
+ * Realize handler: create the "pspi" SSI bus, a 4 KiB MMIO region served by
+ * npcm_pspi_ctrl_ops, and the sysbus IRQ line. @errp is never set here.
+ */
+static void npcm_pspi_realize(DeviceState *dev, Error **errp)
+{
+    NPCMPSPIState *s = NPCM_PSPI(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    Object *obj = OBJECT(dev);
+
+    s->spi = ssi_create_bus(dev, "pspi");
+    memory_region_init_io(&s->mmio, obj, &npcm_pspi_ctrl_ops, s,
+                          "mmio", 4 * KiB);
+    sysbus_init_mmio(sbd, &s->mmio);
+    sysbus_init_irq(sbd, &s->irq);
+}
+
+/* Migration state: the register array is the only field that is saved. */
+static const VMStateDescription vmstate_npcm_pspi = {
+    .name = "npcm-pspi",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16_ARRAY(regs, NPCMPSPIState, NPCM_PSPI_NR_REGS),
+        VMSTATE_END_OF_LIST(),
+    },
+};
+
+
+/*
+ * Class initializer: set the device description and hook up the realize
+ * handler, the migration description and the reset "enter" phase.
+ * @data is not used.
+ */
+static void npcm_pspi_class_init(ObjectClass *klass, void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "NPCM Peripheral SPI Module";
+    dc->realize = npcm_pspi_realize;
+    dc->vmsd = &vmstate_npcm_pspi;
+    rc->phases.enter = npcm_pspi_enter_reset;
+}
+
+/* Type registration: TYPE_NPCM_PSPI is a sysbus device backed by NPCMPSPIState. */
+static const TypeInfo npcm_pspi_types[] = {
+    {
+        .name = TYPE_NPCM_PSPI,
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .instance_size = sizeof(NPCMPSPIState),
+        .class_init = npcm_pspi_class_init,
+    },
+};
+DEFINE_TYPES(npcm_pspi_types);
index c707d4aabafe566acab4a7616cbda876f06f5791..2d5bd2b83d0c23725d41d606ad519fabb623bb5d 100644 (file)
@@ -21,6 +21,11 @@ npcm7xx_fiu_ctrl_write(const char *id, uint64_t addr, uint32_t data) "%s offset:
 npcm7xx_fiu_flash_read(const char *id, int cs, uint64_t addr, unsigned int size, uint64_t value) "%s[%d] offset: 0x%08" PRIx64 " size: %u value: 0x%" PRIx64
 npcm7xx_fiu_flash_write(const char *id, unsigned cs, uint64_t addr, unsigned int size, uint64_t value) "%s[%d] offset: 0x%08" PRIx64 " size: %u value: 0x%" PRIx64
 
+# npcm_pspi.c
+npcm_pspi_enter_reset(const char *id, int reset_type) "%s reset type: %d"
+npcm_pspi_ctrl_read(const char *id, uint64_t addr, uint16_t data) "%s offset: 0x%03" PRIx64 " value: 0x%04" PRIx16
+npcm_pspi_ctrl_write(const char *id, uint64_t addr, uint16_t data) "%s offset: 0x%03" PRIx64 " value: 0x%04" PRIx16
+
 # ibex_spi_host.c
 
 ibex_spi_host_reset(const char *msg) "%s"
index 130e5d1dc7029a9fde176203c4d9e7af7801547f..bab83c0e55cb754c41529d2c6004268ba4b7b476 100644 (file)
@@ -40,6 +40,8 @@
 #include "trace.h"
 #include "qapi/error.h"
 #include "migration/migration.h"
+#include "migration/misc.h"
+#include "migration/blocker.h"
 #include "sysemu/tpm.h"
 
 VFIOGroupList vfio_group_list =
@@ -336,6 +338,58 @@ bool vfio_mig_active(void)
     return true;
 }
 
+/*
+ * Migration blocker installed by vfio_block_multiple_devices_migration() and
+ * removed by vfio_unblock_multiple_devices_migration(); NULL while no such
+ * blocker is registered.
+ */
+static Error *multiple_devices_migration_blocker;
+
+/*
+ * Count the devices, across every group in vfio_group_list, whose
+ * ->migration pointer is set.
+ */
+static unsigned int vfio_migratable_device_num(void)
+{
+    unsigned int count = 0;
+    VFIODevice *dev;
+    VFIOGroup *grp;
+
+    QLIST_FOREACH(grp, &vfio_group_list, next) {
+        QLIST_FOREACH(dev, &grp->device_list, next) {
+            count += dev->migration ? 1 : 0;
+        }
+    }
+
+    return count;
+}
+
+/*
+ * Install the "multiple VFIO devices" migration blocker when it is needed.
+ *
+ * Nothing is done, and 0 is returned, if the blocker is already installed or
+ * if at most one migratable VFIO device exists. Otherwise the blocker is
+ * created and handed to migrate_add_blocker(), whose result is returned; on a
+ * negative result the blocker is freed again and @errp is left as
+ * migrate_add_blocker() set it.
+ */
+int vfio_block_multiple_devices_migration(Error **errp)
+{
+    int rc;
+
+    if (multiple_devices_migration_blocker) {
+        return 0;
+    }
+    if (vfio_migratable_device_num() <= 1) {
+        return 0;
+    }
+
+    error_setg(&multiple_devices_migration_blocker,
+               "Migration is currently not supported with multiple "
+               "VFIO devices");
+    rc = migrate_add_blocker(multiple_devices_migration_blocker, errp);
+    if (rc >= 0) {
+        return rc;
+    }
+
+    /* Registration failed: do not keep a blocker that is not installed */
+    error_free(multiple_devices_migration_blocker);
+    multiple_devices_migration_blocker = NULL;
+
+    return rc;
+}
+
+/*
+ * Remove the "multiple VFIO devices" migration blocker once it is no longer
+ * needed, i.e. when it is installed and at most one migratable VFIO device
+ * is left. In every other case this is a no-op.
+ */
+void vfio_unblock_multiple_devices_migration(void)
+{
+    if (multiple_devices_migration_blocker &&
+        vfio_migratable_device_num() <= 1) {
+        migrate_del_blocker(multiple_devices_migration_blocker);
+        error_free(multiple_devices_migration_blocker);
+        multiple_devices_migration_blocker = NULL;
+    }
+}
+
 static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
 {
     VFIOGroup *group;
@@ -354,8 +408,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
                 return false;
             }
 
-            if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF)
-                && (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) {
+            if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
+                migration->device_state == VFIO_DEVICE_STATE_RUNNING) {
                 return false;
             }
         }
@@ -363,13 +417,16 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
     return true;
 }
 
-static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
+/*
+ * Check if all VFIO devices are running and migration is active, which is
+ * essentially equivalent to the migration being in pre-copy phase.
+ */
+static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
 {
     VFIOGroup *group;
     VFIODevice *vbasedev;
-    MigrationState *ms = migrate_get_current();
 
-    if (!migration_is_setup_or_active(ms->state)) {
+    if (!migration_is_active(migrate_get_current())) {
         return false;
     }
 
@@ -381,8 +438,7 @@ static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
                 return false;
             }
 
-            if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) &&
-                (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) {
+            if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) {
                 continue;
             } else {
                 return false;
@@ -461,7 +517,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
     };
 
     if (iotlb && container->dirty_pages_supported &&
-        vfio_devices_all_running_and_saving(container)) {
+        vfio_devices_all_running_and_mig_active(container)) {
         return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
     }
 
@@ -488,6 +544,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
         return -errno;
     }
 
+    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
+        cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
+                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
+                                            DIRTY_CLIENTS_NOCODE);
+    }
+
     return 0;
 }
 
@@ -1201,6 +1263,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
         .argsz = sizeof(dirty),
     };
 
+    if (!container->dirty_pages_supported) {
+        return;
+    }
+
     if (start) {
         dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
     } else {
@@ -1236,6 +1302,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
     uint64_t pages;
     int ret;
 
+    if (!container->dirty_pages_supported) {
+        cpu_physical_memory_set_dirty_range(ram_addr, size,
+                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
+                                            DIRTY_CLIENTS_NOCODE);
+        return 0;
+    }
+
     dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
 
     dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1409,8 +1482,7 @@ static void vfio_listener_log_sync(MemoryListener *listener,
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
 
-    if (vfio_listener_skipped_section(section) ||
-        !container->dirty_pages_supported) {
+    if (vfio_listener_skipped_section(section)) {
         return;
     }
 
index b3318f0f20645ae2df4519ea4e2ae8234923388d..a2c3d9bade7f1de4535a2d45dfaa5689ca6162b0 100644 (file)
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "qemu/cutils.h"
+#include "qemu/units.h"
 #include <linux/vfio.h>
 #include <sys/ioctl.h>
 
 #define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
 #define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
 
+/*
+ * This is an arbitrary size based on migration of mlx5 devices, where typically
+ * total device migration size is on the order of 100s of MB. Testing with
+ * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
+ */
+#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
+
 static int64_t bytes_transferred;
 
-static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
-                                  off_t off, bool iswrite)
+static const char *mig_state_to_str(enum vfio_device_mig_state state)
 {
-    int ret;
-
-    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
-                    pread(vbasedev->fd, val, count, off);
-    if (ret < count) {
-        error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
-                     HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
-                     vbasedev->name, off, strerror(errno));
-        return (ret < 0) ? ret : -EINVAL;
+    switch (state) {
+    case VFIO_DEVICE_STATE_ERROR:
+        return "ERROR";
+    case VFIO_DEVICE_STATE_STOP:
+        return "STOP";
+    case VFIO_DEVICE_STATE_RUNNING:
+        return "RUNNING";
+    case VFIO_DEVICE_STATE_STOP_COPY:
+        return "STOP_COPY";
+    case VFIO_DEVICE_STATE_RESUMING:
+        return "RESUMING";
+    default:
+        return "UNKNOWN STATE";
     }
-    return 0;
 }
 
-static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
-                       off_t off, bool iswrite)
-{
-    int ret, done = 0;
-    __u8 *tbuf = buf;
-
-    while (count) {
-        int bytes = 0;
-
-        if (count >= 8 && !(off % 8)) {
-            bytes = 8;
-        } else if (count >= 4 && !(off % 4)) {
-            bytes = 4;
-        } else if (count >= 2 && !(off % 2)) {
-            bytes = 2;
-        } else {
-            bytes = 1;
-        }
-
-        ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
-        if (ret) {
-            return ret;
-        }
-
-        count -= bytes;
-        done += bytes;
-        off += bytes;
-        tbuf += bytes;
-    }
-    return done;
-}
-
-#define vfio_mig_read(f, v, c, o)       vfio_mig_rw(f, (__u8 *)v, c, o, false)
-#define vfio_mig_write(f, v, c, o)      vfio_mig_rw(f, (__u8 *)v, c, o, true)
-
-#define VFIO_MIG_STRUCT_OFFSET(f)       \
-                                 offsetof(struct vfio_device_migration_info, f)
-/*
- * Change the device_state register for device @vbasedev. Bits set in @mask
- * are preserved, bits set in @value are set, and bits not set in either @mask
- * or @value are cleared in device_state. If the register cannot be accessed,
- * the resulting state would be invalid, or the device enters an error state,
- * an error is returned.
- */
-
-static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
-                                    uint32_t value)
+static int vfio_migration_set_state(VFIODevice *vbasedev,
+                                    enum vfio_device_mig_state new_state,
+                                    enum vfio_device_mig_state recover_state)
 {
     VFIOMigration *migration = vbasedev->migration;
-    VFIORegion *region = &migration->region;
-    off_t dev_state_off = region->fd_offset +
-                          VFIO_MIG_STRUCT_OFFSET(device_state);
-    uint32_t device_state;
+    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+                              sizeof(struct vfio_device_feature_mig_state),
+                              sizeof(uint64_t))] = {};
+    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+    struct vfio_device_feature_mig_state *mig_state =
+        (struct vfio_device_feature_mig_state *)feature->data;
     int ret;
 
-    ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
-                        dev_state_off);
-    if (ret < 0) {
-        return ret;
-    }
-
-    device_state = (device_state & mask) | value;
-
-    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
-        return -EINVAL;
-    }
-
-    ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
-                         dev_state_off);
-    if (ret < 0) {
-        int rret;
-
-        rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
-                             dev_state_off);
-
-        if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
-            hw_error("%s: Device in error state 0x%x", vbasedev->name,
-                     device_state);
-            return rret ? rret : -EIO;
+    feature->argsz = sizeof(buf);
+    feature->flags =
+        VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
+    mig_state->device_state = new_state;
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+        /* Try to set the device in some good state */
+        ret = -errno;
+
+        if (recover_state == VFIO_DEVICE_STATE_ERROR) {
+            error_report("%s: Failed setting device state to %s, err: %s. "
+                         "Recover state is ERROR. Resetting device",
+                         vbasedev->name, mig_state_to_str(new_state),
+                         strerror(errno));
+
+            goto reset_device;
         }
-        return ret;
-    }
 
-    migration->device_state = device_state;
-    trace_vfio_migration_set_state(vbasedev->name, device_state);
-    return 0;
-}
+        error_report(
+            "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s",
+                     vbasedev->name, mig_state_to_str(new_state),
+                     strerror(errno), mig_state_to_str(recover_state));
 
-static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
-                                   uint64_t data_size, uint64_t *size)
-{
-    void *ptr = NULL;
-    uint64_t limit = 0;
-    int i;
+        mig_state->device_state = recover_state;
+        if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+            ret = -errno;
+            error_report(
+                "%s: Failed setting device in recover state, err: %s. Resetting device",
+                         vbasedev->name, strerror(errno));
 
-    if (!region->mmaps) {
-        if (size) {
-            *size = MIN(data_size, region->size - data_offset);
+            goto reset_device;
         }
-        return ptr;
-    }
 
-    for (i = 0; i < region->nr_mmaps; i++) {
-        VFIOMmap *map = region->mmaps + i;
+        migration->device_state = recover_state;
 
-        if ((data_offset >= map->offset) &&
-            (data_offset < map->offset + map->size)) {
+        return ret;
+    }
 
-            /* check if data_offset is within sparse mmap areas */
-            ptr = map->mmap + data_offset - map->offset;
-            if (size) {
-                *size = MIN(data_size, map->offset + map->size - data_offset);
-            }
-            break;
-        } else if ((data_offset < map->offset) &&
-                   (!limit || limit > map->offset)) {
+    migration->device_state = new_state;
+    if (mig_state->data_fd != -1) {
+        if (migration->data_fd != -1) {
             /*
-             * data_offset is not within sparse mmap areas, find size of
-             * non-mapped area. Check through all list since region->mmaps list
-             * is not sorted.
+             * This can happen if the device is asynchronously reset and
+             * terminates a data transfer.
              */
-            limit = map->offset;
-        }
-    }
-
-    if (!ptr && size) {
-        *size = limit ? MIN(data_size, limit - data_offset) : data_size;
-    }
-    return ptr;
-}
+            error_report("%s: data_fd out of sync", vbasedev->name);
+            close(mig_state->data_fd);
 
-static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
-{
-    VFIOMigration *migration = vbasedev->migration;
-    VFIORegion *region = &migration->region;
-    uint64_t data_offset = 0, data_size = 0, sz;
-    int ret;
-
-    ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
-                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
-    if (ret < 0) {
-        return ret;
-    }
+            return -EBADF;
+        }
 
-    ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
-                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
-    if (ret < 0) {
-        return ret;
+        migration->data_fd = mig_state->data_fd;
     }
 
-    trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
-                           migration->pending_bytes);
-
-    qemu_put_be64(f, data_size);
-    sz = data_size;
-
-    while (sz) {
-        void *buf;
-        uint64_t sec_size;
-        bool buf_allocated = false;
-
-        buf = get_data_section_size(region, data_offset, sz, &sec_size);
-
-        if (!buf) {
-            buf = g_try_malloc(sec_size);
-            if (!buf) {
-                error_report("%s: Error allocating buffer ", __func__);
-                return -ENOMEM;
-            }
-            buf_allocated = true;
-
-            ret = vfio_mig_read(vbasedev, buf, sec_size,
-                                region->fd_offset + data_offset);
-            if (ret < 0) {
-                g_free(buf);
-                return ret;
-            }
-        }
+    trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state));
 
-        qemu_put_buffer(f, buf, sec_size);
+    return 0;
 
-        if (buf_allocated) {
-            g_free(buf);
-        }
-        sz -= sec_size;
-        data_offset += sec_size;
+reset_device:
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {
+        hw_error("%s: Failed resetting device, err: %s", vbasedev->name,
+                 strerror(errno));
     }
 
-    ret = qemu_file_get_error(f);
+    migration->device_state = VFIO_DEVICE_STATE_RUNNING;
 
-    if (!ret && size) {
-        *size = data_size;
-    }
-
-    bytes_transferred += data_size;
     return ret;
 }
 
 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
                             uint64_t data_size)
-{
-    VFIORegion *region = &vbasedev->migration->region;
-    uint64_t data_offset = 0, size, report_size;
-    int ret;
-
-    do {
-        ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
-                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
-        if (ret < 0) {
-            return ret;
-        }
-
-        if (data_offset + data_size > region->size) {
-            /*
-             * If data_size is greater than the data section of migration region
-             * then iterate the write buffer operation. This case can occur if
-             * size of migration region at destination is smaller than size of
-             * migration region at source.
-             */
-            report_size = size = region->size - data_offset;
-            data_size -= size;
-        } else {
-            report_size = size = data_size;
-            data_size = 0;
-        }
-
-        trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
-
-        while (size) {
-            void *buf;
-            uint64_t sec_size;
-            bool buf_alloc = false;
-
-            buf = get_data_section_size(region, data_offset, size, &sec_size);
-
-            if (!buf) {
-                buf = g_try_malloc(sec_size);
-                if (!buf) {
-                    error_report("%s: Error allocating buffer ", __func__);
-                    return -ENOMEM;
-                }
-                buf_alloc = true;
-            }
-
-            qemu_get_buffer(f, buf, sec_size);
-
-            if (buf_alloc) {
-                ret = vfio_mig_write(vbasedev, buf, sec_size,
-                        region->fd_offset + data_offset);
-                g_free(buf);
-
-                if (ret < 0) {
-                    return ret;
-                }
-            }
-            size -= sec_size;
-            data_offset += sec_size;
-        }
-
-        ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
-                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
-        if (ret < 0) {
-            return ret;
-        }
-    } while (data_size);
-
-    return 0;
-}
-
-static int vfio_update_pending(VFIODevice *vbasedev)
 {
     VFIOMigration *migration = vbasedev->migration;
-    VFIORegion *region = &migration->region;
-    uint64_t pending_bytes = 0;
     int ret;
 
-    ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
-                    region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
-    if (ret < 0) {
-        migration->pending_bytes = 0;
-        return ret;
-    }
+    ret = qemu_file_get_to_fd(f, migration->data_fd, data_size);
+    trace_vfio_load_state_device_data(vbasedev->name, data_size, ret);
 
-    migration->pending_bytes = pending_bytes;
-    trace_vfio_update_pending(vbasedev->name, pending_bytes);
-    return 0;
+    return ret;
 }
 
 static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
@@ -398,183 +213,157 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev)
 {
     VFIOMigration *migration = vbasedev->migration;
 
-    if (migration->region.mmaps) {
-        vfio_region_unmap(&migration->region);
-    }
+    close(migration->data_fd);
+    migration->data_fd = -1;
 }
 
-/* ---------------------------------------------------------------------- */
-
-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_query_stop_copy_size(VFIODevice *vbasedev,
+                                     uint64_t *stop_copy_size)
 {
-    VFIODevice *vbasedev = opaque;
-    VFIOMigration *migration = vbasedev->migration;
-    int ret;
-
-    trace_vfio_save_setup(vbasedev->name);
+    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+                              sizeof(struct vfio_device_feature_mig_data_size),
+                              sizeof(uint64_t))] = {};
+    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+    struct vfio_device_feature_mig_data_size *mig_data_size =
+        (struct vfio_device_feature_mig_data_size *)feature->data;
 
-    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
-
-    if (migration->region.mmaps) {
-        /*
-         * Calling vfio_region_mmap() from migration thread. Memory API called
-         * from this function require locking the iothread when called from
-         * outside the main loop thread.
-         */
-        qemu_mutex_lock_iothread();
-        ret = vfio_region_mmap(&migration->region);
-        qemu_mutex_unlock_iothread();
-        if (ret) {
-            error_report("%s: Failed to mmap VFIO migration region: %s",
-                         vbasedev->name, strerror(-ret));
-            error_report("%s: Falling back to slow path", vbasedev->name);
-        }
-    }
+    feature->argsz = sizeof(buf);
+    feature->flags =
+        VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE;
 
-    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
-                                   VFIO_DEVICE_STATE_V1_SAVING);
-    if (ret) {
-        error_report("%s: Failed to set state SAVING", vbasedev->name);
-        return ret;
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+        return -errno;
     }
 
-    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
-
-    ret = qemu_file_get_error(f);
-    if (ret) {
-        return ret;
-    }
+    *stop_copy_size = mig_data_size->stop_copy_length;
 
     return 0;
 }
 
-static void vfio_save_cleanup(void *opaque)
+/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */
+static int vfio_save_block(QEMUFile *f, VFIOMigration *migration)
 {
-    VFIODevice *vbasedev = opaque;
+    ssize_t data_size;
 
-    vfio_migration_cleanup(vbasedev);
-    trace_vfio_save_cleanup(vbasedev->name);
+    data_size = read(migration->data_fd, migration->data_buffer,
+                     migration->data_buffer_size);
+    if (data_size < 0) {
+        return -errno;
+    }
+    if (data_size == 0) {
+        return 1;
+    }
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
+    qemu_put_be64(f, data_size);
+    qemu_put_buffer(f, migration->data_buffer, data_size);
+    bytes_transferred += data_size;
+
+    trace_vfio_save_block(migration->vbasedev->name, data_size);
+
+    return qemu_file_get_error(f);
 }
 
-static void vfio_state_pending(void *opaque,
-                               uint64_t *res_precopy_only,
-                               uint64_t *res_compatible,
-                               uint64_t *res_postcopy_only)
+/* ---------------------------------------------------------------------- */
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
-    int ret;
+    uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
 
-    ret = vfio_update_pending(vbasedev);
-    if (ret) {
-        return;
+    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+    vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
+    migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE,
+                                      stop_copy_size);
+    migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
+    if (!migration->data_buffer) {
+        error_report("%s: Failed to allocate migration data buffer",
+                     vbasedev->name);
+        return -ENOMEM;
     }
 
-    *res_precopy_only += migration->pending_bytes;
+    trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size);
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 
-    trace_vfio_state_pending(vbasedev->name, *res_precopy_only,
-                            *res_postcopy_only, *res_compatible);
+    return qemu_file_get_error(f);
 }
 
-static int vfio_save_iterate(QEMUFile *f, void *opaque)
+static void vfio_save_cleanup(void *opaque)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
-    uint64_t data_size;
-    int ret;
-
-    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
-
-    if (migration->pending_bytes == 0) {
-        ret = vfio_update_pending(vbasedev);
-        if (ret) {
-            return ret;
-        }
-
-        if (migration->pending_bytes == 0) {
-            qemu_put_be64(f, 0);
-            qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
-            /* indicates data finished, goto complete phase */
-            return 1;
-        }
-    }
 
-    ret = vfio_save_buffer(f, vbasedev, &data_size);
-    if (ret) {
-        error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
-                     strerror(errno));
-        return ret;
-    }
+    g_free(migration->data_buffer);
+    migration->data_buffer = NULL;
+    vfio_migration_cleanup(vbasedev);
+    trace_vfio_save_cleanup(vbasedev->name);
+}
 
-    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+/*
+ * Migration size of VFIO devices can be as little as a few KBs or as big as
+ * many GBs. This value should be big enough to cover the worst case.
+ */
+#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
 
-    ret = qemu_file_get_error(f);
-    if (ret) {
-        return ret;
-    }
+/*
+ * Only exact function is implemented and not estimate function. The reason is
+ * that during pre-copy phase of migration the estimate function is called
+ * repeatedly while pending RAM size is over the threshold, thus migration
+ * can't converge and querying the VFIO device pending data size is useless.
+ */
+static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
+                                     uint64_t *can_postcopy)
+{
+    VFIODevice *vbasedev = opaque;
+    uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;
 
     /*
-     * Reset pending_bytes as state_pending* are not called during
-     * savevm or snapshot case, in such case vfio_update_pending() at
-     * the start of this function updates pending_bytes.
+     * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is
+     * reported so downtime limit won't be violated.
      */
-    migration->pending_bytes = 0;
-    trace_vfio_save_iterate(vbasedev->name, data_size);
-    return 0;
+    vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
+    *must_precopy += stop_copy_size;
+
+    trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
+                                   stop_copy_size);
 }
 
 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
-    VFIOMigration *migration = vbasedev->migration;
-    uint64_t data_size;
     int ret;
 
-    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING,
-                                   VFIO_DEVICE_STATE_V1_SAVING);
-    if (ret) {
-        error_report("%s: Failed to set state STOP and SAVING",
-                     vbasedev->name);
-        return ret;
-    }
-
-    ret = vfio_update_pending(vbasedev);
+    /* We reach here with device state STOP only */
+    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+                                   VFIO_DEVICE_STATE_STOP);
     if (ret) {
         return ret;
     }
 
-    while (migration->pending_bytes > 0) {
-        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
-        ret = vfio_save_buffer(f, vbasedev, &data_size);
+    do {
+        ret = vfio_save_block(f, vbasedev->migration);
         if (ret < 0) {
-            error_report("%s: Failed to save buffer", vbasedev->name);
-            return ret;
-        }
-
-        if (data_size == 0) {
-            break;
-        }
-
-        ret = vfio_update_pending(vbasedev);
-        if (ret) {
             return ret;
         }
-    }
+    } while (!ret);
 
     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
-
     ret = qemu_file_get_error(f);
     if (ret) {
         return ret;
     }
 
-    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0);
-    if (ret) {
-        error_report("%s: Failed to set state STOPPED", vbasedev->name);
-        return ret;
-    }
+    /*
+     * If setting the device in STOP state fails, the device should be reset.
+     * To do so, use ERROR state as a recover state.
+     */
+    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
+                                   VFIO_DEVICE_STATE_ERROR);
+    trace_vfio_save_complete_precopy(vbasedev->name, ret);
 
-    trace_vfio_save_complete_precopy(vbasedev->name);
     return ret;
 }
 
@@ -594,28 +383,9 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
 static int vfio_load_setup(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
-    VFIOMigration *migration = vbasedev->migration;
-    int ret = 0;
 
-    if (migration->region.mmaps) {
-        ret = vfio_region_mmap(&migration->region);
-        if (ret) {
-            error_report("%s: Failed to mmap VFIO migration region %d: %s",
-                         vbasedev->name, migration->region.nr,
-                         strerror(-ret));
-            error_report("%s: Falling back to slow path", vbasedev->name);
-        }
-    }
-
-    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
-                                   VFIO_DEVICE_STATE_V1_RESUMING);
-    if (ret) {
-        error_report("%s: Failed to set state RESUMING", vbasedev->name);
-        if (migration->region.mmaps) {
-            vfio_region_unmap(&migration->region);
-        }
-    }
-    return ret;
+    return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+                                   vbasedev->migration->device_state);
 }
 
 static int vfio_load_cleanup(void *opaque)
@@ -624,6 +394,7 @@ static int vfio_load_cleanup(void *opaque)
 
     vfio_migration_cleanup(vbasedev);
     trace_vfio_load_cleanup(vbasedev->name);
+
     return 0;
 }
 
@@ -681,12 +452,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
     return ret;
 }
 
-static SaveVMHandlers savevm_vfio_handlers = {
+static const SaveVMHandlers savevm_vfio_handlers = {
     .save_setup = vfio_save_setup,
     .save_cleanup = vfio_save_cleanup,
-    .state_pending_exact = vfio_state_pending,
-    .state_pending_estimate = vfio_state_pending,
-    .save_live_iterate = vfio_save_iterate,
+    .state_pending_exact = vfio_state_pending_exact,
     .save_live_complete_precopy = vfio_save_complete_precopy,
     .save_state = vfio_save_state,
     .load_setup = vfio_load_setup,
@@ -699,56 +468,33 @@ static SaveVMHandlers savevm_vfio_handlers = {
 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
 {
     VFIODevice *vbasedev = opaque;
-    VFIOMigration *migration = vbasedev->migration;
-    uint32_t value, mask;
+    enum vfio_device_mig_state new_state;
     int ret;
 
-    if (vbasedev->migration->vm_running == running) {
-        return;
-    }
-
     if (running) {
-        /*
-         * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
-         * Transition from _SAVING to _RUNNING can happen if there is migration
-         * failure, in that case clear _SAVING bit.
-         * Transition from _RESUMING to _RUNNING occurs during resuming
-         * phase, in that case clear _RESUMING bit.
-         * In both the above cases, set _RUNNING bit.
-         */
-        mask = ~VFIO_DEVICE_STATE_MASK;
-        value = VFIO_DEVICE_STATE_V1_RUNNING;
+        new_state = VFIO_DEVICE_STATE_RUNNING;
     } else {
-        /*
-         * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
-         * _RUNNING bit
-         */
-        mask = ~VFIO_DEVICE_STATE_V1_RUNNING;
-
-        /*
-         * When VM state transition to stop for savevm command, device should
-         * start saving data.
-         */
-        if (state == RUN_STATE_SAVE_VM) {
-            value = VFIO_DEVICE_STATE_V1_SAVING;
-        } else {
-            value = 0;
-        }
+        new_state = VFIO_DEVICE_STATE_STOP;
     }
 
-    ret = vfio_migration_set_state(vbasedev, mask, value);
+    /*
+     * If setting the device in new_state fails, the device should be reset.
+     * To do so, use ERROR state as a recover state.
+     */
+    ret = vfio_migration_set_state(vbasedev, new_state,
+                                   VFIO_DEVICE_STATE_ERROR);
     if (ret) {
         /*
          * Migration should be aborted in this case, but vm_state_notify()
          * currently does not support reporting failures.
          */
-        error_report("%s: Failed to set device state 0x%x", vbasedev->name,
-                     (migration->device_state & mask) | value);
-        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+        if (migrate_get_current()->to_dst_file) {
+            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+        }
     }
-    vbasedev->migration->vm_running = running;
+
     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
-            (migration->device_state & mask) | value);
+                              mig_state_to_str(new_state));
 }
 
 static void vfio_migration_state_notifier(Notifier *notifier, void *data)
@@ -757,7 +503,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
     VFIOMigration *migration = container_of(notifier, VFIOMigration,
                                             migration_state);
     VFIODevice *vbasedev = migration->vbasedev;
-    int ret;
 
     trace_vfio_migration_state_notifier(vbasedev->name,
                                         MigrationStatus_str(s->state));
@@ -767,34 +512,57 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
     case MIGRATION_STATUS_CANCELLED:
     case MIGRATION_STATUS_FAILED:
         bytes_transferred = 0;
-        ret = vfio_migration_set_state(vbasedev,
-                                       ~(VFIO_DEVICE_STATE_V1_SAVING |
-                                         VFIO_DEVICE_STATE_V1_RESUMING),
-                                       VFIO_DEVICE_STATE_V1_RUNNING);
-        if (ret) {
-            error_report("%s: Failed to set state RUNNING", vbasedev->name);
-        }
+        /*
+         * If setting the device in RUNNING state fails, the device should
+         * be reset. To do so, use ERROR state as a recover state.
+         */
+        vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING,
+                                 VFIO_DEVICE_STATE_ERROR);
     }
 }
 
 static void vfio_migration_exit(VFIODevice *vbasedev)
 {
-    VFIOMigration *migration = vbasedev->migration;
-
-    vfio_region_exit(&migration->region);
-    vfio_region_finalize(&migration->region);
     g_free(vbasedev->migration);
     vbasedev->migration = NULL;
 }
 
-static int vfio_migration_init(VFIODevice *vbasedev,
-                               struct vfio_region_info *info)
+static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags)
+{
+    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+                                  sizeof(struct vfio_device_feature_migration),
+                              sizeof(uint64_t))] = {};
+    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+    struct vfio_device_feature_migration *mig =
+        (struct vfio_device_feature_migration *)feature->data;
+
+    feature->argsz = sizeof(buf);
+    feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+        if (errno == ENOTTY) {
+            error_report("%s: VFIO migration is not supported in kernel",
+                         vbasedev->name);
+        } else {
+            error_report("%s: Failed to query VFIO migration support, err: %s",
+                         vbasedev->name, strerror(errno));
+        }
+
+        return -errno;
+    }
+
+    *mig_flags = mig->flags;
+
+    return 0;
+}
+
+static int vfio_migration_init(VFIODevice *vbasedev)
 {
     int ret;
     Object *obj;
     VFIOMigration *migration;
     char id[256] = "";
     g_autofree char *path = NULL, *oid = NULL;
+    uint64_t mig_flags = 0;
 
     if (!vbasedev->ops->vfio_get_object) {
         return -EINVAL;
@@ -805,27 +573,21 @@ static int vfio_migration_init(VFIODevice *vbasedev,
         return -EINVAL;
     }
 
-    vbasedev->migration = g_new0(VFIOMigration, 1);
-    vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING;
-    vbasedev->migration->vm_running = runstate_is_running();
-
-    ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
-                            info->index, "migration");
+    ret = vfio_migration_query_flags(vbasedev, &mig_flags);
     if (ret) {
-        error_report("%s: Failed to setup VFIO migration region %d: %s",
-                     vbasedev->name, info->index, strerror(-ret));
-        goto err;
+        return ret;
     }
 
-    if (!vbasedev->migration->region.size) {
-        error_report("%s: Invalid zero-sized VFIO migration region %d",
-                     vbasedev->name, info->index);
-        ret = -EINVAL;
-        goto err;
+    /* Basic migration functionality must be supported */
+    if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) {
+        return -EOPNOTSUPP;
     }
 
+    vbasedev->migration = g_new0(VFIOMigration, 1);
     migration = vbasedev->migration;
     migration->vbasedev = vbasedev;
+    migration->device_state = VFIO_DEVICE_STATE_RUNNING;
+    migration->data_fd = -1;
 
     oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
     if (oid) {
@@ -843,11 +605,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
                                                            vbasedev);
     migration->migration_state.notify = vfio_migration_state_notifier;
     add_migration_state_change_notifier(&migration->migration_state);
-    return 0;
 
-err:
-    vfio_migration_exit(vbasedev);
-    return ret;
+    return 0;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -859,35 +618,28 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-    VFIOContainer *container = vbasedev->group->container;
-    struct vfio_region_info *info = NULL;
     int ret = -ENOTSUP;
 
-    if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+    if (!vbasedev->enable_migration) {
         goto add_blocker;
     }
 
-    ret = vfio_get_dev_region_info(vbasedev,
-                                   VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
-                                   VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
-                                   &info);
+    ret = vfio_migration_init(vbasedev);
     if (ret) {
         goto add_blocker;
     }
 
-    ret = vfio_migration_init(vbasedev, info);
+    ret = vfio_block_multiple_devices_migration(errp);
     if (ret) {
-        goto add_blocker;
+        return ret;
     }
 
-    trace_vfio_migration_probe(vbasedev->name, info->index);
-    g_free(info);
+    trace_vfio_migration_probe(vbasedev->name);
     return 0;
 
 add_blocker:
     error_setg(&vbasedev->migration_blocker,
                "VFIO device doesn't support migration");
-    g_free(info);
 
     ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
     if (ret < 0) {
@@ -906,6 +658,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev)
         qemu_del_vm_change_state_handler(migration->vm_state);
         unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
         vfio_migration_exit(vbasedev);
+        vfio_unblock_multiple_devices_migration();
     }
 
     if (vbasedev->migration_blocker) {
index 52de1c84f8b206bbfd2031b45bed5d4a47ba2e8d..669d9fe07cd993ffb79329e1af94604e72fc5f08 100644 (file)
@@ -119,6 +119,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic
 vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
 vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
 vfio_dma_unmap_overflow_workaround(void) ""
+vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
+vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
 
 # platform.c
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
@@ -148,21 +150,17 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
 vfio_display_edid_write_error(void) ""
 
 # migration.c
-vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
-vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
-vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
+vfio_load_cleanup(const char *name) " (%s)"
+vfio_load_device_config_state(const char *name) " (%s)"
+vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
+vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d"
+vfio_migration_probe(const char *name) " (%s)"
+vfio_migration_set_state(const char *name, const char *state) " (%s) state %s"
 vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
-vfio_save_setup(const char *name) " (%s)"
+vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
 vfio_save_cleanup(const char *name) " (%s)"
-vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
-vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
+vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
 vfio_save_device_config_state(const char *name) " (%s)"
-vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
-vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
-vfio_save_complete_precopy(const char *name) " (%s)"
-vfio_load_device_config_state(const char *name) " (%s)"
-vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
-vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
-vfio_load_cleanup(const char *name) " (%s)"
-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
-vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
+vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64
+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64
+vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
index f5049735acf65159745a63b9a52b5760397bc35f..83fc20e49e6554af21e8810d7c1089fc9bae5652 100644 (file)
@@ -273,6 +273,7 @@ static void vuf_device_unrealize(DeviceState *dev)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserFS *fs = VHOST_USER_FS(dev);
+    struct vhost_virtqueue *vhost_vqs = fs->vhost_dev.vqs;
     int i;
 
     /* This will stop vhost backend if appropriate. */
@@ -288,8 +289,7 @@ static void vuf_device_unrealize(DeviceState *dev)
     }
     g_free(fs->req_vqs);
     virtio_cleanup(vdev);
-    g_free(fs->vhost_dev.vqs);
-    fs->vhost_dev.vqs = NULL;
+    g_free(vhost_vqs);
 }
 
 static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
index 469300fe8d9b7e3d3012786b53541977c73c145b..b5122ef8ab435e7ad1eaf50bfa5073274ad6bd42 100644 (file)
 #define co_wrapper_bdrv_rdlock         no_coroutine_fn
 #define co_wrapper_mixed_bdrv_rdlock   no_coroutine_fn coroutine_mixed_fn
 
+/*
+ * no_co_wrapper: Function specifier used by block-coroutine-wrapper.py
+ *
+ * Function specifier which does nothing but mark functions to be generated by
+ * scripts/block-coroutine-wrapper.py.
+ *
+ * A no_co_wrapper function declaration creates a coroutine_fn wrapper around
+ * functions that must not be called in coroutine context. It achieves this by
+ * scheduling a BH in the bottom half that runs the respective non-coroutine
+ * function. The coroutine yields after scheduling the BH and is reentered when
+ * the wrapped function returns.
+ */
+#define no_co_wrapper
+
 #include "block/blockjob.h"
 
 /* block.c */
index d0f83865542d6b62bcb1f3ce1595c6be40732c2b..0700953ab8fd775b7d44acfac5278540cdea1c6e 100644 (file)
@@ -36,9 +36,9 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
 void block_copy_state_free(BlockCopyState *s);
 
 void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
-int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
-                                                  int64_t offset,
-                                                  int64_t *count);
+
+int64_t coroutine_fn GRAPH_RDLOCK
+block_copy_reset_unallocated(BlockCopyState *s, int64_t offset, int64_t *count);
 
 int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
                             bool ignore_ratelimit, uint64_t timeout_ns,
index a38f86dc154750a738332c5c9bf749072013f5dd..399200a9a3ad026cac2c14d3931eb421617901f7 100644 (file)
@@ -58,13 +58,15 @@ BlockDriver *bdrv_find_protocol(const char *filename,
                                 Error **errp);
 BlockDriver *bdrv_find_format(const char *format_name);
 
-int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
-                                QemuOpts *opts, Error **errp);
-int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
-                           QemuOpts *opts, Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts,
+               Error **errp);
 
-int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
-                                     Error **errp);
+int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename,
+                                       QemuOpts *opts, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
 
 BlockDriverState *bdrv_new(void);
 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
@@ -77,16 +79,26 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
                                    int flags, Error **errp);
 int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
 
-BdrvChild *bdrv_open_child(const char *filename,
-                           QDict *options, const char *bdref_key,
-                           BlockDriverState *parent,
-                           const BdrvChildClass *child_class,
-                           BdrvChildRole child_role,
-                           bool allow_none, Error **errp);
+BdrvChild * no_coroutine_fn
+bdrv_open_child(const char *filename, QDict *options, const char *bdref_key,
+                BlockDriverState *parent, const BdrvChildClass *child_class,
+                BdrvChildRole child_role, bool allow_none, Error **errp);
+
+BdrvChild * coroutine_fn no_co_wrapper
+bdrv_co_open_child(const char *filename, QDict *options, const char *bdref_key,
+                BlockDriverState *parent, const BdrvChildClass *child_class,
+                BdrvChildRole child_role, bool allow_none, Error **errp);
+
 int bdrv_open_file_child(const char *filename,
                          QDict *options, const char *bdref_key,
                          BlockDriverState *parent, Error **errp);
-BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
+BlockDriverState * no_coroutine_fn
+bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
+BlockDriverState * coroutine_fn no_co_wrapper
+bdrv_co_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
                         Error **errp);
 int bdrv_set_backing_hd_drained(BlockDriverState *bs,
@@ -94,8 +106,15 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs,
                                 Error **errp);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                            const char *bdref_key, Error **errp);
-BlockDriverState *bdrv_open(const char *filename, const char *reference,
-                            QDict *options, int flags, Error **errp);
+
+BlockDriverState * no_coroutine_fn
+bdrv_open(const char *filename, const char *reference, QDict *options,
+          int flags, Error **errp);
+
+BlockDriverState * coroutine_fn no_co_wrapper
+bdrv_co_open(const char *filename, const char *reference,
+             QDict *options, int flags, Error **errp);
+
 BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
                                             const char *node_name,
                                             QDict *options, int flags,
index 614cbd7eda84ae070dbae0b28860cb606f97a01f..5da99d4d60ff6368d355395f01ce98b05fe6e2ba 100644 (file)
@@ -60,27 +60,29 @@ int co_wrapper_mixed_bdrv_rdlock
 bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
                  const void *buf, BdrvRequestFlags flags);
 
-int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
-                                     int64_t bytes, const void *buf,
-                                     BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
+                    const void *buf, BdrvRequestFlags flags);
+
 /*
  * Efficiently zero a region of the disk image.  Note that this is a regular
  * I/O request like read or write and should have a reasonable size.  This
  * function is not suitable for zeroing the entire image in a single request
  * because it may allocate memory for the entire region.
  */
-int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                                       int64_t bytes, BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes,
+                      BdrvRequestFlags flags);
 
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
-                                  PreallocMode prealloc, BdrvRequestFlags flags,
-                                  Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
+                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
 
-int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs);
-int64_t co_wrapper_mixed bdrv_nb_sectors(BlockDriverState *bs);
+int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_nb_sectors(BlockDriverState *bs);
+int64_t co_wrapper_mixed_bdrv_rdlock bdrv_nb_sectors(BlockDriverState *bs);
 
-int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs);
-int64_t co_wrapper_mixed bdrv_getlength(BlockDriverState *bs);
+int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs);
+int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs);
 
 int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs);
 int64_t co_wrapper bdrv_get_allocated_file_size(BlockDriverState *bs);
@@ -88,8 +90,12 @@ int64_t co_wrapper bdrv_get_allocated_file_size(BlockDriverState *bs);
 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
                                BlockDriverState *in_bs, Error **errp);
 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
-void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_delete_file_noerr(BlockDriverState *bs);
 
 
 /* async block I/O */
@@ -97,45 +103,45 @@ void bdrv_aio_cancel(BlockAIOCB *acb);
 void bdrv_aio_cancel_async(BlockAIOCB *acb);
 
 /* sg packet commands */
-int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
 
 /* Ensure contents are flushed to disk.  */
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_flush(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
+                                               int64_t bytes);
 
-int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
-                                  int64_t bytes);
 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
 int bdrv_block_status(BlockDriverState *bs, int64_t offset,
                       int64_t bytes, int64_t *pnum, int64_t *map,
                       BlockDriverState **file);
 
-int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
-                                            BlockDriverState *base,
-                                            int64_t offset, int64_t bytes,
-                                            int64_t *pnum, int64_t *map,
-                                            BlockDriverState **file);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+                           int64_t offset, int64_t bytes, int64_t *pnum,
+                           int64_t *map, BlockDriverState **file);
 int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                             int64_t offset, int64_t bytes, int64_t *pnum,
                             int64_t *map, BlockDriverState **file);
 
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes, int64_t *pnum);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     int64_t *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
                       int64_t *pnum);
 
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
-                                            BlockDriverState *base,
-                                            bool include_base, int64_t offset,
-                                            int64_t bytes, int64_t *pnum);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+                           bool include_base, int64_t offset, int64_t bytes,
+                           int64_t *pnum);
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
                             bool include_base, int64_t offset, int64_t bytes,
                             int64_t *pnum);
 
-int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
-                           bool ignore_allow_rdw, Error **errp);
 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
                               Error **errp);
 bool bdrv_is_read_only(BlockDriverState *bs);
@@ -143,11 +149,14 @@ bool bdrv_is_writable(BlockDriverState *bs);
 bool bdrv_is_sg(BlockDriverState *bs);
 int bdrv_get_flags(BlockDriverState *bs);
 
-bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs);
-bool co_wrapper bdrv_is_inserted(BlockDriverState *bs);
+bool coroutine_fn GRAPH_RDLOCK bdrv_co_is_inserted(BlockDriverState *bs);
+bool co_wrapper_bdrv_rdlock bdrv_is_inserted(BlockDriverState *bs);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_lock_medium(BlockDriverState *bs, bool locked);
 
-void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked);
-void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag);
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_eject(BlockDriverState *bs, bool eject_flag);
 
 const char *bdrv_get_format_name(BlockDriverState *bs);
 
@@ -231,17 +240,15 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
 
 AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
 
-void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs);
-void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs);
+void coroutine_fn GRAPH_RDLOCK bdrv_co_io_plug(BlockDriverState *bs);
+void coroutine_fn GRAPH_RDLOCK bdrv_co_io_unplug(BlockDriverState *bs);
 
-bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
-                                                     const char *name,
-                                                     uint32_t granularity,
-                                                     Error **errp);
-bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs,
-                                                const char *name,
-                                                uint32_t granularity,
-                                                Error **errp);
+bool coroutine_fn GRAPH_RDLOCK
+bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                   uint32_t granularity, Error **errp);
+bool co_wrapper_bdrv_rdlock
+bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                uint32_t granularity, Error **errp);
 
 /**
  *
@@ -272,10 +279,11 @@ bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs,
  *
  * Returns: 0 if succeeded; negative error code if failed.
  **/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
-                                    BdrvChild *dst, int64_t dst_offset,
-                                    int64_t bytes, BdrvRequestFlags read_flags,
-                                    BdrvRequestFlags write_flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+                   BdrvChild *dst, int64_t dst_offset,
+                   int64_t bytes, BdrvRequestFlags read_flags,
+                   BdrvRequestFlags write_flags);
 
 /*
  * "I/O or GS" API functions. These functions can run without
index ba2e0fce2510ea54695697cb3c2f406713c2ab31..d4190173284fa37b4053b814893bb9c2c8c06981 100644 (file)
@@ -246,12 +246,11 @@ struct BlockDriver {
                           Error **errp);
     void (*bdrv_close)(BlockDriverState *bs);
 
-    int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
-                                       Error **errp);
-    int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)(
+        BlockdevCreateOptions *opts, Error **errp);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)(
+        BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
 
     int (*bdrv_amend_options)(BlockDriverState *bs,
                               QemuOpts *opts,
@@ -446,9 +445,10 @@ struct BlockDriver {
      *
      * Returns: true on success, false on failure
      */
-    bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size,
-                              Error **errp);
-    void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
+    bool GRAPH_RDLOCK_PTR (*bdrv_register_buf)(
+        BlockDriverState *bs, void *host, size_t size, Error **errp);
+    void GRAPH_RDLOCK_PTR (*bdrv_unregister_buf)(
+        BlockDriverState *bs, void *host, size_t size);
 
     /*
      * This field is modified only under the BQL, and is part of
@@ -471,19 +471,22 @@ struct BlockDriver {
                                       Error **errp);
 
     /* aio */
-    BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_preadv)(BlockDriverState *bs,
         int64_t offset, int64_t bytes, QEMUIOVector *qiov,
         BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
-    BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
+
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pwritev)(BlockDriverState *bs,
         int64_t offset, int64_t bytes, QEMUIOVector *qiov,
         BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
-    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque);
-    BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
-        int64_t offset, int bytes,
+
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)(
+        BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque);
+
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)(
+        BlockDriverState *bs, int64_t offset, int bytes,
         BlockCompletionFunc *cb, void *opaque);
 
-    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 
     /**
@@ -501,16 +504,16 @@ struct BlockDriver {
      *
      * The buffer in @qiov may point directly to guest memory.
      */
-    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv)(BlockDriverState *bs,
         int64_t offset, int64_t bytes, QEMUIOVector *qiov,
         BdrvRequestFlags flags);
 
-    int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes,
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_part)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
         QEMUIOVector *qiov, size_t qiov_offset,
         BdrvRequestFlags flags);
 
-    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_writev)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
         int flags);
     /**
@@ -528,12 +531,12 @@ struct BlockDriver {
      *
      * The buffer in @qiov may point directly to guest memory.
      */
-    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
-        BdrvRequestFlags flags);
-    int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov,
         BdrvRequestFlags flags);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_part)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        size_t qiov_offset, BdrvRequestFlags flags);
 
     /*
      * Efficiently zero a region of the disk image.  Typically an image format
@@ -541,10 +544,12 @@ struct BlockDriver {
      * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev()
      * will be called instead.
      */
-    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, BdrvRequestFlags flags);
-    int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwrite_zeroes)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        BdrvRequestFlags flags);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes);
 
     /*
      * Map [offset, offset + nbytes) range onto a child of @bs to copy from,
@@ -554,14 +559,10 @@ struct BlockDriver {
      * See the comment of bdrv_co_copy_range for the parameter and return value
      * semantics.
      */
-    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
-                                                BdrvChild *src,
-                                                int64_t offset,
-                                                BdrvChild *dst,
-                                                int64_t dst_offset,
-                                                int64_t bytes,
-                                                BdrvRequestFlags read_flags,
-                                                BdrvRequestFlags write_flags);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_from)(
+        BlockDriverState *bs, BdrvChild *src, int64_t offset,
+        BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags);
 
     /*
      * Map [offset, offset + nbytes) range onto a child of bs to copy data to,
@@ -572,14 +573,10 @@ struct BlockDriver {
      * See the comment of bdrv_co_copy_range for the parameter and return value
      * semantics.
      */
-    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
-                                              BdrvChild *src,
-                                              int64_t src_offset,
-                                              BdrvChild *dst,
-                                              int64_t dst_offset,
-                                              int64_t bytes,
-                                              BdrvRequestFlags read_flags,
-                                              BdrvRequestFlags write_flags);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_to)(
+        BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
+        BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags);
 
     /*
      * Building block for bdrv_block_status[_above] and
@@ -606,7 +603,8 @@ struct BlockDriver {
      * *pnum value for the block-status cache on protocol nodes, prior
      * to clamping *pnum for return to its caller.
      */
-    int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)(
+        BlockDriverState *bs,
         bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
         int64_t *map, BlockDriverState **file);
 
@@ -626,13 +624,16 @@ struct BlockDriver {
      * - receive the snapshot's actual length (which may differ from bs's
      *   length)
      */
-    int coroutine_fn (*bdrv_co_preadv_snapshot)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
-    int coroutine_fn (*bdrv_co_snapshot_block_status)(BlockDriverState *bs,
-        bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
-        int64_t *map, BlockDriverState **file);
-    int coroutine_fn (*bdrv_co_pdiscard_snapshot)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_snapshot)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov, size_t qiov_offset);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)(
+        BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+        int64_t *pnum, int64_t *map, BlockDriverState **file);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes);
 
     /*
      * Invalidate any cached meta-data.
@@ -645,24 +646,26 @@ struct BlockDriver {
      * layers, if needed. This function is needed for deterministic
      * synchronization of the flush finishing callback.
      */
-    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush)(BlockDriverState *bs);
 
     /* Delete a created file. */
-    int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
-                                            Error **errp);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_delete_file)(
+        BlockDriverState *bs, Error **errp);
 
     /*
      * Flushes all data that was already written to the OS all the way down to
      * the disk (for example file-posix.c calls fsync()).
      */
-    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_disk)(
+        BlockDriverState *bs);
 
     /*
      * Flushes all internal caches to the OS. The data may still sit in a
      * writeback cache of the host OS, but it will survive a crash of the qemu
      * process.
      */
-    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_os)(
+        BlockDriverState *bs);
 
     /*
      * Truncate @bs to @offset bytes using the given @prealloc mode
@@ -677,21 +680,26 @@ struct BlockDriver {
      * If @exact is true and this function fails but would succeed
      * with @exact = false, it should return -ENOTSUP.
      */
-    int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
-                                         bool exact, PreallocMode prealloc,
-                                         BdrvRequestFlags flags, Error **errp);
-    int64_t coroutine_fn (*bdrv_co_getlength)(BlockDriverState *bs);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_truncate)(
+        BlockDriverState *bs, int64_t offset, bool exact,
+        PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+    int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_getlength)(
+        BlockDriverState *bs);
+
     int64_t coroutine_fn (*bdrv_co_get_allocated_file_size)(
         BlockDriverState *bs);
 
     BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
                                       Error **errp);
 
-    int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov);
-    int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
-        size_t qiov_offset);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed_part)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov, size_t qiov_offset);
 
     int coroutine_fn (*bdrv_co_get_info)(BlockDriverState *bs,
                                          BlockDriverInfo *bdi);
@@ -707,16 +715,20 @@ struct BlockDriver {
         BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 
     /* removable device specific */
-    bool coroutine_fn (*bdrv_co_is_inserted)(BlockDriverState *bs);
-    void coroutine_fn (*bdrv_co_eject)(BlockDriverState *bs, bool eject_flag);
-    void coroutine_fn (*bdrv_co_lock_medium)(BlockDriverState *bs, bool locked);
+    bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_is_inserted)(
+        BlockDriverState *bs);
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_eject)(
+        BlockDriverState *bs, bool eject_flag);
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_lock_medium)(
+        BlockDriverState *bs, bool locked);
 
     /* to control generic scsi devices */
-    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
-        unsigned long int req, void *buf,
+    BlockAIOCB *coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_aio_ioctl)(
+        BlockDriverState *bs, unsigned long int req, void *buf,
         BlockCompletionFunc *cb, void *opaque);
-    int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
-                                      unsigned long int req, void *buf);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_ioctl)(
+        BlockDriverState *bs, unsigned long int req, void *buf);
 
     /*
      * Returns 0 for completed check, -errno for internal errors.
@@ -729,8 +741,9 @@ struct BlockDriver {
                                              BlkdebugEvent event);
 
     /* io queue for linux-aio */
-    void coroutine_fn (*bdrv_co_io_plug)(BlockDriverState *bs);
-    void coroutine_fn (*bdrv_co_io_unplug)(BlockDriverState *bs);
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_plug)(BlockDriverState *bs);
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)(
+        BlockDriverState *bs);
 
     /**
      * bdrv_drain_begin is called if implemented in the beginning of a
@@ -748,14 +761,16 @@ struct BlockDriver {
     void (*bdrv_drain_end)(BlockDriverState *bs);
 
     bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
-    bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
+
+    bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)(
         BlockDriverState *bs, const char *name, uint32_t granularity,
         Error **errp);
-    int coroutine_fn (*bdrv_co_remove_persistent_dirty_bitmap)(
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_remove_persistent_dirty_bitmap)(
         BlockDriverState *bs, const char *name, Error **errp);
 };
 
-static inline bool block_driver_can_compress(BlockDriver *drv)
+static inline bool TSA_NO_TSA block_driver_can_compress(BlockDriver *drv)
 {
     return drv->bdrv_co_pwritev_compressed ||
            drv->bdrv_co_pwritev_compressed_part;
index 4430bf4c4a462ed15d390ea020f25f8d1129c5c3..eb0da7232ef8def942c82977a4c492a83c76baa9 100644 (file)
  * the I/O API.
  */
 
-int coroutine_fn bdrv_co_preadv_snapshot(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child,
     int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
-int coroutine_fn bdrv_co_snapshot_block_status(BlockDriverState *bs,
-    bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
-    int64_t *map, BlockDriverState **file);
-int coroutine_fn bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status(
+    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+    int64_t *pnum, int64_t *map, BlockDriverState **file);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
     int64_t offset, int64_t bytes);
 
 
-int coroutine_fn bdrv_co_preadv(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv(BdrvChild *child,
     int64_t offset, int64_t bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_part(BdrvChild *child,
     int64_t offset, int64_t bytes,
     QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pwritev(BdrvChild *child,
     int64_t offset, int64_t bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pwritev_part(BdrvChild *child,
     int64_t offset, int64_t bytes,
     QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 
-static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
+static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_pread(BdrvChild *child,
     int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags)
 {
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
 }
 
-static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
+static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_pwrite(BdrvChild *child,
     int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags)
 {
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
     IO_CODE();
+    assert_bdrv_graph_readable();
 
     return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
 }
@@ -111,20 +113,21 @@ void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
 void bdrv_inc_in_flight(BlockDriverState *bs);
 void bdrv_dec_in_flight(BlockDriverState *bs);
 
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
-                                         BdrvChild *dst, int64_t dst_offset,
-                                         int64_t bytes,
-                                         BdrvRequestFlags read_flags,
-                                         BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
-                                       BdrvChild *dst, int64_t dst_offset,
-                                       int64_t bytes,
-                                       BdrvRequestFlags read_flags,
-                                       BdrvRequestFlags write_flags);
-
-int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
-                                               int64_t hint);
-int co_wrapper_mixed
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+                        BdrvChild *dst, int64_t dst_offset,
+                        int64_t bytes, BdrvRequestFlags read_flags,
+                        BdrvRequestFlags write_flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+                      BdrvChild *dst, int64_t dst_offset,
+                      int64_t bytes, BdrvRequestFlags read_flags,
+                      BdrvRequestFlags write_flags);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+int co_wrapper_mixed_bdrv_rdlock
 bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
 
 BdrvChild *bdrv_cow_child(BlockDriverState *bs);
index 233535ef2d3c139f6a4239a8223300fd7dafe51b..fa956debfb2883e721142fafe5c2f1941fca621c 100644 (file)
@@ -36,12 +36,12 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags,
 void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
 
-int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
-                                                        const char *name,
-                                                        Error **errp);
-int co_wrapper bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
-                                                   const char *name,
-                                                   Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                       Error **errp);
+int co_wrapper_bdrv_rdlock
+bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                    Error **errp);
 
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
index 15b9cef086ccb483af2da68b10978b882498f0b0..571049bd0ec05540d2311038072299f6205608b6 100644 (file)
@@ -248,6 +248,17 @@ ssize_t qcrypto_tls_session_read(QCryptoTLSSession *sess,
                                  char *buf,
                                  size_t len);
 
+/**
+ * qcrypto_tls_session_check_pending:
+ * @sess: the TLS session object
+ *
+ * Check whether the TLS buffers hold unread data that has
+ * already been read from the underlying data source.
+ *
+ * Returns: the number of bytes available or zero
+ */
+size_t qcrypto_tls_session_check_pending(QCryptoTLSSession *sess);
+
 /**
  * qcrypto_tls_session_handshake:
  * @sess: the TLS session object
index 64247ecb11f45280f628dfe237d84859c45d6d5a..32cda9ef14c5167115fbe0ee611190413aedd1b9 100644 (file)
 
 #include "qemu/bswap.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 typedef void *PTR;
 typedef uint64_t bfd_vma;
 typedef int64_t bfd_signed_vma;
@@ -506,8 +502,4 @@ static inline bfd_vma bfd_getb16(const bfd_byte *addr)
 
 typedef bool bfd_boolean;
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* DISAS_DIS_ASM_H */
index 54585a9954dfe71b07b78a896e77d9cd766fbd61..0e36f4d063ef2f100faf5024bb701c12b466fe8b 100644 (file)
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #endif
 #include "qemu/interval-tree.h"
+#include "qemu/clang-tsa.h"
 
 /* allow to see translation results - the slowdown should be negligible, so we leave it */
 #define DEBUG_DISAS
@@ -759,8 +760,8 @@ static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
 }
 
 #if defined(CONFIG_USER_ONLY)
-void mmap_lock(void);
-void mmap_unlock(void);
+void TSA_NO_TSA mmap_lock(void);
+void TSA_NO_TSA mmap_unlock(void);
 bool have_mmap_lock(void);
 
 /**
index e0f2f7ab198284dfaef12375deb0b50ed04eb2b0..79e0c80568ec8db0d9a97a607bcba854a46162c7 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef HW_ARM_ALLWINNER_A10_H
 #define HW_ARM_ALLWINNER_A10_H
 
-#include "qemu/error-report.h"
 #include "hw/char/serial.h"
 #include "hw/arm/boot.h"
 #include "hw/pci/pci_device.h"
index f1b7e4a48d337c01e82bdbe4c5162e96468d5bcb..72c77220964b3a99baa60f7da29033f45d672c16 100644 (file)
@@ -32,6 +32,7 @@
 #include "hw/nvram/npcm7xx_otp.h"
 #include "hw/timer/npcm7xx_timer.h"
 #include "hw/ssi/npcm7xx_fiu.h"
+#include "hw/ssi/npcm_pspi.h"
 #include "hw/usb/hcd-ehci.h"
 #include "hw/usb/hcd-ohci.h"
 #include "target/arm/cpu.h"
@@ -104,6 +105,7 @@ struct NPCM7xxState {
     NPCM7xxFIUState     fiu[2];
     NPCM7xxEMCState     emc[2];
     NPCM7xxSDHCIState   mmc;
+    NPCMPSPIState       pspi[2];
 };
 
 #define TYPE_NPCM7XX    "npcm7xx"
index c5683af07d676cdca0c2cc8991f5cfdf243585f9..9fcff26357f028c30a15e277ae2ac221f656b21d 100644 (file)
@@ -27,8 +27,6 @@
 #define SMMU_PCI_DEVFN_MAX    256
 #define SMMU_PCI_DEVFN(sid)   (sid & 0xFF)
 
-#define SMMU_MAX_VA_BITS      48
-
 /*
  * Page table walk error types
  */
index f1921fdf9e722f221f8b06bd0a1975cb6a242cb0..a0c026402e1360fa781768cb809d0cf9f18a9f9d 100644 (file)
@@ -20,7 +20,6 @@
 #define HW_ARM_SMMUV3_H
 
 #include "hw/arm/smmu-common.h"
-#include "hw/registerfields.h"
 #include "qom/object.h"
 
 #define TYPE_SMMUV3_IOMMU_MEMORY_REGION "smmuv3-iommu-memory-region"
@@ -46,6 +45,7 @@ struct SMMUv3State {
     uint32_t cr[3];
     uint32_t cr0ack;
     uint32_t statusr;
+    uint32_t gbpa;
     uint32_t irq_ctrl;
     uint32_t gerror;
     uint32_t gerrorn;
index a39985516a4425aa3c1b8eebddfbefa1daf06e75..9deadf223bfb3904d7fee25be94a8370f33af4f6 100644 (file)
@@ -26,7 +26,6 @@
 #define HW_IBEX_UART_H
 
 #include "hw/sysbus.h"
-#include "hw/registerfields.h"
 #include "chardev/char-fe.h"
 #include "qemu/timer.h"
 #include "qom/object.h"
index 2417597236bca91f629ec17c0551aa97e394f345..671f041bec67e55b369c9e90aebafa00ab273230 100644 (file)
@@ -349,7 +349,7 @@ struct CPUState {
     bool unplug;
     bool crash_occurred;
     bool exit_request;
-    bool in_exclusive_context;
+    int exclusive_context_count;
     uint32_t cflags_next_tb;
     /* updates protected by BQL */
     uint32_t interrupt_request;
@@ -758,7 +758,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data
  */
 static inline bool cpu_in_exclusive_context(const CPUState *cpu)
 {
-    return cpu->in_exclusive_context;
+    return cpu->exclusive_context_count;
 }
 
 /**
index 0180c7b0ca136d3845c4620d73602d3846ecbdfc..1ca262fbf82028f2d2791faed1f7c96831b27c84 100644 (file)
 #include "qom/object.h"
 
 #define TYPE_NVIC "armv7m_nvic"
-
-typedef struct NVICState NVICState;
-DECLARE_INSTANCE_CHECKER(NVICState, NVIC,
-                         TYPE_NVIC)
+OBJECT_DECLARE_SIMPLE_TYPE(NVICState, NVIC)
 
 /* Highest permitted number of exceptions (architectural limit) */
 #define NVIC_MAX_VECTORS 512
@@ -86,4 +83,127 @@ struct NVICState {
     qemu_irq sysresetreq;
 };
 
+/* Interface between CPU and Interrupt controller.  */
+/**
+ * armv7m_nvic_set_pending: mark the specified exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Marks the specified exception as pending. Note that we will assert()
+ * if @secure is true and @irq does not specify one of the fixed set
+ * of architecturally banked exceptions.
+ */
+void armv7m_nvic_set_pending(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_set_pending_derived: mark this derived exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for derived
+ * exceptions (exceptions generated in the course of trying to take
+ * a different exception).
+ */
+void armv7m_nvic_set_pending_derived(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_set_pending_lazyfp: mark this lazy FP exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for exceptions
+ * generated in the course of lazy stacking of FP registers.
+ */
+void armv7m_nvic_set_pending_lazyfp(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_get_pending_irq_info: return highest priority pending
+ *    exception, and whether it targets Secure state
+ * @s: the NVIC
+ * @pirq: set to pending exception number
+ * @ptargets_secure: set to whether pending exception targets Secure
+ *
+ * This function writes the number of the highest priority pending
+ * exception (the one which would be made active by
+ * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
+ * to true if the current highest priority pending exception should
+ * be taken to Secure state, false for NS.
+ */
+void armv7m_nvic_get_pending_irq_info(NVICState *s, int *pirq,
+                                      bool *ptargets_secure);
+/**
+ * armv7m_nvic_acknowledge_irq: make highest priority pending exception active
+ * @s: the NVIC
+ *
+ * Move the current highest priority pending exception from the pending
+ * state to the active state, and update v7m.exception to indicate that
+ * it is the exception currently being handled.
+ */
+void armv7m_nvic_acknowledge_irq(NVICState *s);
+/**
+ * armv7m_nvic_complete_irq: complete specified interrupt or exception
+ * @s: the NVIC
+ * @irq: the exception number to complete
+ * @secure: true if this exception was secure
+ *
+ * Returns: -1 if the irq was not active
+ *           1 if completing this irq brought us back to base (no active irqs)
+ *           0 if there is still an irq active after this one was completed
+ * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
+ */
+int armv7m_nvic_complete_irq(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_get_ready_status: return whether an exception is ready
+ * @s: the NVIC
+ * @irq: the exception number to query
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Return whether an exception is "ready", i.e. whether the exception is
+ * enabled and is configured at a priority which would allow it to
+ * interrupt the current execution priority. This controls whether the
+ * RDY bit for it in the FPCCR is set.
+ */
+bool armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_raw_execution_priority: return the raw execution priority
+ * @s: the NVIC
+ *
+ * Returns: the raw execution priority as defined by the v8M architecture.
+ * This is the execution priority minus the effects of AIRCR.PRIS,
+ * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting.
+ * (v8M ARM ARM I_PKLD.)
+ */
+int armv7m_nvic_raw_execution_priority(NVICState *s);
+/**
+ * armv7m_nvic_neg_prio_requested: return true if the requested execution
+ * priority is negative for the specified security state.
+ * @s: the NVIC
+ * @secure: the security state to test
+ * This corresponds to the pseudocode IsReqExecPriNeg().
+ */
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure);
+#else
+static inline bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure)
+{
+    return false;
+}
+#endif
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_can_take_pending_exception(NVICState *s);
+#else
+static inline bool armv7m_nvic_can_take_pending_exception(NVICState *s)
+{
+    return true;
+}
+#endif
+
 #endif
index 1f6d07776605d8f0104b45be47788c8a18572bd7..8089cc1c31b466d2cef1ae146077ddc037678030 100644 (file)
@@ -32,7 +32,6 @@
 #include "hw/ssi/ssi.h"
 #include "qemu/fifo8.h"
 #include "qom/object.h"
-#include "hw/registerfields.h"
 #include "qemu/timer.h"
 
 #define TYPE_IBEX_SPI_HOST "ibex-spi"
diff --git a/include/hw/ssi/npcm_pspi.h b/include/hw/ssi/npcm_pspi.h
new file mode 100644 (file)
index 0000000..37cc784
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Nuvoton Peripheral SPI Module
+ *
+ * Copyright 2023 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM_PSPI_H
+#define NPCM_PSPI_H
+
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM_PSPI_NR_REGS 3
+
+/**
+ * NPCMPSPIState - Device state for one Peripheral SPI (PSPI) module.
+ * @parent: System bus device.
+ * @mmio: Memory region for register access.
+ * @spi: The SPI bus mastered by this controller.
+ * @regs: Register contents.
+ * @irq: The interrupt request queue for this module.
+ *
+ * Each PSPI has a shared bank of registers, and controls up to four chip
+ * selects. Each chip select has a dedicated memory region which may be used to
+ * read and write the flash connected to that chip select as if it were memory.
+ */
+typedef struct NPCMPSPIState {
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+
+    SSIBus *spi;
+    uint16_t regs[NPCM_PSPI_NR_REGS];
+    qemu_irq irq;
+} NPCMPSPIState;
+
+#define TYPE_NPCM_PSPI "npcm-pspi"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCMPSPIState, NPCM_PSPI)
+
+#endif /* NPCM_PSPI_H */
index e573f5a9f19f4d6b7c2bedeecf8e442dd30efc44..87524c64a443592bff602e3783b2bf215229663f 100644 (file)
@@ -61,11 +61,11 @@ typedef struct VFIORegion {
 typedef struct VFIOMigration {
     struct VFIODevice *vbasedev;
     VMChangeStateEntry *vm_state;
-    VFIORegion region;
-    uint32_t device_state;
-    int vm_running;
     Notifier migration_state;
-    uint64_t pending_bytes;
+    uint32_t device_state;
+    int data_fd;
+    void *data_buffer;
+    size_t data_buffer_size;
 } VFIOMigration;
 
 typedef struct VFIOAddressSpace {
@@ -218,6 +218,8 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
 extern VFIOGroupList vfio_group_list;
 
 bool vfio_mig_active(void);
+int vfio_block_multiple_devices_migration(Error **errp);
+void vfio_unblock_multiple_devices_migration(void);
 int64_t vfio_mig_bytes_transferred(void);
 
 #ifdef CONFIG_LINUX
index 37b75e15e3e59c6c4865fceeb605003488ec2608..779568ab5d28f62f5be86863c68e6dc416fe4d47 100644 (file)
@@ -74,13 +74,22 @@ struct VirtIOSCSICommon {
     VirtQueue **cmd_vqs;
 };
 
+struct VirtIOSCSIReq;
+
 struct VirtIOSCSI {
     VirtIOSCSICommon parent_obj;
 
     SCSIBus bus;
-    int resetting;
+    int resetting; /* written from main loop thread, read from any thread */
     bool events_dropped;
 
+    /*
+     * TMFs deferred to main loop BH. These fields are protected by
+     * virtio_scsi_acquire().
+     */
+    QEMUBH *tmf_bh;
+    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
+
     /* Fields for dataplane below */
     AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
 
index b91a0cdbf8fcf986888ca847f55b8499e168baf2..a8dfd8fefd0a5c94737b1fd483129f1b4ba75a65 100644 (file)
@@ -47,25 +47,25 @@ typedef struct SaveVMHandlers {
     /* This runs outside the iothread lock!  */
     int (*save_setup)(QEMUFile *f, void *opaque);
     /* Note for save_live_pending:
-     * - res_precopy_only is for data which must be migrated in precopy phase
-     *     or in stopped state, in other words - before target vm start
-     * - res_compatible is for data which may be migrated in any phase
-     * - res_postcopy_only is for data which must be migrated in postcopy phase
-     *     or in stopped state, in other words - after source vm stop
+     * must_precopy:
+     * - must be migrated in precopy or in stopped state
+     * - i.e. must be migrated before target start
      *
-     * Sum of res_postcopy_only, res_compatible and res_postcopy_only is the
-     * whole amount of pending data.
+     * can_postcopy:
+     * - can migrate in postcopy or in stopped state
+     * - i.e. can migrate after target start
+     * - some can also be migrated during precopy (RAM)
+     * - some must be migrated after source stops (block-dirty-bitmap)
+     *
+     * Sum of must_precopy and can_postcopy is the whole amount of
+     * pending data.
      */
     /* This estimates the remaining data to transfer */
-    void (*state_pending_estimate)(void *opaque,
-                                   uint64_t *res_precopy_only,
-                                   uint64_t *res_compatible,
-                                   uint64_t *res_postcopy_only);
+    void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy,
+                                   uint64_t *can_postcopy);
     /* This calculate the exact remaining data to transfer */
-    void (*state_pending_exact)(void *opaque,
-                                uint64_t *res_precopy_only,
-                                uint64_t *res_compatible,
-                                uint64_t *res_postcopy_only);
+    void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
+                                uint64_t *can_postcopy);
     LoadStateHandler *load_state;
     int (*load_setup)(QEMUFile *f, void *opaque);
     int (*load_cleanup)(void *opaque);
index fad589cc1d8e7e749b0b1f3c96e8b6a245779098..1d88621c124f72247977b068a4ba8b9bae5eac5a 100644 (file)
@@ -203,6 +203,20 @@ void net_socket_rs_init(SocketReadState *rs,
                         bool vnet_hdr);
 NetClientState *qemu_get_peer(NetClientState *nc, int queue_index);
 
+/**
+ * qemu_get_nic_models:
+ * @device_type: Defines which devices should be taken into consideration
+ *               (e.g. TYPE_DEVICE for all devices, or TYPE_PCI_DEVICE for PCI)
+ *
+ * Get an array of pointers to names of NIC devices that are available in
+ * the QEMU binary. The array is terminated with a NULL pointer entry.
+ * The caller is responsible for freeing the memory when it is not required
+ * anymore, e.g. with g_ptr_array_free(..., true).
+ *
+ * Returns: Pointer to the array that contains the pointers to the names.
+ */
+GPtrArray *qemu_get_nic_models(const char *device_type);
+
 /* NIC info */
 
 #define MAX_NICS 8
index 87ca83b155afc0ff139cafacabb6c0834688f8e9..8dd9fcb071a41835fd6fe783c0b2c0fe734b76ec 100644 (file)
@@ -29,9 +29,6 @@
 #define QERR_DEVICE_NO_HOTPLUG \
     "Device '%s' does not support hotplugging"
 
-#define QERR_FEATURE_DISABLED \
-    "The feature '%s' is not enabled"
-
 #define QERR_INVALID_PARAMETER \
     "Invalid parameter '%s'"
 
@@ -59,9 +56,6 @@
 #define QERR_QGA_COMMAND_FAILED \
     "Guest agent command failed, error was '%s'"
 
-#define QERR_REPLAY_NOT_SUPPORTED \
-    "Record/replay feature is not supported for '%s'"
-
 #define QERR_UNSUPPORTED \
     "this feature or command is not currently supported"
 
index 3cbe52246b81b35e42565bf32e95f1d6db5f62de..b1650daedf9c2c04155c47c99e6d28231a3f1ea1 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef BSWAP_H
 #define BSWAP_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #undef  bswap16
 #define bswap16(_x) __builtin_bswap16(_x)
 #undef  bswap32
@@ -395,8 +391,4 @@ DO_STN_LDN_P(be)
 #undef le_bswaps
 #undef be_bswaps
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* BSWAP_H */
index b9addcc11f7d39c2f18b01ff709e517608eea3c9..6006dfae44c37bab7cce4437206c1e6d8c754b7b 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef ENVLIST_H
 #define ENVLIST_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 typedef struct envlist envlist_t;
 
 envlist_t *envlist_create(void);
@@ -15,8 +11,4 @@ int envlist_parse_set(envlist_t *, const char *);
 int envlist_parse_unset(envlist_t *, const char *);
 char **envlist_to_environ(const envlist_t *, size_t *);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* ENVLIST_H */
index af4e4ab7468ef01f63084b6ff0efc0131f068c27..8136e33674cd05f2a439bb6fd6d1e9a14c4b496e 100644 (file)
@@ -330,7 +330,7 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
                              int64_t *dirty_start, int64_t *dirty_count);
 
 /*
- * bdrv_dirty_bitmap_status:
+ * hbitmap_status:
  * @hb: The HBitmap to operate on
  * @start: The bit to start from
  * @count: Number of bits to proceed
index b063c6fde81d42997d1039d7579c5178e6fb09f7..313fc414bc2a16a24a67c7090a69a405534137b3 100644 (file)
 #include "qemu/sys_membarrier.h"
 #include "qemu/coroutine-tls.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /*
  * Important !
  *
@@ -196,8 +192,4 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
 void rcu_add_force_rcu_notifier(Notifier *n);
 void rcu_remove_force_rcu_notifier(Notifier *n);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* QEMU_RCU_H */
index 0e53ddd5305ec6345eb71e9d7e67f9b6b0b8a24f..4e6298d47307fd8dbcc1d13f3297ea8ae5bcdbcd 100644 (file)
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
 /*
  * List access methods.
  */
@@ -311,7 +306,4 @@ extern "C" {
          (var) && ((next) = qatomic_rcu_read(&(var)->field.sle_next), 1); \
          (var) = (next))
 
-#ifdef __cplusplus
-}
-#endif
 #endif /* QEMU_RCU_QUEUE_H */
index 7841084199ce73ee486056bb4bdfedabea8baf09..dd3822d7cee9010fb4e07158ee86c429c30315f2 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "qemu/processor.h"
 #include "qemu/atomic.h"
+#include "qemu/clang-tsa.h"
 
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
@@ -24,9 +25,12 @@ typedef struct QemuThread QemuThread;
 
 void qemu_mutex_init(QemuMutex *mutex);
 void qemu_mutex_destroy(QemuMutex *mutex);
-int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line);
-void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line);
-void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line);
+int TSA_NO_TSA qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file,
+                                       const int line);
+void TSA_NO_TSA qemu_mutex_lock_impl(QemuMutex *mutex, const char *file,
+                                     const int line);
+void TSA_NO_TSA qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file,
+                                       const int line);
 
 void qemu_rec_mutex_init(QemuRecMutex *mutex);
 void qemu_rec_mutex_destroy(QemuRecMutex *mutex);
@@ -153,8 +157,8 @@ void qemu_cond_destroy(QemuCond *cond);
  */
 void qemu_cond_signal(QemuCond *cond);
 void qemu_cond_broadcast(QemuCond *cond);
-void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
-                         const char *file, const int line);
+void TSA_NO_TSA qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
+                                    const char *file, const int line);
 bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
                               const char *file, const int line);
 
index d201c61260de35bc7b0dac7780d273f932510649..db5218c39ec0579133961636fef61dcd33a70ddf 100644 (file)
 #ifndef QEMU_URI_H
 #define QEMU_URI_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * URI:
  *
@@ -105,7 +101,4 @@ struct QueryParams *query_params_new (int init_alloc);
 extern QueryParams *query_params_parse (const char *query);
 extern void query_params_free (QueryParams *ps);
 
-#ifdef __cplusplus
-}
-#endif
 #endif /* QEMU_URI_H */
index cd43193b809d9a09c1a6c1f8ed3b9435f91d0519..25c72433cab03fc49ba711288a4609f56ceb8a9a 100644 (file)
@@ -15,7 +15,6 @@
 #include "io/channel-socket.h"
 #include "io/channel-file.h"
 #include "io/net-listener.h"
-#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "standard-headers/linux/virtio_blk.h"
 
index 48b620cbef69684308518fce22ebb9f48e9a7b79..69cab17b383f196b9f70d916d12b439f21a8dc61 100644 (file)
@@ -98,18 +98,42 @@ extern "C" {
 #define DRM_FORMAT_INVALID     0
 
 /* color index */
+#define DRM_FORMAT_C1          fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2          fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4          fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */
 #define DRM_FORMAT_C8          fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
 
-/* 8 bpp Red */
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D1          fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D2          fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D4          fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D8          fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1          fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2          fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4          fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R8          fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
 
-/* 10 bpp Red */
+/* 10 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R10         fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
 
-/* 12 bpp Red */
+/* 12 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R12         fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
 
-/* 16 bpp Red */
+/* 16 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R16         fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
 
 /* 16 bpp RG */
@@ -204,7 +228,9 @@ extern "C" {
 #define DRM_FORMAT_VYUY                fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUV                fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUY8888    fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */
 #define DRM_FORMAT_XYUV8888    fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUY8888    fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */
 #define DRM_FORMAT_VUY888      fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */
 #define DRM_FORMAT_VUY101010   fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */
 
@@ -717,6 +743,35 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
 
+/*
+ * Vivante TS (tile-status) buffer modifiers. They can be combined with all of
+ * the color buffer tiling modifiers defined above. When TS is present it's a
+ * separate buffer containing the clear/compression status of each tile. The
+ * modifiers are defined as VIVANTE_MOD_TS_c_s, where c is the color buffer
+ * tile size in bytes covered by one entry in the status buffer and s is the
+ * number of status bits per entry.
+ * We reserve the top 8 bits of the Vivante modifier space for tile status
+ * clear/compression modifiers, as future cores might add some more TS layout
+ * variations.
+ */
+#define VIVANTE_MOD_TS_64_4               (1ULL << 48)
+#define VIVANTE_MOD_TS_64_2               (2ULL << 48)
+#define VIVANTE_MOD_TS_128_4              (3ULL << 48)
+#define VIVANTE_MOD_TS_256_4              (4ULL << 48)
+#define VIVANTE_MOD_TS_MASK               (0xfULL << 48)
+
+/*
+ * Vivante compression modifiers. Those depend on a TS modifier being present
+ * as the TS bits get reinterpreted as compression tags instead of simple
+ * clear markers when compression is enabled.
+ */
+#define VIVANTE_MOD_COMP_DEC400           (1ULL << 52)
+#define VIVANTE_MOD_COMP_MASK             (0xfULL << 52)
+
+/* Masking out the extension bits will yield the base modifier. */
+#define VIVANTE_MOD_EXT_MASK              (VIVANTE_MOD_TS_MASK | \
+                                           VIVANTE_MOD_COMP_MASK)
+
 /* NVIDIA frame buffer modifiers */
 
 /*
index 4537da20cc0ae4e6cbd1925facef96f598c135e1..87176ab075d25440770cfb204d90a5ba0c6a1116 100644 (file)
@@ -159,8 +159,10 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep)
  *     in its bus driver structure (e.g. pci_driver::name).  Must
  *     not be an empty string.
  * @version: Driver version string; may be an empty string
- * @fw_version: Firmware version string; may be an empty string
- * @erom_version: Expansion ROM version string; may be an empty string
+ * @fw_version: Firmware version string; driver defined; may be an
+ *     empty string
+ * @erom_version: Expansion ROM version string; driver defined; may be
+ *     an empty string
  * @bus_info: Device bus address.  This should match the dev_name()
  *     string for the underlying bus device, if there is one.  May be
  *     an empty string.
@@ -179,10 +181,6 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep)
  *
  * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
  * strings in any string set (from Linux 2.6.34).
- *
- * Drivers should set at most @driver, @version, @fw_version and
- * @bus_info in their get_drvinfo() implementation.  The ethtool
- * core fills in the other fields using other driver operations.
  */
 struct ethtool_drvinfo {
        uint32_t        cmd;
@@ -736,6 +734,51 @@ enum ethtool_module_power_mode {
        ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
 
+/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ *     functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions is
+ *     unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+       ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+       ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+       ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ *     IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ *     asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ *     false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ *     asserted true when either of the PSE state diagram variables
+ *     pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ *     is asserted true when the PoDL PSE state diagram variable pi_powered is
+ *     true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ *     true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ *     when the logical combination of the PoDL PSE state diagram variables
+ *     pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ *     true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+       ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
@@ -1692,6 +1735,13 @@ enum ethtool_link_mode_bit_indices {
        ETHTOOL_LINK_MODE_100baseFX_Half_BIT             = 90,
        ETHTOOL_LINK_MODE_100baseFX_Full_BIT             = 91,
        ETHTOOL_LINK_MODE_10baseT1L_Full_BIT             = 92,
+       ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT         = 93,
+       ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT         = 94,
+       ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT         = 95,
+       ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT       = 96,
+       ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT         = 97,
+       ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT         = 98,
+
        /* must be last entry */
        __ETHTOOL_LINK_MODE_MASK_NBITS
 };
@@ -1803,6 +1853,7 @@ enum ethtool_link_mode_bit_indices {
 #define SPEED_100000           100000
 #define SPEED_200000           200000
 #define SPEED_400000           400000
+#define SPEED_800000           800000
 
 #define SPEED_UNKNOWN          -1
 
@@ -1840,6 +1891,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
 #define MASTER_SLAVE_STATE_SLAVE               3
 #define MASTER_SLAVE_STATE_ERR                 4
 
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE                0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE       1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS         2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP   3
+
 /* Which connector port. */
 #define PORT_TP                        0x00
 #define PORT_AUI               0x01
@@ -2033,8 +2098,8 @@ enum ethtool_reset_flags {
  *     reported consistently by PHYLIB.  Read-only.
  * @master_slave_cfg: Master/slave port mode.
  * @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
  * @reserved: Reserved for future use; see the note on reserved space.
- * @reserved1: Reserved for future use; see the note on reserved space.
  * @link_mode_masks: Variable length bitmaps.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
@@ -2085,7 +2150,7 @@ struct ethtool_link_settings {
        uint8_t transceiver;
        uint8_t master_slave_cfg;
        uint8_t master_slave_state;
-       uint8_t reserved1[1];
+       uint8_t rate_matching;
        uint32_t        reserved[7];
        uint32_t        link_mode_masks[];
        /* layout of link_mode_masks fields:
index bda06258be57824154067d19c33a64c4c25b1284..a1af78d9894ea0e610855d1c2b4359053757c1c1 100644 (file)
  *  - add FUSE_SECURITY_CTX init flag
  *  - add security context to create, mkdir, symlink, and mknod requests
  *  - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *
+ *  7.37
+ *  - add FUSE_TMPFILE
+ *
+ *  7.38
+ *  - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry
+ *  - add FOPEN_PARALLEL_DIRECT_WRITES
  */
 
 #ifndef _LINUX_FUSE_H
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 36
+#define FUSE_KERNEL_MINOR_VERSION 38
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -297,6 +304,7 @@ struct fuse_file_lock {
  * FOPEN_CACHE_DIR: allow caching this directory
  * FOPEN_STREAM: the file is stream-like (no file position at all)
  * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
+ * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
  */
 #define FOPEN_DIRECT_IO                (1 << 0)
 #define FOPEN_KEEP_CACHE       (1 << 1)
@@ -304,6 +312,7 @@ struct fuse_file_lock {
 #define FOPEN_CACHE_DIR                (1 << 3)
 #define FOPEN_STREAM           (1 << 4)
 #define FOPEN_NOFLUSH          (1 << 5)
+#define FOPEN_PARALLEL_DIRECT_WRITES   (1 << 6)
 
 /**
  * INIT request/reply flags
@@ -484,6 +493,12 @@ struct fuse_file_lock {
  */
 #define FUSE_SETXATTR_ACL_KILL_SGID    (1 << 0)
 
+/**
+ * notify_inval_entry flags
+ * FUSE_EXPIRE_ONLY
+ */
+#define FUSE_EXPIRE_ONLY               (1 << 0)
+
 enum fuse_opcode {
        FUSE_LOOKUP             = 1,
        FUSE_FORGET             = 2,  /* no reply */
@@ -533,6 +548,7 @@ enum fuse_opcode {
        FUSE_SETUPMAPPING       = 48,
        FUSE_REMOVEMAPPING      = 49,
        FUSE_SYNCFS             = 50,
+       FUSE_TMPFILE            = 51,
 
        /* CUSE specific operations */
        CUSE_INIT               = 4096,
@@ -911,7 +927,7 @@ struct fuse_notify_inval_inode_out {
 struct fuse_notify_inval_entry_out {
        uint64_t        parent;
        uint32_t        namelen;
-       uint32_t        padding;
+       uint32_t        flags;
 };
 
 struct fuse_notify_delete_out {
index 50790aee5ac7746e69f8d5e0a8b14dc51e88a176..f6bab08540d81247024938067e84306dc1c00cde 100644 (file)
 #define KEY_KBD_LAYOUT_NEXT    0x248   /* AC Next Keyboard Layout Select */
 #define KEY_EMOJI_PICKER       0x249   /* Show/hide emoji picker (HUTRR101) */
 #define KEY_DICTATE            0x24a   /* Start or Stop Voice Dictation Session (HUTRR99) */
+#define KEY_CAMERA_ACCESS_ENABLE       0x24b   /* Enables programmatic access to camera devices. (HUTRR72) */
+#define KEY_CAMERA_ACCESS_DISABLE      0x24c   /* Disables programmatic access to camera devices. (HUTRR72) */
+#define KEY_CAMERA_ACCESS_TOGGLE       0x24d   /* Toggles the current state of the camera access control. (HUTRR72) */
 
 #define KEY_BRIGHTNESS_MIN             0x250   /* Set Brightness to Minimum */
 #define KEY_BRIGHTNESS_MAX             0x251   /* Set Brightness to Maximum */
 #define ABS_TOOL_WIDTH         0x1c
 
 #define ABS_VOLUME             0x20
+#define ABS_PROFILE            0x21
 
 #define ABS_MISC               0x28
 
index 57b8e2ffb1dd37fc99a356588d34f782e053bafe..85ab1278811e1495d1ee007dfcb0ef860a779d4c 100644 (file)
 /* Precision Time Measurement */
 #define PCI_PTM_CAP                    0x04        /* PTM Capability */
 #define  PCI_PTM_CAP_REQ               0x00000001  /* Requester capable */
+#define  PCI_PTM_CAP_RES               0x00000002  /* Responder capable */
 #define  PCI_PTM_CAP_ROOT              0x00000004  /* Root capable */
 #define  PCI_PTM_GRANULARITY_MASK      0x0000FF00  /* Clock granularity */
 #define PCI_PTM_CTRL                   0x08        /* PTM Control */
 #define  PCI_DOE_STATUS_DATA_OBJECT_READY      0x80000000  /* Data Object Ready */
 #define PCI_DOE_WRITE          0x10    /* DOE Write Data Mailbox Register */
 #define PCI_DOE_READ           0x14    /* DOE Read Data Mailbox Register */
+#define PCI_DOE_CAP_SIZEOF     0x18    /* Size of DOE register block */
 
 /* DOE Data Object - note not actually registers */
 #define PCI_DOE_DATA_OBJECT_HEADER_1_VID               0x0000ffff
index 2dcc90826ae7d30ccc7169355b43800c21551214..e81715cd70c37c06472ca4b1965b2181f430078a 100644 (file)
@@ -40,6 +40,7 @@
 #define VIRTIO_BLK_F_MQ                12      /* support more than one vq */
 #define VIRTIO_BLK_F_DISCARD   13      /* DISCARD is supported */
 #define VIRTIO_BLK_F_WRITE_ZEROES      14      /* WRITE ZEROES is supported */
+#define VIRTIO_BLK_F_SECURE_ERASE      16 /* Secure Erase is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -119,6 +120,21 @@ struct virtio_blk_config {
        uint8_t write_zeroes_may_unmap;
 
        uint8_t unused1[3];
+
+       /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */
+       /*
+        * The maximum secure erase sectors (in 512-byte sectors) for
+        * one segment.
+        */
+       __virtio32 max_secure_erase_sectors;
+       /*
+        * The maximum number of secure erase segments in a
+        * secure erase command.
+        */
+       __virtio32 max_secure_erase_seg;
+       /* Secure erase commands must be aligned to this number of sectors. */
+       __virtio32 secure_erase_sector_alignment;
+
 } QEMU_PACKED;
 
 /*
@@ -153,6 +169,9 @@ struct virtio_blk_config {
 /* Write zeroes command */
 #define VIRTIO_BLK_T_WRITE_ZEROES      13
 
+/* Secure erase command */
+#define VIRTIO_BLK_T_SECURE_ERASE      14
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER   0x80000000
index 245e1eff4b9ddc48fcd9d9860d864beb50149ea6..a11ecc3f92df1adfec255979da3b166057a8a3bf 100644 (file)
@@ -9,6 +9,7 @@
 #define VIRTIO_BT_F_VND_HCI    0       /* Indicates vendor command support */
 #define VIRTIO_BT_F_MSFT_EXT   1       /* Indicates MSFT vendor support */
 #define VIRTIO_BT_F_AOSP_EXT   2       /* Indicates AOSP vendor support */
+#define VIRTIO_BT_F_CONFIG_V2  3       /* Use second version configuration */
 
 enum virtio_bt_config_type {
        VIRTIO_BT_CONFIG_TYPE_PRIMARY   = 0,
@@ -28,4 +29,11 @@ struct virtio_bt_config {
        uint16_t msft_opcode;
 } QEMU_PACKED;
 
+struct virtio_bt_config_v2 {
+       uint8_t  type;
+       uint8_t  alignment;
+       uint16_t vendor;
+       uint16_t msft_opcode;
+};
+
 #endif /* _LINUX_VIRTIO_BT_H */
index 42c68caf719d9941234604354a32028a82c8c12b..c0e797067aae3664357c8664c152d8c73a692490 100644 (file)
@@ -57,6 +57,9 @@
                                         * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23  /* Set MAC address */
 #define VIRTIO_NET_F_NOTF_COAL 53      /* Device supports notifications coalescing */
+#define VIRTIO_NET_F_GUEST_USO4        54      /* Guest can handle USOv4 in. */
+#define VIRTIO_NET_F_GUEST_USO6        55      /* Guest can handle USOv6 in. */
+#define VIRTIO_NET_F_HOST_USO  56      /* Host can handle USO in. */
 #define VIRTIO_NET_F_HASH_REPORT  57   /* Supports hash report */
 #define VIRTIO_NET_F_RSS         60    /* Supports RSS RX steering */
 #define VIRTIO_NET_F_RSC_EXT     61    /* extended coalescing info */
@@ -130,6 +133,7 @@ struct virtio_net_hdr_v1 {
 #define VIRTIO_NET_HDR_GSO_TCPV4       1       /* GSO frame, IPv4 TCP (TSO) */
 #define VIRTIO_NET_HDR_GSO_UDP         3       /* GSO frame, IPv4 UDP (UFO) */
 #define VIRTIO_NET_HDR_GSO_TCPV6       4       /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_UDP_L4      5       /* GSO frame, IPv4& IPv6 UDP (USO) */
 #define VIRTIO_NET_HDR_GSO_ECN         0x80    /* TCP has ECN set */
        uint8_t gso_type;
        __virtio16 hdr_len;     /* Ethernet + IP + tcp/udp hdrs */
index 6858e39cb65f6f2a720bd5d2b82f4fbade1bfca1..2b6d27db7c721a6744814a1a7740e47efbbda760 100644 (file)
  */
 
 BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
-BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
-                              uint64_t shared_perm, Error **errp);
-BlockBackend *blk_new_open(const char *filename, const char *reference,
-                           QDict *options, int flags, Error **errp);
+
+BlockBackend * no_coroutine_fn
+blk_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+                Error **errp);
+
+BlockBackend * coroutine_fn no_co_wrapper
+blk_co_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+                   Error **errp);
+
+BlockBackend * no_coroutine_fn
+blk_new_open(const char *filename, const char *reference, QDict *options,
+             int flags, Error **errp);
+
+BlockBackend * coroutine_fn no_co_wrapper
+blk_co_new_open(const char *filename, const char *reference, QDict *options,
+                int flags, Error **errp);
+
 int blk_get_refcnt(BlockBackend *blk);
 void blk_ref(BlockBackend *blk);
 void blk_unref(BlockBackend *blk);
index b1196ab93c6b6e507308c383080880cc5c39ec13..40ab1787192f3282b7aebc6d39ce51586be1c25e 100644 (file)
@@ -55,10 +55,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
 void blk_inc_in_flight(BlockBackend *blk);
 void blk_dec_in_flight(BlockBackend *blk);
 
-bool coroutine_fn blk_co_is_inserted(BlockBackend *blk);
-bool co_wrapper_mixed blk_is_inserted(BlockBackend *blk);
+bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk);
+bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk);
 
-bool blk_is_available(BlockBackend *blk);
+bool coroutine_fn GRAPH_RDLOCK blk_co_is_available(BlockBackend *blk);
+bool co_wrapper_mixed_bdrv_rdlock blk_is_available(BlockBackend *blk);
 
 void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked);
 void co_wrapper blk_lock_medium(BlockBackend *blk, bool locked);
index 5b38c7bd0451ae9d9c3a058a2b30762d52bc89da..97d0243aeeffdf04aa2d2c33d05f6e9552d46233 100644 (file)
@@ -51,14 +51,34 @@ typedef struct sockaddr_un {
 extern "C" {
 #endif
 
-#if defined(_WIN64)
-/* On w64, setjmp is implemented by _setjmp which needs a second parameter.
+#if defined(__aarch64__)
+/*
+ * On windows-arm64, setjmp is available in only one variant, and longjmp always
+ * does stack unwinding. This crashes with generated code.
+ * Thus, we use another implementation of setjmp (not windows one), coming from
+ * mingw, which never performs stack unwinding.
+ */
+#undef setjmp
+#undef longjmp
+/*
+ * These functions are not declared in setjmp.h because __aarch64__ defines
+ * setjmp to _setjmpex instead. However, they are still defined in libmingwex.a,
+ * which gets linked automatically.
+ */
+extern int __mingw_setjmp(jmp_buf);
+extern void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+#define setjmp(env) __mingw_setjmp(env)
+#define longjmp(env, val) __mingw_longjmp(env, val)
+#elif defined(_WIN64)
+/*
+ * On windows-x64, setjmp is implemented by _setjmp which needs a second parameter.
  * If this parameter is NULL, longjmp does no stack unwinding.
  * That is what we need for QEMU. Passing the value of register rsp (default)
- * lets longjmp try a stack unwinding which will crash with generated code. */
+ * lets longjmp try a stack unwinding which will crash with generated code.
+ */
 # undef setjmp
 # define setjmp(env) _setjmp(env, NULL)
-#endif
+#endif /* __aarch64__ */
 /* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify
  * "longjmp and don't touch the signal masks". Since we know that the
  * savemask parameter will always be zero we can safely define these
index 7ec0882b50b80fdb6413e3e569f29c4b20344bf1..6e5ab09f71ce00ca891d3911f2356aa920f5383a 100644 (file)
@@ -72,7 +72,7 @@ void replay_start(void);
 /*! Closes replay log file and frees other resources. */
 void replay_finish(void);
 /*! Adds replay blocker with the specified error description */
-void replay_add_blocker(Error *reason);
+void replay_add_blocker(const char *feature);
 /* Returns name of the replay log file */
 const char *replay_get_filename(void);
 /*
index 8e6cf782a11988709ea995ee2a5dbdb55dbc99ad..1cb53acc33088db07452ca6bb31306cc94e5fc00 100644 (file)
@@ -4,7 +4,6 @@
 #include "ui/qemu-pixman.h"
 #include "qom/object.h"
 #include "qemu/notify.h"
-#include "qemu/error-report.h"
 #include "qapi/qapi-types-ui.h"
 
 #ifdef CONFIG_OPENGL
index c730cb8ec568d55e9dea5f86b05b0610316f167a..8052945ba02528fde40d05820d703feace91faf4 100644 (file)
@@ -389,12 +389,76 @@ static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc,
     qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
 }
 
+typedef struct QIOChannelTLSSource QIOChannelTLSSource;
+struct QIOChannelTLSSource {
+    GSource parent;
+    QIOChannelTLS *tioc;
+};
+
+static gboolean
+qio_channel_tls_source_check(GSource *source)
+{
+    QIOChannelTLSSource *tsource = (QIOChannelTLSSource *)source;
+
+    return qcrypto_tls_session_check_pending(tsource->tioc->session) > 0;
+}
+
+static gboolean
+qio_channel_tls_source_prepare(GSource *source, gint *timeout)
+{
+    *timeout = -1;
+    return qio_channel_tls_source_check(source);
+}
+
+static gboolean
+qio_channel_tls_source_dispatch(GSource *source, GSourceFunc callback,
+                                gpointer user_data)
+{
+    return G_SOURCE_CONTINUE;
+}
+
+static void
+qio_channel_tls_source_finalize(GSource *source)
+{
+    QIOChannelTLSSource *tsource = (QIOChannelTLSSource *)source;
+
+    object_unref(OBJECT(tsource->tioc));
+}
+
+static GSourceFuncs qio_channel_tls_source_funcs = {
+    qio_channel_tls_source_prepare,
+    qio_channel_tls_source_check,
+    qio_channel_tls_source_dispatch,
+    qio_channel_tls_source_finalize
+};
+
+static void
+qio_channel_tls_read_watch(QIOChannelTLS *tioc, GSource *source)
+{
+    GSource *child;
+    QIOChannelTLSSource *tlssource;
+
+    child = g_source_new(&qio_channel_tls_source_funcs,
+                          sizeof(QIOChannelTLSSource));
+    tlssource = (QIOChannelTLSSource *)child;
+
+    tlssource->tioc = tioc;
+    object_ref(OBJECT(tioc));
+
+    g_source_add_child_source(source, child);
+}
+
 static GSource *qio_channel_tls_create_watch(QIOChannel *ioc,
                                              GIOCondition condition)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
+    GSource *source = qio_channel_create_watch(tioc->master, condition);
+
+    if (condition & G_IO_IN) {
+        qio_channel_tls_read_watch(tioc, source);
+    }
 
-    return qio_channel_create_watch(tioc->master, condition);
+    return source;
 }
 
 QCryptoTLSSession *
index 4bf2d7246e5cff245f142a8be40378e416f9610c..a7cfefb3a809cfe5aa7d7c36e296609e32a0c30d 100644 (file)
@@ -43,6 +43,7 @@
 #define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define KVM_REG_SIZE(id)                                               \
        (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
index 4f3d5aaa11f531164beab5a47bed8478e5f17546..de687009bfe5394f139b57f05562eb327e0101af 100644 (file)
 #define HUGETLB_FLAG_ENCODE_SHIFT      26
 #define HUGETLB_FLAG_ENCODE_MASK       0x3f
 
-#define HUGETLB_FLAG_ENCODE_16KB       (14 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_64KB       (16 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512KB      (19 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1MB                (20 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2MB                (21 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_8MB                (23 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16MB       (24 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_32MB       (25 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_256MB      (28 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512MB      (29 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1GB                (30 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2GB                (31 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16GB       (34 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16KB       (14U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_64KB       (16U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB      (19U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB                (20U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB                (21U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB                (23U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB       (24U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB       (25U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB      (28U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB      (29U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB                (30U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB                (31U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB       (34U << HUGETLB_FLAG_ENCODE_SHIFT)
 
 #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
index 6c1aa92a92e4411946335cbb9724b75d8efa987a..6ce1f1ceb432c64599f706b86e74a12581c2a54e 100644 (file)
@@ -77,6 +77,8 @@
 
 #define MADV_DONTNEED_LOCKED   24      /* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE  25              /* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index 1be428663c102701e26a1516bb6ca8eb0a083a75..c6e1fc77c9968874feefc79b7c94a165d0ad89d2 100644 (file)
 
 #define MADV_DONTNEED_LOCKED   24      /* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE  25              /* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
index 7351417afd62e32c69eaa6ec9bc57675815ad322..92af6f3f057cf7d33031343ee09e01588b7aef17 100644 (file)
@@ -48,6 +48,10 @@ struct kvm_sregs {
 /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
 struct kvm_riscv_config {
        unsigned long isa;
+       unsigned long zicbom_block_size;
+       unsigned long mvendorid;
+       unsigned long marchid;
+       unsigned long mimpid;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -98,6 +102,9 @@ enum KVM_RISCV_ISA_EXT_ID {
        KVM_RISCV_ISA_EXT_M,
        KVM_RISCV_ISA_EXT_SVPBMT,
        KVM_RISCV_ISA_EXT_SSTC,
+       KVM_RISCV_ISA_EXT_SVINVAL,
+       KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+       KVM_RISCV_ISA_EXT_ZICBOM,
        KVM_RISCV_ISA_EXT_MAX,
 };
 
index 46de10a809ecbd81aa30d28afd78ebcc266e97a5..2747d2ce14fb444c853506019b7bf989e8eb9e1b 100644 (file)
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
-struct kvm_memory_alias {
-       __u32 slot;  /* this has a different namespace than memory slots */
-       __u32 flags;
-       __u64 guest_phys_addr;
-       __u64 memory_size;
-       __u64 target_phys_addr;
-};
-
 /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
 struct kvm_pic_state {
        __u8 last_irr;  /* edge detection */
@@ -214,6 +206,8 @@ struct kvm_msr_list {
 struct kvm_msr_filter_range {
 #define KVM_MSR_FILTER_READ  (1 << 0)
 #define KVM_MSR_FILTER_WRITE (1 << 1)
+#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \
+                                        KVM_MSR_FILTER_WRITE)
        __u32 flags;
        __u32 nmsrs; /* number of msrs in bitmap */
        __u32 base;  /* MSR index the bitmap starts at */
@@ -224,6 +218,7 @@ struct kvm_msr_filter_range {
 struct kvm_msr_filter {
 #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
 #define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY)
        __u32 flags;
        struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
 };
index ebdafa576d6ff19fd19226d140ee4dd7acc1dc94..1e2c16cfe3039eaccfd68303118c383a47e63a05 100644 (file)
@@ -86,14 +86,6 @@ struct kvm_debug_guest {
 /* *** End of deprecated interfaces *** */
 
 
-/* for KVM_CREATE_MEMORY_REGION */
-struct kvm_memory_region {
-       __u32 slot;
-       __u32 flags;
-       __u64 guest_phys_addr;
-       __u64 memory_size; /* bytes */
-};
-
 /* for KVM_SET_USER_MEMORY_REGION */
 struct kvm_userspace_memory_region {
        __u32 slot;
@@ -104,9 +96,9 @@ struct kvm_userspace_memory_region {
 };
 
 /*
- * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
- * other bits are reserved for kvm internal use which are defined in
- * include/linux/kvm_host.h.
+ * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
+ * userspace, other bits are reserved for kvm internal use which are defined
+ * in include/linux/kvm_host.h.
  */
 #define KVM_MEM_LOG_DIRTY_PAGES        (1UL << 0)
 #define KVM_MEM_READONLY       (1UL << 1)
@@ -483,6 +475,9 @@ struct kvm_run {
 #define KVM_MSR_EXIT_REASON_INVAL      (1 << 0)
 #define KVM_MSR_EXIT_REASON_UNKNOWN    (1 << 1)
 #define KVM_MSR_EXIT_REASON_FILTER     (1 << 2)
+#define KVM_MSR_EXIT_REASON_VALID_MASK (KVM_MSR_EXIT_REASON_INVAL   |  \
+                                        KVM_MSR_EXIT_REASON_UNKNOWN |  \
+                                        KVM_MSR_EXIT_REASON_FILTER)
                        __u32 reason; /* kernel -> user */
                        __u32 index; /* kernel -> user */
                        __u64 data; /* kernel <-> user */
@@ -1175,6 +1170,9 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
 #define KVM_CAP_S390_ZPCI_OP 221
 #define KVM_CAP_S390_CPU_TOPOLOGY 222
+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
+#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
+#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1264,6 +1262,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
 
 struct kvm_xen_hvm_config {
        __u32 flags;
@@ -1434,18 +1433,12 @@ struct kvm_vfio_spapr_tce {
        __s32   tablefd;
 };
 
-/*
- * ioctls for VM fds
- */
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO,  0x40, struct kvm_memory_region)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
  */
 #define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
-/* KVM_SET_MEMORY_ALIAS is obsolete: */
-#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO,  0x43, struct kvm_memory_alias)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
 #define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
@@ -1737,6 +1730,8 @@ enum pv_cmd_id {
        KVM_PV_UNSHARE_ALL,
        KVM_PV_INFO,
        KVM_PV_DUMP,
+       KVM_PV_ASYNC_CLEANUP_PREPARE,
+       KVM_PV_ASYNC_CLEANUP_PERFORM,
 };
 
 struct kvm_pv_cmd {
@@ -1767,8 +1762,10 @@ struct kvm_xen_hvm_attr {
        union {
                __u8 long_mode;
                __u8 vector;
+               __u8 runstate_update_flag;
                struct {
                        __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
                } shared_info;
                struct {
                        __u32 send_port;
@@ -1800,6 +1797,7 @@ struct kvm_xen_hvm_attr {
        } u;
 };
 
+
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
@@ -1807,6 +1805,8 @@ struct kvm_xen_hvm_attr {
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
 #define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
 #define KVM_XEN_ATTR_TYPE_XEN_VERSION          0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
 
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR  _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
@@ -1823,6 +1823,7 @@ struct kvm_xen_vcpu_attr {
        __u16 pad[3];
        union {
                __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
                __u64 pad[8];
                struct {
                        __u64 state;
index 213b2a0f70b5384abd1c3fdfedccf37e80f5f142..74f3cb5007c94372c6e54ff82a7fb12b8a60ced7 100644 (file)
 #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU      PSCI_0_2_FN64(7)
 
 #define PSCI_1_0_FN_PSCI_FEATURES              PSCI_0_2_FN(10)
+#define PSCI_1_0_FN_CPU_FREEZE                 PSCI_0_2_FN(11)
+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND                PSCI_0_2_FN(12)
+#define PSCI_1_0_FN_NODE_HW_STATE              PSCI_0_2_FN(13)
 #define PSCI_1_0_FN_SYSTEM_SUSPEND             PSCI_0_2_FN(14)
 #define PSCI_1_0_FN_SET_SUSPEND_MODE           PSCI_0_2_FN(15)
+#define PSCI_1_0_FN_STAT_RESIDENCY             PSCI_0_2_FN(16)
+#define PSCI_1_0_FN_STAT_COUNT                 PSCI_0_2_FN(17)
+
 #define PSCI_1_1_FN_SYSTEM_RESET2              PSCI_0_2_FN(18)
+#define PSCI_1_1_FN_MEM_PROTECT                        PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE    PSCI_0_2_FN(20)
 
+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND      PSCI_0_2_FN64(12)
+#define PSCI_1_0_FN64_NODE_HW_STATE            PSCI_0_2_FN64(13)
 #define PSCI_1_0_FN64_SYSTEM_SUSPEND           PSCI_0_2_FN64(14)
+#define PSCI_1_0_FN64_STAT_RESIDENCY           PSCI_0_2_FN64(16)
+#define PSCI_1_0_FN64_STAT_COUNT               PSCI_0_2_FN64(17)
+
 #define PSCI_1_1_FN64_SYSTEM_RESET2            PSCI_0_2_FN64(18)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE  PSCI_0_2_FN64(20)
 
 /* PSCI v0.2 power state encoding for CPU_SUSPEND function */
 #define PSCI_0_2_POWER_STATE_ID_MASK           0xffff
index a3a377cd449137a370da582b44ed91c32fc711f9..ba5d0df52fb8c1074a7eb3d602690b873abc9600 100644 (file)
 
 #include <linux/types.h>
 
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
 /*
  * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
  * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR.  In
index ede44b5572530818a55103ff61492fb43562319c..c59692ce0bcebe3670ad67b789c4b9a38d347693 100644 (file)
@@ -819,12 +819,20 @@ struct vfio_device_feature {
  * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
  * is supported in addition to the STOP_COPY states.
  *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that
+ * PRE_COPY is supported in addition to the STOP_COPY states.
+ *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY
+ * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported
+ * in addition to the STOP_COPY states.
+ *
  * Other combinations of flags have behavior to be defined in the future.
  */
 struct vfio_device_feature_migration {
        __aligned_u64 flags;
 #define VFIO_MIGRATION_STOP_COPY       (1 << 0)
 #define VFIO_MIGRATION_P2P             (1 << 1)
+#define VFIO_MIGRATION_PRE_COPY                (1 << 2)
 };
 #define VFIO_DEVICE_FEATURE_MIGRATION 1
 
@@ -875,8 +883,13 @@ struct vfio_device_feature_mig_state {
  *  RESUMING - The device is stopped and is loading a new internal state
  *  ERROR - The device has failed and must be reset
  *
- * And 1 optional state to support VFIO_MIGRATION_P2P:
+ * And optional states to support VFIO_MIGRATION_P2P:
  *  RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA
+ * And VFIO_MIGRATION_PRE_COPY:
+ *  PRE_COPY - The device is running normally but tracking internal state
+ *             changes
+ * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY:
+ *  PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA
  *
  * The FSM takes actions on the arcs between FSM states. The driver implements
  * the following behavior for the FSM arcs:
@@ -908,20 +921,48 @@ struct vfio_device_feature_mig_state {
  *
  *   To abort a RESUMING session the device must be reset.
  *
+ * PRE_COPY -> RUNNING
  * RUNNING_P2P -> RUNNING
  *   While in RUNNING the device is fully operational, the device may generate
  *   interrupts, DMA, respond to MMIO, all vfio device regions are functional,
  *   and the device may advance its internal state.
  *
+ *   The PRE_COPY arc will terminate a data transfer session.
+ *
+ * PRE_COPY_P2P -> RUNNING_P2P
  * RUNNING -> RUNNING_P2P
  * STOP -> RUNNING_P2P
  *   While in RUNNING_P2P the device is partially running in the P2P quiescent
  *   state defined below.
  *
+ *   The PRE_COPY_P2P arc will terminate a data transfer session.
+ *
+ * RUNNING -> PRE_COPY
+ * RUNNING_P2P -> PRE_COPY_P2P
  * STOP -> STOP_COPY
- *   This arc begin the process of saving the device state and will return a
- *   new data_fd.
+ *   PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states
+ *   which share a data transfer session. Moving between these states alters
+ *   what is streamed in session, but does not terminate or otherwise affect
+ *   the associated fd.
+ *
+ *   These arcs begin the process of saving the device state and will return a
+ *   new data_fd. The migration driver may perform actions such as enabling
+ *   dirty logging of device state when entering PRE_COPY or PRE_COPY_P2P.
+ *
+ *   Each arc does not change the device operation, the device remains
+ *   RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below
+ *   in PRE_COPY_P2P -> STOP_COPY.
+ *
+ * PRE_COPY -> PRE_COPY_P2P
+ *   Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above.
+ *   However, while in the PRE_COPY_P2P state, the device is partially running
+ *   in the P2P quiescent state defined below, like RUNNING_P2P.
+ *
+ * PRE_COPY_P2P -> PRE_COPY
+ *   This arc allows returning the device to a full RUNNING behavior while
+ *   continuing all the behaviors of PRE_COPY.
  *
+ * PRE_COPY_P2P -> STOP_COPY
  *   While in the STOP_COPY state the device has the same behavior as STOP
  *   with the addition that the data transfer session continues to stream the
  *   migration state. End of stream on the FD indicates the entire device
@@ -939,6 +980,13 @@ struct vfio_device_feature_mig_state {
  *   device state for this arc if required to prepare the device to receive the
  *   migration data.
  *
+ * STOP_COPY -> PRE_COPY
+ * STOP_COPY -> PRE_COPY_P2P
+ *   These arcs are not permitted and return error if requested. Future
+ *   revisions of this API may define behaviors for these arcs, in this case
+ *   support will be discoverable by a new flag in
+ *   VFIO_DEVICE_FEATURE_MIGRATION.
+ *
  * any -> ERROR
  *   ERROR cannot be specified as a device state, however any transition request
  *   can be failed with an errno return and may then move the device_state into
@@ -950,7 +998,7 @@ struct vfio_device_feature_mig_state {
  * The optional peer to peer (P2P) quiescent state is intended to be a quiescent
  * state for the device for the purposes of managing multiple devices within a
  * user context where peer-to-peer DMA between devices may be active. The
- * RUNNING_P2P states must prevent the device from initiating
+ * RUNNING_P2P and PRE_COPY_P2P states must prevent the device from initiating
  * any new P2P DMA transactions. If the device can identify P2P transactions
  * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration
  * driver must complete any such outstanding operations prior to completing the
@@ -963,6 +1011,8 @@ struct vfio_device_feature_mig_state {
  * above FSM arcs. As there are multiple paths through the FSM arcs the path
  * should be selected based on the following rules:
  *   - Select the shortest path.
+ *   - The path cannot have saving group states as interior arcs, only
+ *     starting/end states.
  * Refer to vfio_mig_get_next_state() for the result of the algorithm.
  *
  * The automatic transit through the FSM arcs that make up the combination
@@ -976,6 +1026,9 @@ struct vfio_device_feature_mig_state {
  * support them. The user can discover if these states are supported by using
  * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can
  * avoid knowing about these optional states if the kernel driver supports them.
+ *
+ * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY
+ * is not present.
  */
 enum vfio_device_mig_state {
        VFIO_DEVICE_STATE_ERROR = 0,
@@ -984,8 +1037,225 @@ enum vfio_device_mig_state {
        VFIO_DEVICE_STATE_STOP_COPY = 3,
        VFIO_DEVICE_STATE_RESUMING = 4,
        VFIO_DEVICE_STATE_RUNNING_P2P = 5,
+       VFIO_DEVICE_STATE_PRE_COPY = 6,
+       VFIO_DEVICE_STATE_PRE_COPY_P2P = 7,
+};
+
+/**
+ * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21)
+ *
+ * This ioctl is used on the migration data FD in the precopy phase of the
+ * migration data transfer. It returns an estimate of the current data sizes
+ * remaining to be transferred. It allows the user to judge when it is
+ * appropriate to leave PRE_COPY for STOP_COPY.
+ *
+ * This ioctl is valid only in PRE_COPY states and kernel driver should
+ * return -EINVAL from any other migration state.
+ *
+ * The vfio_precopy_info data structure returned by this ioctl provides
+ * estimates of data available from the device during the PRE_COPY states.
+ * This estimate is split into two categories, initial_bytes and
+ * dirty_bytes.
+ *
+ * The initial_bytes field indicates the amount of initial precopy
+ * data available from the device. This field should have a non-zero initial
+ * value and decrease as migration data is read from the device.
+ * It is recommended to leave PRE_COPY for STOP_COPY only after this field
+ * reaches zero. Leaving PRE_COPY earlier might make things slower.
+ *
+ * The dirty_bytes field tracks device state changes relative to data
+ * previously retrieved.  This field starts at zero and may increase as
+ * the internal device state is modified or decrease as that modified
+ * state is read from the device.
+ *
+ * Userspace may use the combination of these fields to estimate the
+ * potential data size available during the PRE_COPY phases, as well as
+ * trends relative to the rate the device is dirtying its internal
+ * state, but these fields are not required to have any bearing relative
+ * to the data size available during the STOP_COPY phase.
+ *
+ * Drivers have a lot of flexibility in when and what they transfer during the
+ * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO.
+ *
+ * During pre-copy the migration data FD has a temporary "end of stream" that is
+ * reached when both initial_bytes and dirty_bytes are zero. For instance, this
+ * may indicate that the device is idle and not currently dirtying any internal
+ * state. When read() is done on this temporary end of stream the kernel driver
+ * should return ENOMSG from read(). Userspace can wait for more data (which may
+ * never come) by using poll.
+ *
+ * Once in STOP_COPY the migration data FD has a permanent end of stream
+ * signaled in the usual way by read() always returning 0 and poll always
+ * returning readable. ENOMSG may not be returned in STOP_COPY.
+ * Support for this ioctl is mandatory if a driver claims to support
+ * VFIO_MIGRATION_PRE_COPY.
+ *
+ * Return: 0 on success, -1 and errno set on failure.
+ */
+struct vfio_precopy_info {
+       __u32 argsz;
+       __u32 flags;
+       __aligned_u64 initial_bytes;
+       __aligned_u64 dirty_bytes;
+};
+
+#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21)
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power
+ * state with the platform-based power management.  Device use of lower power
+ * states depends on factors managed by the runtime power management core,
+ * including system level support and coordinating support among dependent
+ * devices.  Enabling device low power entry does not guarantee lower power
+ * usage by the device, nor is a mechanism provided through this feature to
+ * know the current power state of the device.  If any device access happens
+ * (either from the host or through the vfio uAPI) when the device is in the
+ * low power state, then the host will move the device out of the low power
+ * state as necessary prior to the access.  Once the access is completed, the
+ * device may re-enter the low power state.  For single shot low power support
+ * with wake-up notification, see
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below.  Access to mmap'd
+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after
+ * calling LOW_POWER_EXIT.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3
+
+/*
+ * This device feature has the same behavior as
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user
+ * provides an eventfd for wake-up notification.  When the device moves out of
+ * the low power state for the wake-up, the host will not allow the device to
+ * re-enter a low power state without a subsequent user call to one of the low
+ * power entry device feature IOCTLs.  Access to mmap'd device regions is
+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the
+ * low power exit.  The low power exit can happen either through LOW_POWER_EXIT
+ * or through any other access (where the wake-up notification has been
+ * generated).  The access to mmap'd device regions will not trigger low power
+ * exit.
+ *
+ * The notification through the provided eventfd will be generated only when
+ * the device has entered and is resumed from a low power state after
+ * calling this device feature IOCTL.  A device that has not entered low power
+ * state, as managed through the runtime power management core, will not
+ * generate a notification through the provided eventfd on access.  Calling the
+ * LOW_POWER_EXIT feature is optional in the case where notification has been
+ * signaled on the provided eventfd that a resume from low power has occurred.
+ */
+struct vfio_device_low_power_entry_with_wakeup {
+       __s32 wakeup_eventfd;
+       __u32 reserved;
+};
+
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as
+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features.
+ * This device feature IOCTL may itself generate a wakeup eventfd notification
+ * in the latter case if the device had previously entered a low power state.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging.
+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports
+ * DMA logging.
+ *
+ * DMA logging allows a device to internally record what DMAs the device is
+ * initiating and report them back to userspace. It is part of the VFIO
+ * migration infrastructure that allows implementing dirty page tracking
+ * during the pre copy phase of live migration. Only DMA WRITEs are logged,
+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE.
+ *
+ * When DMA logging is started a range of IOVAs to monitor is provided and the
+ * device can optimize its logging to cover only the IOVA range given. Each
+ * DMA that the device initiates inside the range will be logged by the device
+ * for later retrieval.
+ *
+ * page_size is an input that hints what tracking granularity the device
+ * should try to achieve. If the device cannot do the hinted page size then
+ * it's the driver's choice which page size to pick based on its support.
+ * On output the device will return the page size it selected.
+ *
+ * ranges is a pointer to an array of
+ * struct vfio_device_feature_dma_logging_range.
+ *
+ * The core kernel code guarantees to support at minimum num_ranges that fit
+ * into a single kernel page. User space can try higher values but should give
+ * up if the above can't be achieved because of some driver limitations.
+ *
+ * A single call to start device DMA logging can be issued and a matching stop
+ * should follow at the end. Another start is not allowed in the meantime.
+ */
+struct vfio_device_feature_dma_logging_control {
+       __aligned_u64 page_size;
+       __u32 num_ranges;
+       __u32 __reserved;
+       __aligned_u64 ranges;
 };
 
+struct vfio_device_feature_dma_logging_range {
+       __aligned_u64 iova;
+       __aligned_u64 length;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started
+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+ */
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log
+ *
+ * Query the device's DMA log for written pages within the given IOVA range.
+ * During querying the log is cleared for the IOVA range.
+ *
+ * bitmap is a pointer to an array of u64s that will hold the output bitmap
+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits
+ * is given by:
+ *  bitmap[idx / 64] & (1ULL << (idx % 64)), idx = (addr - iova) / page_size
+ *
+ * The input page_size can be any power of two value and does not have to
+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver
+ * will format its internal logging to match the reporting page size, possibly
+ * by replicating bits if the internal page size is lower than requested.
+ *
+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or
+ * perform any initialization of the user provided bitmap.
+ *
+ * If any error is returned userspace should assume that the dirty log is
+ * corrupted. Error recovery is to consider all memory dirty and try to
+ * restart the dirty tracking, or to abort/restart the whole migration.
+ *
+ * If DMA logging is not enabled, an error will be returned.
+ *
+ */
+struct vfio_device_feature_dma_logging_report {
+       __aligned_u64 iova;
+       __aligned_u64 length;
+       __aligned_u64 page_size;
+       __aligned_u64 bitmap;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will
+ * be required to complete stop copy.
+ *
+ * Note: Can be called in any device state.
+ */
+
+struct vfio_device_feature_mig_data_size {
+       __aligned_u64 stop_copy_length;
+};
+
+#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
index c0790f3246b91be85b33a036b5d1d054e5302e41..a9924232578ef2cd2b60da7c8de0d2385cb8a101 100644 (file)
@@ -356,7 +356,7 @@ void cpu_loop(CPUARMState *env)
             break;
         case EXCP_SWI:
             {
-                env->eabi = 1;
+                env->eabi = true;
                 /* system call */
                 if (env->thumb) {
                     /* Thumb is always EABI style with syscall number in r7 */
@@ -382,7 +382,7 @@ void cpu_loop(CPUARMState *env)
                          * > 0xfffff and are handled below as out-of-range.
                          */
                         n ^= ARM_SYSCALL_BASE;
-                        env->eabi = 0;
+                        env->eabi = false;
                     }
                 }
 
index 4290651c3cf713653c3c81e300eb58e82e965971..4ff30ff98066502dd0af7acceacc16a3b8027809 100644 (file)
@@ -161,13 +161,15 @@ void fork_end(int child)
         }
         qemu_init_cpu_list();
         gdbserver_fork(thread_cpu);
-        /* qemu_init_cpu_list() takes care of reinitializing the
-         * exclusive state, so we don't need to end_exclusive() here.
-         */
     } else {
         cpu_list_unlock();
-        end_exclusive();
     }
+    /*
+     * qemu_init_cpu_list() reinitialized the child exclusive state, but we
+     * also need to keep current_cpu consistent, so call end_exclusive() for
+     * both child and parent.
+     */
+    end_exclusive();
 }
 
 __thread CPUState *thread_cpu;
index 5ccf9e942eaa83db0b29ffb2c06bbbc80bb0b985..212e62d0a6272baef9d06d7aecbe891b08840a3f 100644 (file)
@@ -25,8 +25,8 @@
 
 void cpu_loop(CPUMBState *env)
 {
+    int trapnr, ret, si_code, sig;
     CPUState *cs = env_cpu(env);
-    int trapnr, ret, si_code;
 
     while (1) {
         cpu_exec_start(cs);
@@ -76,6 +76,7 @@ void cpu_loop(CPUMBState *env)
             env->iflags &= ~(IMM_FLAG | D_FLAG);
             switch (env->esr & 31) {
             case ESR_EC_DIVZERO:
+                sig = TARGET_SIGFPE;
                 si_code = TARGET_FPE_INTDIV;
                 break;
             case ESR_EC_FPU:
@@ -84,6 +85,7 @@ void cpu_loop(CPUMBState *env)
                  * if there's no recognized bit set.  Possibly this
                  * implies that si_code is 0, but follow the structure.
                  */
+                sig = TARGET_SIGFPE;
                 si_code = env->fsr;
                 if (si_code & FSR_IO) {
                     si_code = TARGET_FPE_FLTINV;
@@ -97,13 +99,17 @@ void cpu_loop(CPUMBState *env)
                     si_code = TARGET_FPE_FLTRES;
                 }
                 break;
+            case ESR_EC_PRIVINSN:
+                sig = SIGILL;
+                si_code = ILL_PRVOPC;
+                break;
             default:
                 fprintf(stderr, "Unhandled hw-exception: 0x%x\n",
                         env->esr & ESR_EC_MASK);
                 cpu_dump_state(cs, stderr, 0);
                 exit(EXIT_FAILURE);
             }
-            force_sig_fault(TARGET_SIGFPE, si_code, env->pc);
+            force_sig_fault(sig, si_code, env->pc);
             break;
 
         case EXCP_DEBUG:
index 434c90a55f8f02ecacf9aef7c03b59eebc108868..c120c422786a563e89824808a85d2cbbf4425a24 100644 (file)
@@ -248,6 +248,14 @@ void cpu_loop (CPUSPARCState *env)
             cpu_exec_step_atomic(cs);
             break;
         default:
+            /*
+             * Most software trap numbers vector to BAD_TRAP.
+             * Handle anything not explicitly matched above.
+             */
+            if (trapnr >= TT_TRAP && trapnr <= TT_TRAP + 0x7f) {
+                force_sig_fault(TARGET_SIGILL, ILL_ILLTRP, env->pc);
+                break;
+            }
             fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr);
             cpu_dump_state(cs, stderr, 0);
             exit(EXIT_FAILURE);
index 1e868e9b0e279be7c827d04762bb16fc6ee5b4c1..a6c426d73cfe933c82b7f03b0d759d0134539361 100644 (file)
@@ -6752,6 +6752,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
             cpu_clone_regs_parent(env, flags);
             fork_end(0);
         }
+        g_assert(!cpu_in_exclusive_context(cpu));
     }
     return ret;
 }
index 0280e76addda504ea9745a17324ba9d6923335bc..3576da413f407df68bf67dbe74e25a410d9c333a 100644 (file)
@@ -135,7 +135,7 @@ void print_termios(void *arg);
 #ifdef TARGET_ARM
 static inline int regpairs_aligned(CPUArchState *cpu_env, int num)
 {
-    return cpu_env->eabi == 1;
+    return cpu_env->eabi;
 }
 #elif defined(TARGET_MIPS) && defined(TARGET_ABI_MIPSO32)
 static inline int regpairs_aligned(CPUArchState *cpu_env, int num) { return 1; }
index 4ba3bf3431711f93b0b7383db31f7a75a519e6f3..6cb2b1a42f8ad88ce284dc2e6c6855be4fbe79d6 100644 (file)
@@ -215,10 +215,6 @@ endif
 # Specify linker-script with add_project_link_arguments so that it is not placed
 # within a linker --start-group/--end-group pair
 if get_option('fuzzing')
-  add_project_link_arguments(['-Wl,-T,',
-                              (meson.current_source_dir() / 'tests/qtest/fuzz/fork_fuzz.ld')],
-                             native: false, language: all_languages)
-
   # Specify a filter to only instrument code that is directly related to
   # virtual-devices.
   configure_file(output: 'instrumentation-filter',
@@ -1649,10 +1645,14 @@ if libbpf.found() and not cc.links('''
 endif
 
 # libdw
-libdw = dependency('libdw',
-                   method: 'pkg-config',
-                   kwargs: static_kwargs,
-                   required: false)
+libdw = not_found
+if not get_option('libdw').auto() or \
+        (not enable_static and (have_system or have_user))
+    libdw = dependency('libdw',
+                       method: 'pkg-config',
+                       kwargs: static_kwargs,
+                       required: get_option('libdw'))
+endif
 
 #################
 # config-host.h #
@@ -2129,6 +2129,18 @@ config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_pre
     pthread_create(&thread, 0, f, 0);
     return 0;
   }''', dependencies: threads))
+config_host_data.set('CONFIG_PTHREAD_SET_NAME_NP', cc.links(gnu_source_prefix + '''
+  #include <pthread.h>
+  #include <pthread_np.h>
+
+  static void *f(void *p) { return NULL; }
+  int main(void)
+  {
+    pthread_t thread;
+    pthread_create(&thread, 0, f, 0);
+    pthread_set_name_np(thread, "QEMU");
+    return 0;
+  }''', dependencies: threads))
 config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_prefix + '''
   #include <pthread.h>
   #include <time.h>
@@ -2351,6 +2363,22 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
     int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
   '''), error_message: 'AVX512F not available').allowed())
 
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
+  .require(cc.links('''
+    #pragma GCC push_options
+    #pragma GCC target("avx512bw")
+    #include <cpuid.h>
+    #include <immintrin.h>
+    static int bar(void *a) {
+
+      __m512i *x = a;
+      __m512i res= _mm512_abs_epi8(*x);
+      return res[1];
+    }
+    int main(int argc, char *argv[]) { return bar(argv[0]); }
+  '''), error_message: 'AVX512BW not available').allowed())
+
 have_pvrdma = get_option('pvrdma') \
   .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
   .require(cc.compiles(gnu_source_prefix + '''
@@ -2450,6 +2478,27 @@ if targetos == 'windows'
     }''', name: '_lock_file and _unlock_file'))
 endif
 
+if targetos == 'windows'
+  mingw_has_setjmp_longjmp = cc.links('''
+    #include <setjmp.h>
+    int main(void) {
+      /*
+       * These functions are not available in setjmp header, but may be
+       * available at link time, from libmingwex.a.
+       */
+      extern int __mingw_setjmp(jmp_buf);
+      extern void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+      jmp_buf env;
+      __mingw_setjmp(env);
+      __mingw_longjmp(env, 0);
+    }
+  ''', name: 'mingw setjmp and longjmp')
+
+  if cpu == 'aarch64' and not mingw_has_setjmp_longjmp
+    error('mingw must provide setjmp/longjmp for windows-arm64')
+  endif
+endif
+
 ########################
 # Target configuration #
 ########################
@@ -3783,8 +3832,14 @@ summary_info += {'debug stack usage': get_option('debug_stack_usage')}
 summary_info += {'mutex debugging':   get_option('debug_mutex')}
 summary_info += {'memory allocator':  get_option('malloc')}
 summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')}
 summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
-summary_info += {'gprof enabled':     get_option('gprof')}
+if get_option('gprof')
+  gprof_info = 'YES (deprecated)'
+else
+  gprof_info = get_option('gprof')
+endif
+summary_info += {'gprof':             gprof_info}
 summary_info += {'gcov':              get_option('b_coverage')}
 summary_info += {'thread sanitizer':  config_host.has_key('CONFIG_TSAN')}
 summary_info += {'CFI support':       get_option('cfi')}
@@ -3853,7 +3908,6 @@ if have_block
   summary_info += {'Block whitelist (ro)': get_option('block_drv_ro_whitelist')}
   summary_info += {'Use block whitelist in tools': get_option('block_drv_whitelist_in_tools')}
   summary_info += {'VirtFS support':    have_virtfs}
-  summary_info += {'build virtiofs daemon': have_virtiofsd}
   summary_info += {'Live block migration': config_host_data.get('CONFIG_LIVE_BLOCK_MIGRATION')}
   summary_info += {'replication support': config_host_data.get('CONFIG_REPLICATION')}
   summary_info += {'bochs support':     get_option('bochs').allowed()}
index 559a571b6b6cd1550c13da3fdcba0b2720fe9922..6b0900205ea7cf3b636cc8bf19d124288bd26977 100644 (file)
@@ -104,6 +104,8 @@ option('avx2', type: 'feature', value: 'auto',
        description: 'AVX2 optimizations')
 option('avx512f', type: 'feature', value: 'disabled',
        description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+       description: 'AVX512BW optimizations')
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')
 
@@ -129,6 +131,8 @@ option('gio', type : 'feature', value : 'auto',
        description: 'use libgio for D-Bus support')
 option('glusterfs', type : 'feature', value : 'auto',
        description: 'Glusterfs block device driver')
+option('libdw', type : 'feature', value : 'auto',
+       description: 'debuginfo support')
 option('libiscsi', type : 'feature', value : 'auto',
        description: 'libiscsi userspace initiator')
 option('libnfs', type : 'feature', value : 'auto',
@@ -268,8 +272,6 @@ option('vhost_user_blk_server', type: 'feature', value: 'auto',
        description: 'build vhost-user-blk server')
 option('virtfs', type: 'feature', value: 'auto',
        description: 'virtio-9p support')
-option('virtiofsd', type: 'feature', value: 'auto',
-       description: 'build virtiofs daemon (virtiofsd)')
 option('libvduse', type: 'feature', value: 'auto',
        description: 'build VDUSE Library')
 option('vduse_blk_export', type: 'feature', value: 'auto',
@@ -316,7 +318,8 @@ option('debug_stack_usage', type: 'boolean', value: false,
 option('qom_cast_debug', type: 'boolean', value: false,
        description: 'cast debugging support')
 option('gprof', type: 'boolean', value: false,
-       description: 'QEMU profiling with gprof')
+       description: 'QEMU profiling with gprof',
+       deprecated: true)
 option('profiler', type: 'boolean', value: false,
        description: 'profiler support')
 option('slirp_smbd', type : 'feature', value : 'auto',
index 5a621419d388299f98572b550f7c21993e57debd..fe73aa94b1c0fba8aa1b55ebd1eacf4aa95ee330 100644 (file)
@@ -763,9 +763,8 @@ static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
 }
 
 static void dirty_bitmap_state_pending(void *opaque,
-                                       uint64_t *res_precopy_only,
-                                       uint64_t *res_compatible,
-                                       uint64_t *res_postcopy_only)
+                                       uint64_t *must_precopy,
+                                       uint64_t *can_postcopy)
 {
     DBMSaveState *s = &((DBMState *)opaque)->save;
     SaveBitmapState *dbms;
@@ -785,7 +784,7 @@ static void dirty_bitmap_state_pending(void *opaque,
 
     trace_dirty_bitmap_state_pending(pending);
 
-    *res_postcopy_only += pending;
+    *can_postcopy += pending;
 }
 
 /* First occurrence of this bitmap. It should be created if doesn't exist */
index 29f69025aff054bbb49182652545838d93ab758a..426a25bb192e04ac8188317c61ba2cc9685eb5af 100644 (file)
 #define MAX_IO_BUFFERS 512
 #define MAX_PARALLEL_IO 16
 
-/* #define DEBUG_BLK_MIGRATION */
-
-#ifdef DEBUG_BLK_MIGRATION
-#define DPRINTF(fmt, ...) \
-    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
 typedef struct BlkMigDevState {
     /* Written during setup phase.  Can be read without a lock.  */
     BlockBackend *blk;
@@ -502,7 +492,7 @@ static int blk_mig_save_bulked_block(QEMUFile *f)
         block_mig_state.prev_progress = progress;
         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                          | BLK_MIG_FLAG_PROGRESS);
-        DPRINTF("Completed %d %%\r", progress);
+        trace_migration_block_progression(progress);
     }
 
     return ret;
@@ -863,10 +853,8 @@ static int block_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }
 
-static void block_state_pending(void *opaque,
-                                uint64_t *res_precopy_only,
-                                uint64_t *res_compatible,
-                                uint64_t *res_postcopy_only)
+static void block_state_pending(void *opaque, uint64_t *must_precopy,
+                                uint64_t *can_postcopy)
 {
     /* Estimate pending number of bytes to send */
     uint64_t pending;
@@ -887,7 +875,7 @@ static void block_state_pending(void *opaque,
 
     trace_migration_block_state_pending(pending);
     /* We don't do postcopy */
-    *res_precopy_only += pending;
+    *must_precopy += pending;
 }
 
 static int block_load(QEMUFile *f, void *opaque, int version_id)
index 42453481c41aec7d2b27a2ed699035cb007af253..6cb6f90357b29a0d429c485b162eb2fb4ae18136 100644 (file)
@@ -17,7 +17,6 @@
 #include "migration.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-migration.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "trace.h"
 
@@ -78,7 +77,7 @@ FailoverStatus failover_get_state(void)
 void qmp_x_colo_lost_heartbeat(Error **errp)
 {
     if (get_colo_mode() == COLO_MODE_NONE) {
-        error_setg(errp, QERR_FEATURE_DISABLED, "colo");
+        error_setg(errp, "VM is not in COLO mode");
         return;
     }
 
index 232c8d44b1f755eb55bcc729608a89e4f80cf860..0716e64689c686e06299819b4a353ddb08fb20ce 100644 (file)
@@ -33,7 +33,6 @@
 #include "net/colo.h"
 #include "block/block.h"
 #include "qapi/qapi-events-migration.h"
-#include "qapi/qmp/qerror.h"
 #include "sysemu/cpus.h"
 #include "sysemu/runstate.h"
 #include "net/filter.h"
index ef25bc89290e66cd5c1d3b42ee5eef80f28eca18..72519ea99fdb5b7632ede99d7bf9bc4328055d2c 100644 (file)
@@ -23,7 +23,6 @@
 #include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-visit-migration.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/string-input-visitor.h"
 #include "qapi/string-output-visitor.h"
 #include "qemu/cutils.h"
index 7a14aa98d842d0ecbd3a2922dff8e3ede58457d1..ae2025d9d8d9fb996b68d64261f6e2ec467d546c 100644 (file)
@@ -184,16 +184,27 @@ static int migration_maybe_pause(MigrationState *s,
                                  int new_state);
 static void migrate_fd_cancel(MigrationState *s);
 
-static bool migrate_allow_multi_channels = true;
+static bool migration_needs_multiple_sockets(void)
+{
+    return migrate_use_multifd() || migrate_postcopy_preempt();
+}
 
-void migrate_protocol_allow_multi_channels(bool allow)
+static bool uri_supports_multi_channels(const char *uri)
 {
-    migrate_allow_multi_channels = allow;
+    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
+           strstart(uri, "vsock:", NULL);
 }
 
-bool migrate_multi_channels_is_allowed(void)
+static bool
+migration_channels_and_uri_compatible(const char *uri, Error **errp)
 {
-    return migrate_allow_multi_channels;
+    if (migration_needs_multiple_sockets() &&
+        !uri_supports_multi_channels(uri)) {
+        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
+        return false;
+    }
+
+    return true;
 }
 
 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
@@ -224,6 +235,8 @@ void migration_object_init(void)
     qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
     qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
     qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
+    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
+
     qemu_mutex_init(&current_incoming->page_request_mutex);
     current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
 
@@ -302,6 +315,8 @@ void migration_incoming_state_destroy(void)
 {
     struct MigrationIncomingState *mis = migration_incoming_get_current();
 
+    multifd_load_cleanup();
+
     if (mis->to_src_file) {
         /* Tell source that we are done */
         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
@@ -493,12 +508,15 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
     const char *p = NULL;
 
-    migrate_protocol_allow_multi_channels(false); /* reset it anyway */
+    /* URI is not suitable for migration? */
+    if (!migration_channels_and_uri_compatible(uri, errp)) {
+        return;
+    }
+
     qapi_event_send_migration(MIGRATION_STATUS_SETUP);
     if (strstart(uri, "tcp:", &p) ||
         strstart(uri, "unix:", NULL) ||
         strstart(uri, "vsock:", NULL)) {
-        migrate_protocol_allow_multi_channels(true);
         socket_start_incoming_migration(p ? p : uri, errp);
 #ifdef CONFIG_RDMA
     } else if (strstart(uri, "rdma:", &p)) {
@@ -543,13 +561,7 @@ static void process_incoming_migration_bh(void *opaque)
      */
     qemu_announce_self(&mis->announce_timer, migrate_announce_params());
 
-    if (multifd_load_cleanup(&local_err) != 0) {
-        error_report_err(local_err);
-        autostart = false;
-    }
-    /* If global state section was not received or we are in running
-       state, we need to obey autostart. Any other state is set with
-       runstate_set. */
+    multifd_load_shutdown();
 
     dirty_bitmap_mig_before_vm_start();
 
@@ -649,9 +661,9 @@ fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
     qemu_fclose(mis->from_src_file);
-    if (multifd_load_cleanup(&local_err) != 0) {
-        error_report_err(local_err);
-    }
+
+    multifd_load_cleanup();
+
     exit(EXIT_FAILURE);
 }
 
@@ -723,9 +735,29 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp)
     migration_incoming_process();
 }
 
-static bool migration_needs_multiple_sockets(void)
+/*
+ * Returns true when we want to start a new incoming migration process,
+ * false otherwise.
+ */
+static bool migration_should_start_incoming(bool main_channel)
 {
-    return migrate_use_multifd() || migrate_postcopy_preempt();
+    /* Multifd doesn't start unless all channels are established */
+    if (migrate_use_multifd()) {
+        return migration_has_all_channels();
+    }
+
+    /* Preempt channel only starts when the main channel is created */
+    if (migrate_postcopy_preempt()) {
+        return main_channel;
+    }
+
+    /*
+     * For all other types of migration, we should only reach here when
+     * it's the main channel that's being created, and we should always
+     * proceed with this channel.
+     */
+    assert(main_channel);
+    return true;
 }
 
 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
@@ -789,7 +821,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
         }
     }
 
-    if (migration_has_all_channels()) {
+    if (migration_should_start_incoming(default_channel)) {
         /* If it's a recovery, we're done */
         if (postcopy_try_recover()) {
             return;
@@ -1378,15 +1410,6 @@ static bool migrate_caps_check(bool *cap_list,
     }
 #endif
 
-
-    /* incoming side only */
-    if (runstate_check(RUN_STATE_INMIGRATE) &&
-        !migrate_multi_channels_is_allowed() &&
-        cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return false;
-    }
-
     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
         if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
             error_setg(errp, "Postcopy preempt requires postcopy-ram");
@@ -2471,6 +2494,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     MigrationState *s = migrate_get_current();
     const char *p = NULL;
 
+    /* URI is not suitable for migration? */
+    if (!migration_channels_and_uri_compatible(uri, errp)) {
+        return;
+    }
+
     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
                          has_resume && resume, errp)) {
         /* Error detected, put into errp */
@@ -2483,11 +2511,9 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
         }
     }
 
-    migrate_protocol_allow_multi_channels(false);
     if (strstart(uri, "tcp:", &p) ||
         strstart(uri, "unix:", NULL) ||
         strstart(uri, "vsock:", NULL)) {
-        migrate_protocol_allow_multi_channels(true);
         socket_start_outgoing_migration(s, p ? p : uri, &local_err);
 #ifdef CONFIG_RDMA
     } else if (strstart(uri, "rdma:", &p)) {
@@ -3022,6 +3048,7 @@ retry:
         case MIG_RP_MSG_PONG:
             tmp32 = ldl_be_p(buf);
             trace_source_return_path_thread_pong(tmp32);
+            qemu_sem_post(&ms->rp_state.rp_pong_acks);
             break;
 
         case MIG_RP_MSG_REQ_PAGES:
@@ -3155,6 +3182,13 @@ static int await_return_path_close_on_source(MigrationState *ms)
     return ms->rp_state.error;
 }
 
+static inline void
+migration_wait_main_channel(MigrationState *ms)
+{
+    /* Wait until one PONG message received */
+    qemu_sem_wait(&ms->rp_state.rp_pong_acks);
+}
+
 /*
  * Switch from normal iteration to postcopy
  * Returns non-0 on error
@@ -3169,9 +3203,12 @@ static int postcopy_start(MigrationState *ms)
     bool restart_block = false;
     int cur_state = MIGRATION_STATUS_ACTIVE;
 
-    if (postcopy_preempt_wait_channel(ms)) {
-        migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
-        return -1;
+    if (migrate_postcopy_preempt()) {
+        migration_wait_main_channel(ms);
+        if (postcopy_preempt_establish_channel(ms)) {
+            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+            return -1;
+        }
     }
 
     if (!migrate_pause_before_switchover()) {
@@ -3582,6 +3619,20 @@ static int postcopy_do_resume(MigrationState *s)
         return ret;
     }
 
+    /*
+     * If preempt is enabled, re-establish the preempt channel.  Note that
+     * we do it after resume prepare to make sure the main channel will be
+     * created before the preempt channel.  E.g. with weak network, the
+     * dest QEMU may get messed up with the preempt and main channels on
+     * the order of connection setup.  This guarantees the correct order.
+     */
+    ret = postcopy_preempt_establish_channel(s);
+    if (ret) {
+        error_report("%s: postcopy_preempt_establish_channel(): %d",
+                     __func__, ret);
+        return ret;
+    }
+
     /*
      * Last handshake with destination on the resume (destination will
      * switch to postcopy-active afterwards)
@@ -3643,14 +3694,6 @@ static MigThrError postcopy_pause(MigrationState *s)
         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
             /* Woken up by a recover procedure. Give it a shot */
 
-            if (postcopy_preempt_wait_channel(s)) {
-                /*
-                 * Preempt enabled, and new channel create failed; loop
-                 * back to wait for another recovery.
-                 */
-                continue;
-            }
-
             /*
              * Firstly, let's wake up the return path now, with a new
              * return path channel.
@@ -3820,20 +3863,18 @@ typedef enum {
  */
 static MigIterateState migration_iteration_run(MigrationState *s)
 {
-    uint64_t pend_pre, pend_compat, pend_post;
+    uint64_t must_precopy, can_postcopy;
     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
 
-    qemu_savevm_state_pending_estimate(&pend_pre, &pend_compat, &pend_post);
-    uint64_t pending_size = pend_pre + pend_compat + pend_post;
+    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
+    uint64_t pending_size = must_precopy + can_postcopy;
 
-    trace_migrate_pending_estimate(pending_size,
-                                   pend_pre, pend_compat, pend_post);
+    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
 
-    if (pend_pre + pend_compat <= s->threshold_size) {
-        qemu_savevm_state_pending_exact(&pend_pre, &pend_compat, &pend_post);
-        pending_size = pend_pre + pend_compat + pend_post;
-        trace_migrate_pending_exact(pending_size,
-                                    pend_pre, pend_compat, pend_post);
+    if (must_precopy <= s->threshold_size) {
+        qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
+        pending_size = must_precopy + can_postcopy;
+        trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
     }
 
     if (!pending_size || pending_size < s->threshold_size) {
@@ -3843,7 +3884,7 @@ static MigIterateState migration_iteration_run(MigrationState *s)
     }
 
     /* Still a significant amount to transfer */
-    if (!in_postcopy && pend_pre <= s->threshold_size &&
+    if (!in_postcopy && must_precopy <= s->threshold_size &&
         qatomic_read(&s->start_postcopy)) {
         if (postcopy_start(s)) {
             error_report("%s: postcopy failed to start", __func__);
@@ -4343,15 +4384,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
         }
     }
 
-    /* This needs to be done before resuming a postcopy */
-    if (postcopy_preempt_setup(s, &local_err)) {
-        error_report_err(local_err);
-        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
-                          MIGRATION_STATUS_FAILED);
-        migrate_fd_cleanup(s);
-        return;
-    }
-
     if (resume) {
         /* Wakeup the main migration thread to do the recovery */
         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4525,6 +4557,7 @@ static void migration_instance_finalize(Object *obj)
     qemu_sem_destroy(&ms->postcopy_pause_sem);
     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
     qemu_sem_destroy(&ms->rp_state.rp_sem);
+    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
     error_free(ms->error);
 }
@@ -4571,6 +4604,7 @@ static void migration_instance_init(Object *obj)
     qemu_sem_init(&ms->postcopy_pause_sem, 0);
     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
     qemu_sem_init(&ms->rp_state.rp_sem, 0);
+    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
     qemu_sem_init(&ms->rate_limit_sem, 0);
     qemu_sem_init(&ms->wait_unplug_sem, 0);
     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
index 66511ce532751acfba9f19919892007c3cd0ea4d..2da2f8a164fcff2930155195cc31fde7eb30edfc 100644 (file)
@@ -116,6 +116,12 @@ struct MigrationIncomingState {
     unsigned int postcopy_channels;
     /* QEMUFile for postcopy only; it'll be handled by a separate thread */
     QEMUFile *postcopy_qemufile_dst;
+    /*
+     * When postcopy_qemufile_dst is properly setup, this sem is posted.
+     * One can wait on this semaphore to wait until the preempt channel is
+     * properly setup.
+     */
+    QemuSemaphore postcopy_qemufile_dst_done;
     /* Postcopy priority thread is used to receive postcopy requested pages */
     QemuThread postcopy_prio_thread;
     bool postcopy_prio_thread_created;
@@ -276,6 +282,12 @@ struct MigrationState {
          */
         bool          rp_thread_created;
         QemuSemaphore rp_sem;
+        /*
+         * We post to this when we got one PONG from dest. So far it's an
+         * easy way to know the main channel has successfully established
+         * on dest QEMU.
+         */
+        QemuSemaphore rp_pong_acks;
     } rp_state;
 
     double mbps;
@@ -474,7 +486,4 @@ void migration_cancel(const Error *error);
 void populate_vfio_info(MigrationInfo *info);
 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
 
-bool migrate_multi_channels_is_allowed(void);
-void migrate_protocol_allow_multi_channels(bool allow);
-
 #endif
index b7ad7002e02a4f45c0f8aec5ac1b90c48728d527..5e85c3ea9bc4a164ccef33f2394fbb843b9d9287 100644 (file)
@@ -516,7 +516,7 @@ void multifd_save_cleanup(void)
 {
     int i;
 
-    if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
+    if (!migrate_use_multifd()) {
         return;
     }
     multifd_send_terminate_threads(NULL);
@@ -843,30 +843,29 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
         ioc, object_get_typename(OBJECT(ioc)),
         migrate_get_current()->hostname, error);
 
-    if (!error) {
-        if (migrate_channel_requires_tls_upgrade(ioc)) {
-            multifd_tls_channel_connect(p, ioc, &error);
-            if (!error) {
-                /*
-                 * tls_channel_connect will call back to this
-                 * function after the TLS handshake,
-                 * so we mustn't call multifd_send_thread until then
-                 */
-                return true;
-            } else {
-                return false;
-            }
+    if (error) {
+        return false;
+    }
+    if (migrate_channel_requires_tls_upgrade(ioc)) {
+        multifd_tls_channel_connect(p, ioc, &error);
+        if (!error) {
+            /*
+             * tls_channel_connect will call back to this
+             * function after the TLS handshake,
+             * so we mustn't call multifd_send_thread until then
+             */
+            return true;
         } else {
-            migration_ioc_register_yank(ioc);
-            p->registered_yank = true;
-            p->c = ioc;
-            qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
-                                   QEMU_THREAD_JOINABLE);
-       }
-       return true;
+            return false;
+        }
+    } else {
+        migration_ioc_register_yank(ioc);
+        p->registered_yank = true;
+        p->c = ioc;
+        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
+                           QEMU_THREAD_JOINABLE);
     }
-
-    return false;
+    return true;
 }
 
 static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
@@ -893,19 +892,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
     Error *local_err = NULL;
 
     trace_multifd_new_send_channel_async(p->id);
-    if (qio_task_propagate_error(task, &local_err)) {
-        goto cleanup;
-    } else {
+    if (!qio_task_propagate_error(task, &local_err)) {
         p->c = QIO_CHANNEL(sioc);
         qio_channel_set_delay(p->c, false);
         p->running = true;
-        if (!multifd_channel_connect(p, sioc, local_err)) {
-            goto cleanup;
+        if (multifd_channel_connect(p, sioc, local_err)) {
+            return;
         }
-        return;
     }
 
-cleanup:
     multifd_new_send_channel_cleanup(p, sioc, local_err);
 }
 
@@ -918,10 +913,6 @@ int multifd_save_setup(Error **errp)
     if (!migrate_use_multifd()) {
         return 0;
     }
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return -1;
-    }
 
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
@@ -1022,26 +1013,33 @@ static void multifd_recv_terminate_threads(Error *err)
     }
 }
 
-int multifd_load_cleanup(Error **errp)
+void multifd_load_shutdown(void)
+{
+    if (migrate_use_multifd()) {
+        multifd_recv_terminate_threads(NULL);
+    }
+}
+
+void multifd_load_cleanup(void)
 {
     int i;
 
-    if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
-        return 0;
+    if (!migrate_use_multifd()) {
+        return;
     }
     multifd_recv_terminate_threads(NULL);
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
 
         if (p->running) {
-            p->quit = true;
             /*
              * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
              * however try to wakeup it without harm in cleanup phase.
              */
             qemu_sem_post(&p->sem_sync);
-            qemu_thread_join(&p->thread);
         }
+
+        qemu_thread_join(&p->thread);
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
@@ -1067,8 +1065,6 @@ int multifd_load_cleanup(Error **errp)
     multifd_recv_state->params = NULL;
     g_free(multifd_recv_state);
     multifd_recv_state = NULL;
-
-    return 0;
 }
 
 void multifd_recv_sync_main(void)
@@ -1116,10 +1112,7 @@ static void *multifd_recv_thread(void *opaque)
 
         ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
-        if (ret == 0) {   /* EOF */
-            break;
-        }
-        if (ret == -1) {   /* Error */
+        if (ret == 0 || ret == -1) {   /* 0: EOF  -1: Error */
             break;
         }
 
@@ -1180,10 +1173,6 @@ int multifd_load_setup(Error **errp)
         return 0;
     }
 
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return -1;
-    }
     thread_count = migrate_multifd_channels();
     multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
     multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
diff --git a/migration/multifd.c.orig b/migration/multifd.c.orig
deleted file mode 100644 (file)
index ad89293..0000000
+++ /dev/null
@@ -1,1274 +0,0 @@
-/*
- * Multifd common code
- *
- * Copyright (c) 2019-2020 Red Hat Inc
- *
- * Authors:
- *  Juan Quintela <quintela@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/rcu.h"
-#include "exec/target_page.h"
-#include "sysemu/sysemu.h"
-#include "exec/ramblock.h"
-#include "qemu/error-report.h"
-#include "qapi/error.h"
-#include "ram.h"
-#include "migration.h"
-#include "socket.h"
-#include "tls.h"
-#include "qemu-file.h"
-#include "trace.h"
-#include "multifd.h"
-
-#include "qemu/yank.h"
-#include "io/channel-socket.h"
-#include "yank_functions.h"
-
-/* Multiple fd's */
-
-#define MULTIFD_MAGIC 0x11223344U
-#define MULTIFD_VERSION 1
-
-typedef struct {
-    uint32_t magic;
-    uint32_t version;
-    unsigned char uuid[16]; /* QemuUUID */
-    uint8_t id;
-    uint8_t unused1[7];     /* Reserved for future use */
-    uint64_t unused2[4];    /* Reserved for future use */
-} __attribute__((packed)) MultiFDInit_t;
-
-/* Multifd without compression */
-
-/**
- * nocomp_send_setup: setup send side
- *
- * For no compression this function does nothing.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_send_setup(MultiFDSendParams *p, Error **errp)
-{
-    return 0;
-}
-
-/**
- * nocomp_send_cleanup: cleanup send side
- *
- * For no compression this function does nothing.
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
-{
-    return;
-}
-
-/**
- * nocomp_send_prepare: prepare date to be able to send
- *
- * For no compression we just have to calculate the size of the
- * packet.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
-{
-    MultiFDPages_t *pages = p->pages;
-
-    for (int i = 0; i < p->normal_num; i++) {
-        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
-        p->iov[p->iovs_num].iov_len = p->page_size;
-        p->iovs_num++;
-    }
-
-    p->next_packet_size = p->normal_num * p->page_size;
-    p->flags |= MULTIFD_FLAG_NOCOMP;
-    return 0;
-}
-
-/**
- * nocomp_recv_setup: setup receive side
- *
- * For no compression this function does nothing.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
-{
-    return 0;
-}
-
-/**
- * nocomp_recv_cleanup: setup receive side
- *
- * For no compression this function does nothing.
- *
- * @p: Params for the channel that we are using
- */
-static void nocomp_recv_cleanup(MultiFDRecvParams *p)
-{
-}
-
-/**
- * nocomp_recv_pages: read the data from the channel into actual pages
- *
- * For no compression we just need to read things into the correct place.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
-{
-    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
-
-    if (flags != MULTIFD_FLAG_NOCOMP) {
-        error_setg(errp, "multifd %u: flags received %x flags expected %x",
-                   p->id, flags, MULTIFD_FLAG_NOCOMP);
-        return -1;
-    }
-    for (int i = 0; i < p->normal_num; i++) {
-        p->iov[i].iov_base = p->host + p->normal[i];
-        p->iov[i].iov_len = p->page_size;
-    }
-    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
-}
-
-static MultiFDMethods multifd_nocomp_ops = {
-    .send_setup = nocomp_send_setup,
-    .send_cleanup = nocomp_send_cleanup,
-    .send_prepare = nocomp_send_prepare,
-    .recv_setup = nocomp_recv_setup,
-    .recv_cleanup = nocomp_recv_cleanup,
-    .recv_pages = nocomp_recv_pages
-};
-
-static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {
-    [MULTIFD_COMPRESSION_NONE] = &multifd_nocomp_ops,
-};
-
-void multifd_register_ops(int method, MultiFDMethods *ops)
-{
-    assert(0 < method && method < MULTIFD_COMPRESSION__MAX);
-    multifd_ops[method] = ops;
-}
-
-static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
-{
-    MultiFDInit_t msg = {};
-    int ret;
-
-    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
-    msg.version = cpu_to_be32(MULTIFD_VERSION);
-    msg.id = p->id;
-    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
-
-    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
-    if (ret != 0) {
-        return -1;
-    }
-    return 0;
-}
-
-static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
-{
-    MultiFDInit_t msg;
-    int ret;
-
-    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
-    if (ret != 0) {
-        return -1;
-    }
-
-    msg.magic = be32_to_cpu(msg.magic);
-    msg.version = be32_to_cpu(msg.version);
-
-    if (msg.magic != MULTIFD_MAGIC) {
-        error_setg(errp, "multifd: received packet magic %x "
-                   "expected %x", msg.magic, MULTIFD_MAGIC);
-        return -1;
-    }
-
-    if (msg.version != MULTIFD_VERSION) {
-        error_setg(errp, "multifd: received packet version %u "
-                   "expected %u", msg.version, MULTIFD_VERSION);
-        return -1;
-    }
-
-    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
-        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
-        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
-
-        error_setg(errp, "multifd: received uuid '%s' and expected "
-                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
-        g_free(uuid);
-        g_free(msg_uuid);
-        return -1;
-    }
-
-    if (msg.id > migrate_multifd_channels()) {
-        error_setg(errp, "multifd: received channel version %u "
-                   "expected %u", msg.version, MULTIFD_VERSION);
-        return -1;
-    }
-
-    return msg.id;
-}
-
-static MultiFDPages_t *multifd_pages_init(size_t size)
-{
-    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
-
-    pages->allocated = size;
-    pages->offset = g_new0(ram_addr_t, size);
-
-    return pages;
-}
-
-static void multifd_pages_clear(MultiFDPages_t *pages)
-{
-    pages->num = 0;
-    pages->allocated = 0;
-    pages->packet_num = 0;
-    pages->block = NULL;
-    g_free(pages->offset);
-    pages->offset = NULL;
-    g_free(pages);
-}
-
-static void multifd_send_fill_packet(MultiFDSendParams *p)
-{
-    MultiFDPacket_t *packet = p->packet;
-    int i;
-
-    packet->flags = cpu_to_be32(p->flags);
-    packet->pages_alloc = cpu_to_be32(p->pages->allocated);
-    packet->normal_pages = cpu_to_be32(p->normal_num);
-    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
-    packet->packet_num = cpu_to_be64(p->packet_num);
-
-    if (p->pages->block) {
-        strncpy(packet->ramblock, p->pages->block->idstr, 256);
-    }
-
-    for (i = 0; i < p->normal_num; i++) {
-        /* there are architectures where ram_addr_t is 32 bit */
-        uint64_t temp = p->normal[i];
-
-        packet->offset[i] = cpu_to_be64(temp);
-    }
-}
-
-static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
-{
-    MultiFDPacket_t *packet = p->packet;
-    RAMBlock *block;
-    int i;
-
-    packet->magic = be32_to_cpu(packet->magic);
-    if (packet->magic != MULTIFD_MAGIC) {
-        error_setg(errp, "multifd: received packet "
-                   "magic %x and expected magic %x",
-                   packet->magic, MULTIFD_MAGIC);
-        return -1;
-    }
-
-    packet->version = be32_to_cpu(packet->version);
-    if (packet->version != MULTIFD_VERSION) {
-        error_setg(errp, "multifd: received packet "
-                   "version %u and expected version %u",
-                   packet->version, MULTIFD_VERSION);
-        return -1;
-    }
-
-    p->flags = be32_to_cpu(packet->flags);
-
-    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
-    /*
-     * If we received a packet that is 100 times bigger than expected
-     * just stop migration.  It is a magic number.
-     */
-    if (packet->pages_alloc > p->page_count) {
-        error_setg(errp, "multifd: received packet "
-                   "with size %u and expected a size of %u",
-                   packet->pages_alloc, p->page_count) ;
-        return -1;
-    }
-
-    p->normal_num = be32_to_cpu(packet->normal_pages);
-    if (p->normal_num > packet->pages_alloc) {
-        error_setg(errp, "multifd: received packet "
-                   "with %u pages and expected maximum pages are %u",
-                   p->normal_num, packet->pages_alloc) ;
-        return -1;
-    }
-
-    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
-    p->packet_num = be64_to_cpu(packet->packet_num);
-
-    if (p->normal_num == 0) {
-        return 0;
-    }
-
-    /* make sure that ramblock is 0 terminated */
-    packet->ramblock[255] = 0;
-    block = qemu_ram_block_by_name(packet->ramblock);
-    if (!block) {
-        error_setg(errp, "multifd: unknown ram block %s",
-                   packet->ramblock);
-        return -1;
-    }
-
-    p->host = block->host;
-    for (i = 0; i < p->normal_num; i++) {
-        uint64_t offset = be64_to_cpu(packet->offset[i]);
-
-        if (offset > (block->used_length - p->page_size)) {
-            error_setg(errp, "multifd: offset too long %" PRIu64
-                       " (max " RAM_ADDR_FMT ")",
-                       offset, block->used_length);
-            return -1;
-        }
-        p->normal[i] = offset;
-    }
-
-    return 0;
-}
-
-struct {
-    MultiFDSendParams *params;
-    /* array of pages to sent */
-    MultiFDPages_t *pages;
-    /* global number of generated multifd packets */
-    uint64_t packet_num;
-    /* send channels ready */
-    QemuSemaphore channels_ready;
-    /*
-     * Have we already run terminate threads.  There is a race when it
-     * happens that we got one error while we are exiting.
-     * We will use atomic operations.  Only valid values are 0 and 1.
-     */
-    int exiting;
-    /* multifd ops */
-    MultiFDMethods *ops;
-} *multifd_send_state;
-
-/*
- * How we use multifd_send_state->pages and channel->pages?
- *
- * We create a pages for each channel, and a main one.  Each time that
- * we need to send a batch of pages we interchange the ones between
- * multifd_send_state and the channel that is sending it.  There are
- * two reasons for that:
- *    - to not have to do so many mallocs during migration
- *    - to make easier to know what to free at the end of migration
- *
- * This way we always know who is the owner of each "pages" struct,
- * and we don't need any locking.  It belongs to the migration thread
- * or to the channel thread.  Switching is safe because the migration
- * thread is using the channel mutex when changing it, and the channel
- * have to had finish with its own, otherwise pending_job can't be
- * false.
- */
-
-static int multifd_send_pages(QEMUFile *f)
-{
-    int i;
-    static int next_channel;
-    MultiFDSendParams *p = NULL; /* make happy gcc */
-    MultiFDPages_t *pages = multifd_send_state->pages;
-    uint64_t transferred;
-
-    if (qatomic_read(&multifd_send_state->exiting)) {
-        return -1;
-    }
-
-    qemu_sem_wait(&multifd_send_state->channels_ready);
-    /*
-     * next_channel can remain from a previous migration that was
-     * using more channels, so ensure it doesn't overflow if the
-     * limit is lower now.
-     */
-    next_channel %= migrate_multifd_channels();
-    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
-        p = &multifd_send_state->params[i];
-
-        qemu_mutex_lock(&p->mutex);
-        if (p->quit) {
-            error_report("%s: channel %d has already quit!", __func__, i);
-            qemu_mutex_unlock(&p->mutex);
-            return -1;
-        }
-        if (!p->pending_job) {
-            p->pending_job++;
-            next_channel = (i + 1) % migrate_multifd_channels();
-            break;
-        }
-        qemu_mutex_unlock(&p->mutex);
-    }
-    assert(!p->pages->num);
-    assert(!p->pages->block);
-
-    p->packet_num = multifd_send_state->packet_num++;
-    multifd_send_state->pages = p->pages;
-    p->pages = pages;
-    transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len;
-    qemu_file_acct_rate_limit(f, transferred);
-    ram_counters.multifd_bytes += transferred;
-    stat64_add(&ram_atomic_counters.transferred, transferred);
-    qemu_mutex_unlock(&p->mutex);
-    qemu_sem_post(&p->sem);
-
-    return 1;
-}
-
-int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
-{
-    MultiFDPages_t *pages = multifd_send_state->pages;
-    bool changed = false;
-
-    if (!pages->block) {
-        pages->block = block;
-    }
-
-    if (pages->block == block) {
-        pages->offset[pages->num] = offset;
-        pages->num++;
-
-        if (pages->num < pages->allocated) {
-            return 1;
-        }
-    } else {
-        changed = true;
-    }
-
-    if (multifd_send_pages(f) < 0) {
-        return -1;
-    }
-
-    if (changed) {
-        return multifd_queue_page(f, block, offset);
-    }
-
-    return 1;
-}
-
-static void multifd_send_terminate_threads(Error *err)
-{
-    int i;
-
-    trace_multifd_send_terminate_threads(err != NULL);
-
-    if (err) {
-        MigrationState *s = migrate_get_current();
-        migrate_set_error(s, err);
-        if (s->state == MIGRATION_STATUS_SETUP ||
-            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
-            s->state == MIGRATION_STATUS_DEVICE ||
-            s->state == MIGRATION_STATUS_ACTIVE) {
-            migrate_set_state(&s->state, s->state,
-                              MIGRATION_STATUS_FAILED);
-        }
-    }
-
-    /*
-     * We don't want to exit each threads twice.  Depending on where
-     * we get the error, or if there are two independent errors in two
-     * threads at the same time, we can end calling this function
-     * twice.
-     */
-    if (qatomic_xchg(&multifd_send_state->exiting, 1)) {
-        return;
-    }
-
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-
-        qemu_mutex_lock(&p->mutex);
-        p->quit = true;
-        qemu_sem_post(&p->sem);
-        if (p->c) {
-            qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
-        }
-        qemu_mutex_unlock(&p->mutex);
-    }
-}
-
-void multifd_save_cleanup(void)
-{
-    int i;
-
-    if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
-        return;
-    }
-    multifd_send_terminate_threads(NULL);
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-
-        if (p->running) {
-            qemu_thread_join(&p->thread);
-        }
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-        Error *local_err = NULL;
-
-        if (p->registered_yank) {
-            migration_ioc_unregister_yank(p->c);
-        }
-        socket_send_channel_destroy(p->c);
-        p->c = NULL;
-        qemu_mutex_destroy(&p->mutex);
-        qemu_sem_destroy(&p->sem);
-        qemu_sem_destroy(&p->sem_sync);
-        g_free(p->name);
-        p->name = NULL;
-        multifd_pages_clear(p->pages);
-        p->pages = NULL;
-        p->packet_len = 0;
-        g_free(p->packet);
-        p->packet = NULL;
-        g_free(p->iov);
-        p->iov = NULL;
-        g_free(p->normal);
-        p->normal = NULL;
-        multifd_send_state->ops->send_cleanup(p, &local_err);
-        if (local_err) {
-            migrate_set_error(migrate_get_current(), local_err);
-            error_free(local_err);
-        }
-    }
-    qemu_sem_destroy(&multifd_send_state->channels_ready);
-    g_free(multifd_send_state->params);
-    multifd_send_state->params = NULL;
-    multifd_pages_clear(multifd_send_state->pages);
-    multifd_send_state->pages = NULL;
-    g_free(multifd_send_state);
-    multifd_send_state = NULL;
-}
-
-static int multifd_zero_copy_flush(QIOChannel *c)
-{
-    int ret;
-    Error *err = NULL;
-
-    ret = qio_channel_flush(c, &err);
-    if (ret < 0) {
-        error_report_err(err);
-        return -1;
-    }
-    if (ret == 1) {
-        dirty_sync_missed_zero_copy();
-    }
-
-    return ret;
-}
-
-int multifd_send_sync_main(QEMUFile *f)
-{
-    int i;
-    bool flush_zero_copy;
-
-    if (!migrate_use_multifd()) {
-        return 0;
-    }
-    if (multifd_send_state->pages->num) {
-        if (multifd_send_pages(f) < 0) {
-            error_report("%s: multifd_send_pages fail", __func__);
-            return -1;
-        }
-    }
-
-    /*
-     * When using zero-copy, it's necessary to flush the pages before any of
-     * the pages can be sent again, so we'll make sure the new version of the
-     * pages will always arrive _later_ than the old pages.
-     *
-     * Currently we achieve this by flushing the zero-page requested writes
-     * per ram iteration, but in the future we could potentially optimize it
-     * to be less frequent, e.g. only after we finished one whole scanning of
-     * all the dirty bitmaps.
-     */
-
-    flush_zero_copy = migrate_use_zero_copy_send();
-
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-
-        trace_multifd_send_sync_main_signal(p->id);
-
-        qemu_mutex_lock(&p->mutex);
-
-        if (p->quit) {
-            error_report("%s: channel %d has already quit", __func__, i);
-            qemu_mutex_unlock(&p->mutex);
-            return -1;
-        }
-
-        p->packet_num = multifd_send_state->packet_num++;
-        p->flags |= MULTIFD_FLAG_SYNC;
-        p->pending_job++;
-        qemu_file_acct_rate_limit(f, p->packet_len);
-        ram_counters.multifd_bytes += p->packet_len;
-        stat64_add(&ram_atomic_counters.transferred, p->packet_len);
-        qemu_mutex_unlock(&p->mutex);
-        qemu_sem_post(&p->sem);
-
-        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
-            return -1;
-        }
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-
-        trace_multifd_send_sync_main_wait(p->id);
-        qemu_sem_wait(&p->sem_sync);
-    }
-    trace_multifd_send_sync_main(multifd_send_state->packet_num);
-
-    return 0;
-}
-
-static void *multifd_send_thread(void *opaque)
-{
-    MultiFDSendParams *p = opaque;
-    Error *local_err = NULL;
-    int ret = 0;
-    bool use_zero_copy_send = migrate_use_zero_copy_send();
-
-    trace_multifd_send_thread_start(p->id);
-    rcu_register_thread();
-
-    if (multifd_send_initial_packet(p, &local_err) < 0) {
-        ret = -1;
-        goto out;
-    }
-    /* initial packet */
-    p->num_packets = 1;
-
-    while (true) {
-        qemu_sem_wait(&p->sem);
-
-        if (qatomic_read(&multifd_send_state->exiting)) {
-            break;
-        }
-        qemu_mutex_lock(&p->mutex);
-
-        if (p->pending_job) {
-            uint64_t packet_num = p->packet_num;
-            uint32_t flags = p->flags;
-            p->normal_num = 0;
-
-            if (use_zero_copy_send) {
-                p->iovs_num = 0;
-            } else {
-                p->iovs_num = 1;
-            }
-
-            for (int i = 0; i < p->pages->num; i++) {
-                p->normal[p->normal_num] = p->pages->offset[i];
-                p->normal_num++;
-            }
-
-            if (p->normal_num) {
-                ret = multifd_send_state->ops->send_prepare(p, &local_err);
-                if (ret != 0) {
-                    qemu_mutex_unlock(&p->mutex);
-                    break;
-                }
-            }
-            multifd_send_fill_packet(p);
-            p->flags = 0;
-            p->num_packets++;
-            p->total_normal_pages += p->normal_num;
-            p->pages->num = 0;
-            p->pages->block = NULL;
-            qemu_mutex_unlock(&p->mutex);
-
-            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
-                               p->next_packet_size);
-
-            if (use_zero_copy_send) {
-                /* Send header first, without zerocopy */
-                ret = qio_channel_write_all(p->c, (void *)p->packet,
-                                            p->packet_len, &local_err);
-                if (ret != 0) {
-                    break;
-                }
-            } else {
-                /* Send header using the same writev call */
-                p->iov[0].iov_len = p->packet_len;
-                p->iov[0].iov_base = p->packet;
-            }
-
-            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
-                                              0, p->write_flags, &local_err);
-            if (ret != 0) {
-                break;
-            }
-
-            qemu_mutex_lock(&p->mutex);
-            p->pending_job--;
-            qemu_mutex_unlock(&p->mutex);
-
-            if (flags & MULTIFD_FLAG_SYNC) {
-                qemu_sem_post(&p->sem_sync);
-            }
-            qemu_sem_post(&multifd_send_state->channels_ready);
-        } else if (p->quit) {
-            qemu_mutex_unlock(&p->mutex);
-            break;
-        } else {
-            qemu_mutex_unlock(&p->mutex);
-            /* sometimes there are spurious wakeups */
-        }
-    }
-
-out:
-    if (local_err) {
-        trace_multifd_send_error(p->id);
-        multifd_send_terminate_threads(local_err);
-        error_free(local_err);
-    }
-
-    /*
-     * Error happen, I will exit, but I can't just leave, tell
-     * who pay attention to me.
-     */
-    if (ret != 0) {
-        qemu_sem_post(&p->sem_sync);
-        qemu_sem_post(&multifd_send_state->channels_ready);
-    }
-
-    qemu_mutex_lock(&p->mutex);
-    p->running = false;
-    qemu_mutex_unlock(&p->mutex);
-
-    rcu_unregister_thread();
-    trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
-
-    return NULL;
-}
-
-static bool multifd_channel_connect(MultiFDSendParams *p,
-                                    QIOChannel *ioc,
-                                    Error *error);
-
-static void multifd_tls_outgoing_handshake(QIOTask *task,
-                                           gpointer opaque)
-{
-    MultiFDSendParams *p = opaque;
-    QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
-    Error *err = NULL;
-
-    if (qio_task_propagate_error(task, &err)) {
-        trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
-    } else {
-        trace_multifd_tls_outgoing_handshake_complete(ioc);
-    }
-
-    if (!multifd_channel_connect(p, ioc, err)) {
-        /*
-         * Error happen, mark multifd_send_thread status as 'quit' although it
-         * is not created, and then tell who pay attention to me.
-         */
-        p->quit = true;
-        qemu_sem_post(&multifd_send_state->channels_ready);
-        qemu_sem_post(&p->sem_sync);
-    }
-}
-
-static void *multifd_tls_handshake_thread(void *opaque)
-{
-    MultiFDSendParams *p = opaque;
-    QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c);
-
-    qio_channel_tls_handshake(tioc,
-                              multifd_tls_outgoing_handshake,
-                              p,
-                              NULL,
-                              NULL);
-    return NULL;
-}
-
-static void multifd_tls_channel_connect(MultiFDSendParams *p,
-                                        QIOChannel *ioc,
-                                        Error **errp)
-{
-    MigrationState *s = migrate_get_current();
-    const char *hostname = s->hostname;
-    QIOChannelTLS *tioc;
-
-    tioc = migration_tls_client_create(s, ioc, hostname, errp);
-    if (!tioc) {
-        return;
-    }
-
-    object_unref(OBJECT(ioc));
-    trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
-    qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
-    p->c = QIO_CHANNEL(tioc);
-    qemu_thread_create(&p->thread, "multifd-tls-handshake-worker",
-                       multifd_tls_handshake_thread, p,
-                       QEMU_THREAD_JOINABLE);
-}
-
-static bool multifd_channel_connect(MultiFDSendParams *p,
-                                    QIOChannel *ioc,
-                                    Error *error)
-{
-    trace_multifd_set_outgoing_channel(
-        ioc, object_get_typename(OBJECT(ioc)),
-        migrate_get_current()->hostname, error);
-
-    if (!error) {
-        if (migrate_channel_requires_tls_upgrade(ioc)) {
-            multifd_tls_channel_connect(p, ioc, &error);
-            if (!error) {
-                /*
-                 * tls_channel_connect will call back to this
-                 * function after the TLS handshake,
-                 * so we mustn't call multifd_send_thread until then
-                 */
-                return true;
-            } else {
-                return false;
-            }
-        } else {
-            migration_ioc_register_yank(ioc);
-            p->registered_yank = true;
-            p->c = ioc;
-            qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
-                                   QEMU_THREAD_JOINABLE);
-       }
-       return true;
-    }
-
-    return false;
-}
-
-static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
-                                             QIOChannel *ioc, Error *err)
-{
-     migrate_set_error(migrate_get_current(), err);
-     /* Error happen, we need to tell who pay attention to me */
-     qemu_sem_post(&multifd_send_state->channels_ready);
-     qemu_sem_post(&p->sem_sync);
-     /*
-      * Although multifd_send_thread is not created, but main migration
-      * thread neet to judge whether it is running, so we need to mark
-      * its status.
-      */
-     p->quit = true;
-     object_unref(OBJECT(ioc));
-     error_free(err);
-}
-
-static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
-{
-    MultiFDSendParams *p = opaque;
-    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
-    Error *local_err = NULL;
-
-    trace_multifd_new_send_channel_async(p->id);
-    if (qio_task_propagate_error(task, &local_err)) {
-        goto cleanup;
-    } else {
-        p->c = QIO_CHANNEL(sioc);
-        qio_channel_set_delay(p->c, false);
-        p->running = true;
-        if (!multifd_channel_connect(p, sioc, local_err)) {
-            goto cleanup;
-        }
-        return;
-    }
-
-cleanup:
-    multifd_new_send_channel_cleanup(p, sioc, local_err);
-}
-
-int multifd_save_setup(Error **errp)
-{
-    int thread_count;
-    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
-    uint8_t i;
-
-    if (!migrate_use_multifd()) {
-        return 0;
-    }
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return -1;
-    }
-
-    thread_count = migrate_multifd_channels();
-    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
-    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
-    multifd_send_state->pages = multifd_pages_init(page_count);
-    qemu_sem_init(&multifd_send_state->channels_ready, 0);
-    qatomic_set(&multifd_send_state->exiting, 0);
-    multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];
-
-    for (i = 0; i < thread_count; i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-
-        qemu_mutex_init(&p->mutex);
-        qemu_sem_init(&p->sem, 0);
-        qemu_sem_init(&p->sem_sync, 0);
-        p->quit = false;
-        p->pending_job = 0;
-        p->id = i;
-        p->pages = multifd_pages_init(page_count);
-        p->packet_len = sizeof(MultiFDPacket_t)
-                      + sizeof(uint64_t) * page_count;
-        p->packet = g_malloc0(p->packet_len);
-        p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
-        p->packet->version = cpu_to_be32(MULTIFD_VERSION);
-        p->name = g_strdup_printf("multifdsend_%d", i);
-        /* We need one extra place for the packet header */
-        p->iov = g_new0(struct iovec, page_count + 1);
-        p->normal = g_new0(ram_addr_t, page_count);
-        p->page_size = qemu_target_page_size();
-        p->page_count = page_count;
-
-        if (migrate_use_zero_copy_send()) {
-            p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
-        } else {
-            p->write_flags = 0;
-        }
-
-        socket_send_channel_create(multifd_new_send_channel_async, p);
-    }
-
-    for (i = 0; i < thread_count; i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];
-        Error *local_err = NULL;
-        int ret;
-
-        ret = multifd_send_state->ops->send_setup(p, &local_err);
-        if (ret) {
-            error_propagate(errp, local_err);
-            return ret;
-        }
-    }
-    return 0;
-}
-
-struct {
-    MultiFDRecvParams *params;
-    /* number of created threads */
-    int count;
-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
-    /* global number of generated multifd packets */
-    uint64_t packet_num;
-    /* multifd ops */
-    MultiFDMethods *ops;
-} *multifd_recv_state;
-
-static void multifd_recv_terminate_threads(Error *err)
-{
-    int i;
-
-    trace_multifd_recv_terminate_threads(err != NULL);
-
-    if (err) {
-        MigrationState *s = migrate_get_current();
-        migrate_set_error(s, err);
-        if (s->state == MIGRATION_STATUS_SETUP ||
-            s->state == MIGRATION_STATUS_ACTIVE) {
-            migrate_set_state(&s->state, s->state,
-                              MIGRATION_STATUS_FAILED);
-        }
-    }
-
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        qemu_mutex_lock(&p->mutex);
-        p->quit = true;
-        /*
-         * We could arrive here for two reasons:
-         *  - normal quit, i.e. everything went fine, just finished
-         *  - error quit: We close the channels so the channel threads
-         *    finish the qio_channel_read_all_eof()
-         */
-        if (p->c) {
-            qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
-        }
-        qemu_mutex_unlock(&p->mutex);
-    }
-}
-
-int multifd_load_cleanup(Error **errp)
-{
-    int i;
-
-    if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
-        return 0;
-    }
-    multifd_recv_terminate_threads(NULL);
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        if (p->running) {
-            p->quit = true;
-            /*
-             * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
-             * however try to wakeup it without harm in cleanup phase.
-             */
-            qemu_sem_post(&p->sem_sync);
-            qemu_thread_join(&p->thread);
-        }
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        migration_ioc_unregister_yank(p->c);
-        object_unref(OBJECT(p->c));
-        p->c = NULL;
-        qemu_mutex_destroy(&p->mutex);
-        qemu_sem_destroy(&p->sem_sync);
-        g_free(p->name);
-        p->name = NULL;
-        p->packet_len = 0;
-        g_free(p->packet);
-        p->packet = NULL;
-        g_free(p->iov);
-        p->iov = NULL;
-        g_free(p->normal);
-        p->normal = NULL;
-        multifd_recv_state->ops->recv_cleanup(p);
-    }
-    qemu_sem_destroy(&multifd_recv_state->sem_sync);
-    g_free(multifd_recv_state->params);
-    multifd_recv_state->params = NULL;
-    g_free(multifd_recv_state);
-    multifd_recv_state = NULL;
-
-    return 0;
-}
-
-void multifd_recv_sync_main(void)
-{
-    int i;
-
-    if (!migrate_use_multifd()) {
-        return;
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        trace_multifd_recv_sync_main_wait(p->id);
-        qemu_sem_wait(&multifd_recv_state->sem_sync);
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        WITH_QEMU_LOCK_GUARD(&p->mutex) {
-            if (multifd_recv_state->packet_num < p->packet_num) {
-                multifd_recv_state->packet_num = p->packet_num;
-            }
-        }
-        trace_multifd_recv_sync_main_signal(p->id);
-        qemu_sem_post(&p->sem_sync);
-    }
-    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
-}
-
-static void *multifd_recv_thread(void *opaque)
-{
-    MultiFDRecvParams *p = opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    trace_multifd_recv_thread_start(p->id);
-    rcu_register_thread();
-
-    while (true) {
-        uint32_t flags;
-
-        if (p->quit) {
-            break;
-        }
-
-        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
-                                       p->packet_len, &local_err);
-        if (ret == 0) {   /* EOF */
-            break;
-        }
-        if (ret == -1) {   /* Error */
-            break;
-        }
-
-        qemu_mutex_lock(&p->mutex);
-        ret = multifd_recv_unfill_packet(p, &local_err);
-        if (ret) {
-            qemu_mutex_unlock(&p->mutex);
-            break;
-        }
-
-        flags = p->flags;
-        /* recv methods don't know how to handle the SYNC flag */
-        p->flags &= ~MULTIFD_FLAG_SYNC;
-        trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags,
-                           p->next_packet_size);
-        p->num_packets++;
-        p->total_normal_pages += p->normal_num;
-        qemu_mutex_unlock(&p->mutex);
-
-        if (p->normal_num) {
-            ret = multifd_recv_state->ops->recv_pages(p, &local_err);
-            if (ret != 0) {
-                break;
-            }
-        }
-
-        if (flags & MULTIFD_FLAG_SYNC) {
-            qemu_sem_post(&multifd_recv_state->sem_sync);
-            qemu_sem_wait(&p->sem_sync);
-        }
-    }
-
-    if (local_err) {
-        multifd_recv_terminate_threads(local_err);
-        error_free(local_err);
-    }
-    qemu_mutex_lock(&p->mutex);
-    p->running = false;
-    qemu_mutex_unlock(&p->mutex);
-
-    rcu_unregister_thread();
-    trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages);
-
-    return NULL;
-}
-
-int multifd_load_setup(Error **errp)
-{
-    int thread_count;
-    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
-    uint8_t i;
-
-    /*
-     * Return successfully if multiFD recv state is already initialised
-     * or multiFD is not enabled.
-     */
-    if (multifd_recv_state || !migrate_use_multifd()) {
-        return 0;
-    }
-
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return -1;
-    }
-    thread_count = migrate_multifd_channels();
-    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
-    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
-    qatomic_set(&multifd_recv_state->count, 0);
-    qemu_sem_init(&multifd_recv_state->sem_sync, 0);
-    multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
-
-    for (i = 0; i < thread_count; i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        qemu_mutex_init(&p->mutex);
-        qemu_sem_init(&p->sem_sync, 0);
-        p->quit = false;
-        p->id = i;
-        p->packet_len = sizeof(MultiFDPacket_t)
-                      + sizeof(uint64_t) * page_count;
-        p->packet = g_malloc0(p->packet_len);
-        p->name = g_strdup_printf("multifdrecv_%d", i);
-        p->iov = g_new0(struct iovec, page_count);
-        p->normal = g_new0(ram_addr_t, page_count);
-        p->page_count = page_count;
-        p->page_size = qemu_target_page_size();
-    }
-
-    for (i = 0; i < thread_count; i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-        Error *local_err = NULL;
-        int ret;
-
-        ret = multifd_recv_state->ops->recv_setup(p, &local_err);
-        if (ret) {
-            error_propagate(errp, local_err);
-            return ret;
-        }
-    }
-    return 0;
-}
-
-bool multifd_recv_all_channels_created(void)
-{
-    int thread_count = migrate_multifd_channels();
-
-    if (!migrate_use_multifd()) {
-        return true;
-    }
-
-    if (!multifd_recv_state) {
-        /* Called before any connections created */
-        return false;
-    }
-
-    return thread_count == qatomic_read(&multifd_recv_state->count);
-}
-
-/*
- * Try to receive all multifd channels to get ready for the migration.
- * Sets @errp when failing to receive the current channel.
- */
-void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
-{
-    MultiFDRecvParams *p;
-    Error *local_err = NULL;
-    int id;
-
-    id = multifd_recv_initial_packet(ioc, &local_err);
-    if (id < 0) {
-        multifd_recv_terminate_threads(local_err);
-        error_propagate_prepend(errp, local_err,
-                                "failed to receive packet"
-                                " via multifd channel %d: ",
-                                qatomic_read(&multifd_recv_state->count));
-        return;
-    }
-    trace_multifd_recv_new_channel(id);
-
-    p = &multifd_recv_state->params[id];
-    if (p->c != NULL) {
-        error_setg(&local_err, "multifd: received id '%d' already setup'",
-                   id);
-        multifd_recv_terminate_threads(local_err);
-        error_propagate(errp, local_err);
-        return;
-    }
-    p->c = ioc;
-    object_ref(OBJECT(ioc));
-    /* initial packet */
-    p->num_packets = 1;
-
-    p->running = true;
-    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
-                       QEMU_THREAD_JOINABLE);
-    qatomic_inc(&multifd_recv_state->count);
-}
index ff3aa2e2e9605707dd141bbfc00cb8ccc2b04a03..7cfc2651487a3a38e8955db1cc7953aa0a7563fa 100644 (file)
@@ -16,7 +16,8 @@
 int multifd_save_setup(Error **errp);
 void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
-int multifd_load_cleanup(Error **errp);
+void multifd_load_cleanup(void);
+void multifd_load_shutdown(void);
 bool multifd_recv_all_channels_created(void);
 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
index 53299b7a5ebd7d8fae68eb069f07d14f73577bd5..f54f44d89988f6198c06ca5cf15136fbbb60ce84 100644 (file)
@@ -1197,6 +1197,11 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
     }
 
     if (migrate_postcopy_preempt()) {
+        /*
+         * The preempt channel is established asynchronously.  Wait for
+         * that setup to complete before going on.
+         */
+        qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
         /*
          * This thread needs to be created after the temp pages because
          * it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
@@ -1544,6 +1549,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
      */
     qemu_file_set_blocking(file, true);
     mis->postcopy_qemufile_dst = file;
+    qemu_sem_post(&mis->postcopy_qemufile_dst_done);
     trace_postcopy_preempt_new_channel();
 }
 
@@ -1612,14 +1618,21 @@ out:
     postcopy_preempt_send_channel_done(s, ioc, local_err);
 }
 
-/* Returns 0 if channel established, -1 for error. */
-int postcopy_preempt_wait_channel(MigrationState *s)
+/*
+ * This function kicks off an async task to establish the preempt
+ * channel, and waits until the connection setup has completed.  Returns
+ * 0 if the channel was established or preempt is disabled, -1 on error.
+ */
+int postcopy_preempt_establish_channel(MigrationState *s)
 {
     /* If preempt not enabled, no need to wait */
     if (!migrate_postcopy_preempt()) {
         return 0;
     }
 
+    /* Kick off async task to establish preempt channel */
+    postcopy_preempt_setup(s);
+
     /*
      * We need the postcopy preempt channel to be established before
      * starting doing anything.
@@ -1629,22 +1642,10 @@ int postcopy_preempt_wait_channel(MigrationState *s)
     return s->postcopy_qemufile_src ? 0 : -1;
 }
 
-int postcopy_preempt_setup(MigrationState *s, Error **errp)
+void postcopy_preempt_setup(MigrationState *s)
 {
-    if (!migrate_postcopy_preempt()) {
-        return 0;
-    }
-
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "Postcopy preempt is not supported as current "
-                   "migration stream does not support multi-channels.");
-        return -1;
-    }
-
     /* Kick an async task to connect */
     socket_send_channel_create(postcopy_preempt_send_channel_new, s);
-
-    return 0;
 }
 
 static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis)
index 25881c41276259ee9dcd13e3e4199cc53d7c0fe5..b4867a32d573d07f9c4e2aab94d5a367644e0934 100644 (file)
@@ -191,7 +191,7 @@ enum PostcopyChannels {
 };
 
 void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
-int postcopy_preempt_setup(MigrationState *s, Error **errp);
-int postcopy_preempt_wait_channel(MigrationState *s);
+void postcopy_preempt_setup(MigrationState *s);
+int postcopy_preempt_establish_channel(MigrationState *s);
 
 #endif
index 2d5f74ffc22690197500f4b86cbd03ca280c63be..102ab3b4392c2f71aeb18ed5053bd95bfb56d81b 100644 (file)
@@ -940,3 +940,37 @@ QIOChannel *qemu_file_get_ioc(QEMUFile *file)
 {
     return file->ioc;
 }
+
+/*
+ * Read size bytes from QEMUFile f and write them to fd.
+ */
+int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size)
+{
+    while (size) {
+        size_t pending = f->buf_size - f->buf_index;
+        ssize_t rc;
+
+        if (!pending) {
+            rc = qemu_fill_buffer(f);
+            if (rc < 0) {
+                return rc;
+            }
+            if (rc == 0) {
+                return -EIO;
+            }
+            continue;
+        }
+
+        rc = write(fd, f->buf + f->buf_index, MIN(pending, size));
+        if (rc < 0) {
+            return -errno;
+        }
+        if (rc == 0) {
+            return -EIO;
+        }
+        f->buf_index += rc;
+        size -= rc;
+    }
+
+    return 0;
+}
index fa13d04d787ca8d3c3b46a90bbad097990e96d57..9d0155a2a12e10808b587319972b9d1af9dec1d6 100644 (file)
@@ -148,6 +148,7 @@ int qemu_file_shutdown(QEMUFile *f);
 QEMUFile *qemu_file_get_return_path(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
 void qemu_file_set_blocking(QEMUFile *f, bool block);
+int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
 
 void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
index b966e148c228533fbcaeef452198e9df930a3716..96e8a19a583879d613e7a12d2d38966c117d2e24 100644 (file)
 /***********************************************************/
 /* ram save/restore */
 
-/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
- * worked for pages that where filled with the same char.  We switched
+/*
+ * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
+ * worked for pages that were filled with the same char.  We switched
  * it to only search for the zero value.  And to avoid confusion with
- * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
+ * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
  */
-
-#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
+/*
+ * RAM_SAVE_FLAG_FULL was obsoleted in 2009; the value can be reused now.
+ */
+#define RAM_SAVE_FLAG_FULL     0x01
 #define RAM_SAVE_FLAG_ZERO     0x02
 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
+/* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
+/* We can't use any flag that is bigger than 0x200 */
+
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+        /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+            /* 0xe6:
+             *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+             *                    and ZMM16-ZMM31 state are enabled by OS)
+             *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+             */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+}
+#endif
 
 XBZRLECacheStats xbzrle_counters;
 
@@ -330,6 +362,8 @@ struct RAMState {
     PageSearchStatus pss[RAM_CHANNEL_MAX];
     /* UFFD file descriptor, used in 'write-tracking' migration */
     int uffdio_fd;
+    /* total ram size in bytes */
+    uint64_t ram_bytes_total;
     /* Last block that we have visited searching for dirty pages */
     RAMBlock *last_seen_block;
     /* Last dirty target page we have sent */
@@ -450,6 +484,13 @@ void dirty_sync_missed_zero_copy(void)
     ram_counters.dirty_sync_missed_zero_copy++;
 }
 
+struct MigrationOps {
+    int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss);
+};
+typedef struct MigrationOps MigrationOps;
+
+MigrationOps *migration_ops;
+
 CompressionStats compression_counters;
 
 struct CompressParam {
@@ -797,9 +838,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 
     /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
+    encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+                                            TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                            TARGET_PAGE_SIZE);
 
     /*
      * Update the cache contents, so that it corresponds to the data
@@ -1546,17 +1587,23 @@ retry:
     return pages;
 }
 
+#define PAGE_ALL_CLEAN 0
+#define PAGE_TRY_AGAIN 1
+#define PAGE_DIRTY_FOUND 2
 /**
  * find_dirty_block: find the next dirty page and update any state
  * associated with the search process.
  *
- * Returns true if a page is found
+ * Returns:
+ *         PAGE_ALL_CLEAN: no dirty page found, give up
+ *         PAGE_TRY_AGAIN: no dirty page found, retry for next block
+ *         PAGE_DIRTY_FOUND: dirty page found
  *
  * @rs: current RAM state
  * @pss: data about the state of the current dirty page scan
  * @again: set to false if the search has scanned the whole of RAM
  */
-static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
+static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
 {
     /* Update pss->page for the next dirty bit in ramblock */
     pss_find_next_dirty(pss);
@@ -1567,8 +1614,7 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
          * We've been once around the RAM and haven't found anything.
          * Give up.
          */
-        *again = false;
-        return false;
+        return PAGE_ALL_CLEAN;
     }
     if (!offset_in_ramblock(pss->block,
                             ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
@@ -1597,13 +1643,10 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
             }
         }
         /* Didn't find anything this time, but try again on the new block */
-        *again = true;
-        return false;
+        return PAGE_TRY_AGAIN;
     } else {
-        /* Can go around again, but... */
-        *again = true;
-        /* We've found something so probably don't need to */
-        return true;
+        /* We've found something */
+        return PAGE_DIRTY_FOUND;
     }
 }
 
@@ -2291,14 +2334,14 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
 }
 
 /**
- * ram_save_target_page: save one target page
+ * ram_save_target_page_legacy: save one target page
  *
  * Returns the number of pages written
  *
  * @rs: current RAM state
  * @pss: data about the page we want to send
  */
-static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
+static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock *block = pss->block;
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
@@ -2424,7 +2467,7 @@ static int ram_save_host_page_urgent(PageSearchStatus *pss)
 
         if (page_dirty) {
             /* Be strict to return code; it must be 1, or what else? */
-            if (ram_save_target_page(rs, pss) != 1) {
+            if (migration_ops->ram_save_target_page(rs, pss) != 1) {
                 error_report_once("%s: ram_save_target_page failed", __func__);
                 ret = -1;
                 goto out;
@@ -2493,7 +2536,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
             if (preempt_active) {
                 qemu_mutex_unlock(&rs->bitmap_mutex);
             }
-            tmppages = ram_save_target_page(rs, pss);
+            tmppages = migration_ops->ram_save_target_page(rs, pss);
             if (tmppages >= 0) {
                 pages += tmppages;
                 /*
@@ -2542,10 +2585,9 @@ static int ram_find_and_save_block(RAMState *rs)
 {
     PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
     int pages = 0;
-    bool again, found;
 
     /* No dirty page as there is zero RAM */
-    if (!ram_bytes_total()) {
+    if (!rs->ram_bytes_total) {
         return pages;
     }
 
@@ -2563,19 +2605,23 @@ static int ram_find_and_save_block(RAMState *rs)
 
     pss_init(pss, rs->last_seen_block, rs->last_page);
 
-    do {
-        again = true;
-        found = get_queued_page(rs, pss);
-
-        if (!found) {
+    while (true){
+        if (!get_queued_page(rs, pss)) {
             /* priority queue empty, so just search for something dirty */
-            found = find_dirty_block(rs, pss, &again);
+            int res = find_dirty_block(rs, pss);
+            if (res != PAGE_DIRTY_FOUND) {
+                if (res == PAGE_ALL_CLEAN) {
+                    break;
+                } else if (res == PAGE_TRY_AGAIN) {
+                    continue;
+                }
+            }
         }
-
-        if (found) {
-            pages = ram_save_host_page(rs, pss);
+        pages = ram_save_host_page(rs, pss);
+        if (pages) {
+            break;
         }
-    } while (!pages && again);
+    }
 
     rs->last_seen_block = pss->block;
     rs->last_page = pss->page;
@@ -2596,28 +2642,30 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
     }
 }
 
-static uint64_t ram_bytes_total_common(bool count_ignored)
+static uint64_t ram_bytes_total_with_ignored(void)
 {
     RAMBlock *block;
     uint64_t total = 0;
 
     RCU_READ_LOCK_GUARD();
 
-    if (count_ignored) {
-        RAMBLOCK_FOREACH_MIGRATABLE(block) {
-            total += block->used_length;
-        }
-    } else {
-        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-            total += block->used_length;
-        }
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        total += block->used_length;
     }
     return total;
 }
 
 uint64_t ram_bytes_total(void)
 {
-    return ram_bytes_total_common(false);
+    RAMBlock *block;
+    uint64_t total = 0;
+
+    RCU_READ_LOCK_GUARD();
+
+    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+        total += block->used_length;
+    }
+    return total;
 }
 
 static void xbzrle_load_setup(void)
@@ -2688,6 +2736,8 @@ static void ram_save_cleanup(void *opaque)
     xbzrle_cleanup();
     compress_threads_save_cleanup();
     ram_state_cleanup(rsp);
+    g_free(migration_ops);
+    migration_ops = NULL;
 }
 
 static void ram_state_reset(RAMState *rs)
@@ -3002,13 +3052,14 @@ static int ram_state_init(RAMState **rsp)
     qemu_mutex_init(&(*rsp)->bitmap_mutex);
     qemu_mutex_init(&(*rsp)->src_page_req_mutex);
     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
+    (*rsp)->ram_bytes_total = ram_bytes_total();
 
     /*
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
      * This must match with the initial values of dirty bitmap.
      */
-    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+    (*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS;
     ram_state_reset(*rsp);
 
     return 0;
@@ -3222,7 +3273,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
 
     WITH_RCU_READ_LOCK_GUARD() {
-        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
+        qemu_put_be64(f, ram_bytes_total_with_ignored()
+                         | RAM_SAVE_FLAG_MEM_SIZE);
 
         RAMBLOCK_FOREACH_MIGRATABLE(block) {
             qemu_put_byte(f, strlen(block->idstr));
@@ -3241,6 +3293,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 
+    migration_ops = g_malloc0(sizeof(MigrationOps));
+    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
     ret =  multifd_send_sync_main(f);
     if (ret < 0) {
         return ret;
@@ -3435,10 +3489,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }
 
-static void ram_state_pending_estimate(void *opaque,
-                                       uint64_t *res_precopy_only,
-                                       uint64_t *res_compatible,
-                                       uint64_t *res_postcopy_only)
+static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
+                                       uint64_t *can_postcopy)
 {
     RAMState **temp = opaque;
     RAMState *rs = *temp;
@@ -3447,16 +3499,14 @@ static void ram_state_pending_estimate(void *opaque,
 
     if (migrate_postcopy_ram()) {
         /* We can do postcopy, and all the data is postcopiable */
-        *res_postcopy_only += remaining_size;
+        *can_postcopy += remaining_size;
     } else {
-        *res_precopy_only += remaining_size;
+        *must_precopy += remaining_size;
     }
 }
 
-static void ram_state_pending_exact(void *opaque,
-                                    uint64_t *res_precopy_only,
-                                    uint64_t *res_compatible,
-                                    uint64_t *res_postcopy_only)
+static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
+                                    uint64_t *can_postcopy)
 {
     RAMState **temp = opaque;
     RAMState *rs = *temp;
@@ -3474,9 +3524,9 @@ static void ram_state_pending_exact(void *opaque,
 
     if (migrate_postcopy_ram()) {
         /* We can do postcopy, and all the data is postcopiable */
-        *res_compatible += remaining_size;
+        *can_postcopy += remaining_size;
     } else {
-        *res_precopy_only += remaining_size;
+        *must_precopy += remaining_size;
     }
 }
 
index e9cf4999ade09ed5c3ff2af86c440b3137bdfd0e..aa54a67fdaa8f8b393301bcf847937f04e6e37d4 100644 (file)
@@ -1541,18 +1541,16 @@ flush:
  * the result is split into the amount for units that can and
  * for units that can't do postcopy.
  */
-void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
-                                        uint64_t *res_compatible,
-                                        uint64_t *res_postcopy_only)
+void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
+                                        uint64_t *can_postcopy)
 {
     SaveStateEntry *se;
 
-    *res_precopy_only = 0;
-    *res_compatible = 0;
-    *res_postcopy_only = 0;
+    *must_precopy = 0;
+    *can_postcopy = 0;
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        if (!se->ops || !se->ops->state_pending_exact) {
+        if (!se->ops || !se->ops->state_pending_estimate) {
             continue;
         }
         if (se->ops->is_active) {
@@ -1560,24 +1558,20 @@ void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
                 continue;
             }
         }
-        se->ops->state_pending_exact(se->opaque,
-                                     res_precopy_only, res_compatible,
-                                     res_postcopy_only);
+        se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
     }
 }
 
-void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
-                                     uint64_t *res_compatible,
-                                     uint64_t *res_postcopy_only)
+void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
+                                     uint64_t *can_postcopy)
 {
     SaveStateEntry *se;
 
-    *res_precopy_only = 0;
-    *res_compatible = 0;
-    *res_postcopy_only = 0;
+    *must_precopy = 0;
+    *can_postcopy = 0;
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        if (!se->ops || !se->ops->state_pending_estimate) {
+        if (!se->ops || !se->ops->state_pending_exact) {
             continue;
         }
         if (se->ops->is_active) {
@@ -1585,9 +1579,7 @@ void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
                 continue;
             }
         }
-        se->ops->state_pending_estimate(se->opaque,
-                                        res_precopy_only, res_compatible,
-                                        res_postcopy_only);
+        se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
     }
 }
 
@@ -2200,7 +2192,11 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
     qemu_sem_post(&mis->postcopy_pause_sem_fault);
 
     if (migrate_postcopy_preempt()) {
-        /* The channel should already be setup again; make sure of it */
+        /*
+         * The preempt channel will be created in async manner, now let's
+         * wait for it and make sure it's created.
+         */
+        qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
         assert(mis->postcopy_qemufile_dst);
         /* Kick the fast ram load thread too */
         qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
index b1901e68d5686b2ec2fde82fb7eff45efc4ed4bf..fb636735f0afa13f907459fa84878bbb842ddbe7 100644 (file)
@@ -40,12 +40,10 @@ void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                        bool inactivate_disks);
-void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
-                                     uint64_t *res_compatible,
-                                     uint64_t *res_postcopy_only);
-void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
-                                        uint64_t *res_compatible,
-                                        uint64_t *res_postcopy_only);
+void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
+                                     uint64_t *can_postcopy);
+void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
+                                        uint64_t *can_postcopy);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
index 67b65a70ff26eabfd7e832765b811fd41e446d96..92161eeac59625f9b208d34cbe6e5f6fcd902e74 100644 (file)
@@ -150,8 +150,8 @@ migrate_fd_cleanup(void) ""
 migrate_fd_error(const char *error_desc) "error=%s"
 migrate_fd_cancel(void) ""
 migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
-migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t compat, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
-migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t compat, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
+migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")"
+migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")"
 migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
 migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi64
 migration_completion_file_err(void) ""
@@ -357,6 +357,7 @@ migration_block_flush_blks(const char *action, int submitted, int read_done, int
 migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
 migration_block_save_complete(void) "Block migration completed"
 migration_block_state_pending(uint64_t pending) "Enter save live pending  %" PRIu64
+migration_block_progression(unsigned percent) "Completed %u%%"
 
 # page_cache.c
 migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64
index 1ba482ded9c4ffcfb48fe774396e379bb7726284..05366e86c05a3d7e901d197fc406271f54f0eebb 100644 (file)
@@ -174,3 +174,127 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
 
     return d;
 }
+
+#if defined(CONFIG_AVX512BW_OPT)
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <immintrin.h>
+/**
+ * xbzrle_encode_buffer_avx512: encode the difference between two buffers,
+ * comparing 64 bytes per step with AVX512BW
+ *
+ * The data written to @dst alternates between the length of a run of bytes
+ * that are equal in @old_buf and @new_buf, and the length of a run of
+ * differing bytes followed by those bytes copied from @new_buf.  Run
+ * lengths are written with uleb128_encode_small().  A trailing run of
+ * equal bytes is not written.
+ *
+ * Returns the number of bytes written to @dst (0 when no byte differs),
+ * or -1 if the encoded data does not fit into @dlen bytes.
+ *
+ * @old_buf: previous contents
+ * @new_buf: current contents
+ * @slen: length in bytes of both @old_buf and @new_buf
+ * @dst: destination for the encoded data
+ * @dlen: capacity of @dst in bytes
+ */
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0, i = 0, num = 0;
+    uint8_t *nzrun_start = NULL;
+    /* add 1 to include residual part in main loop */
+    uint32_t count512s = (slen >> 6) + 1;
+    /* count_residual is the tail of the data, i.e. slen % 64 */
+    uint32_t count_residual = slen & 0b111111;
+    bool never_same = true;
+    /* low count_residual bits set: selects only the tail bytes */
+    uint64_t mask_residual = 1;
+    mask_residual <<= count_residual;
+    mask_residual -= 1;
+    __m512i r = _mm512_set1_epi32(0);
+
+    while (count512s) {
+        if (d + 2 > dlen) {
+            return -1;
+        }
+
+        int bytes_to_check = 64;
+        uint64_t mask = 0xffffffffffffffff;
+        if (count512s == 1) {
+            /* last round: only the residual bytes are loaded and checked */
+            bytes_to_check = count_residual;
+            mask = mask_residual;
+        }
+        /* lanes not selected by mask take the zero value of r in both loads */
+        __m512i old_data = _mm512_mask_loadu_epi8(r,
+                                                  mask, old_buf + i);
+        __m512i new_data = _mm512_mask_loadu_epi8(r,
+                                                  mask, new_buf + i);
+        /* bit n of comp is set when byte n is equal in both vectors */
+        uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
+        count512s--;
+
+        bool is_same = (comp & 0x1);
+        while (bytes_to_check) {
+            if (is_same) {
+                if (nzrun_len) {
+                    /*
+                     * Apply the same 2-byte reservation as the per-chunk
+                     * check before every length header, so that a header
+                     * is never written past the end of dst.
+                     */
+                    if (d + 2 > dlen) {
+                        return -1;
+                    }
+                    d += uleb128_encode_small(dst + d, nzrun_len);
+                    if (d + nzrun_len > dlen) {
+                        return -1;
+                    }
+                    nzrun_start = new_buf + i - nzrun_len;
+                    memcpy(dst + d, nzrun_start, nzrun_len);
+                    d += nzrun_len;
+                    nzrun_len = 0;
+                }
+                /* whole 64-byte chunk is equal: skip it in one step */
+                if (count512s && (comp == 0xffffffffffffffff)) {
+                    i += 64;
+                    zrun_len += 64;
+                    break;
+                }
+                never_same = false;
+                /*
+                 * comp can be all ones here in the last round, and
+                 * __builtin_ctzll(0) is undefined, so handle it explicitly.
+                 */
+                num = ~comp ? __builtin_ctzll(~comp) : 64;
+                num = (num < bytes_to_check) ? num : bytes_to_check;
+                zrun_len += num;
+                bytes_to_check -= num;
+                comp >>= num;
+                i += num;
+                if (bytes_to_check) {
+                    /* still has different data after same data */
+                    if (d + 2 > dlen) {
+                        return -1;
+                    }
+                    d += uleb128_encode_small(dst + d, zrun_len);
+                    zrun_len = 0;
+                } else {
+                    break;
+                }
+            }
+            if (never_same || zrun_len) {
+                /*
+                 * never_same only acts if
+                 * data begins with diff in first count512s
+                 */
+                if (d + 2 > dlen) {
+                    return -1;
+                }
+                d += uleb128_encode_small(dst + d, zrun_len);
+                zrun_len = 0;
+                never_same = false;
+            }
+            /* whole 64-byte chunk differs: skip it in one step */
+            if ((bytes_to_check == 64) && (comp == 0x0)) {
+                i += 64;
+                nzrun_len += 64;
+                break;
+            }
+            /*
+             * comp can be 0 here once the equal bytes have been shifted
+             * out and the rest of the chunk differs; avoid the undefined
+             * __builtin_ctzll(0).
+             */
+            num = comp ? __builtin_ctzll(comp) : 64;
+            num = (num < bytes_to_check) ? num : bytes_to_check;
+            nzrun_len += num;
+            bytes_to_check -= num;
+            comp >>= num;
+            i += num;
+            if (bytes_to_check) {
+                /* mask like 111000 */
+                if (d + 2 > dlen) {
+                    return -1;
+                }
+                d += uleb128_encode_small(dst + d, nzrun_len);
+                /* overflow */
+                if (d + nzrun_len > dlen) {
+                    return -1;
+                }
+                nzrun_start = new_buf + i - nzrun_len;
+                memcpy(dst + d, nzrun_start, nzrun_len);
+                d += nzrun_len;
+                nzrun_len = 0;
+                is_same = true;
+            }
+        }
+    }
+
+    /* flush a run of differing bytes that reaches the end of the data */
+    if (nzrun_len != 0) {
+        if (d + 2 > dlen) {
+            return -1;
+        }
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        /* overflow */
+        if (d + nzrun_len > dlen) {
+            return -1;
+        }
+        nzrun_start = new_buf + i - nzrun_len;
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+    }
+    return d;
+}
+#pragma GCC pop_options
+#endif
index a0db507b9cd9475277749de89a91d2cb98315c21..6feb49160adfff0fd1cdf52e16bf0d6952117954 100644 (file)
@@ -18,4 +18,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#if defined(CONFIG_AVX512BW_OPT)
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+#endif
 #endif
index 53b2d32573b4640b640e0b3ffe979e892e9ac3e7..b5547cb917af98484f332db80462f9e2f9f411b4 100644 (file)
@@ -42,7 +42,7 @@
  */
 
 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
-#define BUFFER_SIZE 2048
+#define BUFFER_SIZE 16384
 #define IOVSIZE 2
 #define MAX_L2TPV3_MSGCNT 64
 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
index 251fc5ab55b92db5928951e9199e17cf5fc506df..ebc7ce023186763337e1519ec2e6923b97998fc4 100644 (file)
--- a/net/net.c
+++ b/net/net.c
@@ -899,6 +899,40 @@ static int nic_get_free_idx(void)
     return -1;
 }
 
+/*
+ * Collect the names of all user-creatable network-category device classes
+ * derived from @device_type whose instances have a "netdev" property.
+ * The returned array is NULL-terminated.
+ */
+GPtrArray *qemu_get_nic_models(const char *device_type)
+{
+    GPtrArray *models = g_ptr_array_new();
+    GSList *node = object_class_get_list_sorted(device_type, false);
+
+    while (node) {
+        GSList *rest = node->next;
+        ObjectClass *oc = node->data;
+        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, oc, TYPE_DEVICE);
+
+        /* the list node is consumed here; the class it pointed to is not */
+        g_slist_free_1(node);
+        node = rest;
+
+        if (!test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) ||
+            !dc->user_creatable) {
+            continue;
+        }
+
+        const char *model_name = object_class_get_name(oc);
+        /*
+         * Not every network device is a NIC (the "rocker" device is one
+         * example), so the "netdev" property has to be checked as well.
+         * Some devices, such as virtio-net, only add that property in
+         * instance_init, hence the check is done on a temporary instance.
+         */
+        Object *probe = object_new_with_class(OBJECT_CLASS(dc));
+        if (object_property_find(probe, "netdev")) {
+            g_ptr_array_add(models, (gpointer)model_name);
+        }
+        object_unref(probe);
+    }
+
+    /* terminator expected by consumers that walk the array as a list */
+    g_ptr_array_add(models, NULL);
+    return models;
+}
+
 int qemu_show_nic_models(const char *arg, const char *const *models)
 {
     int i;
@@ -907,7 +941,7 @@ int qemu_show_nic_models(const char *arg, const char *const *models)
         return 0;
     }
 
-    printf("Supported NIC models:\n");
+    printf("Available NIC models:\n");
     for (i = 0 ; models[i]; i++) {
         printf("%s\n", models[i]);
     }
@@ -1508,8 +1542,18 @@ static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
     const char *type;
 
     type = qemu_opt_get(opts, "type");
-    if (type && g_str_equal(type, "none")) {
-        return 0;    /* Nothing to do, default_net is cleared in vl.c */
+    if (type) {
+        if (g_str_equal(type, "none")) {
+            return 0;    /* Nothing to do, default_net is cleared in vl.c */
+        }
+        if (is_help_option(type)) {
+            GPtrArray *nic_models = qemu_get_nic_models(TYPE_DEVICE);
+            show_netdevs();
+            printf("\n");
+            qemu_show_nic_models(type, (const char **)nic_models->pdata);
+            g_ptr_array_free(nic_models, true);
+            exit(0);
+        }
     }
 
     idx = nic_get_free_idx();
index 37ff727e0c42fdd514ec853d50f139a8c14bae0f..9204b4c96e4079e343ff1b39733e59de910732fb 100644 (file)
@@ -39,6 +39,8 @@
 #include "io/channel-socket.h"
 #include "io/net-listener.h"
 #include "qapi/qapi-events-net.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/clone-visitor.h"
 
 typedef struct NetStreamState {
     NetClientState nc;
@@ -49,11 +51,15 @@ typedef struct NetStreamState {
     guint ioc_write_tag;
     SocketReadState rs;
     unsigned int send_index;      /* number of bytes sent*/
+    uint32_t reconnect;
+    guint timer_tag;
+    SocketAddress *addr;
 } NetStreamState;
 
 static void net_stream_listen(QIONetListener *listener,
                               QIOChannelSocket *cioc,
                               void *opaque);
+static void net_stream_arm_reconnect(NetStreamState *s);
 
 static gboolean net_stream_writable(QIOChannel *ioc,
                                     GIOCondition condition,
@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc,
         qemu_set_info_str(&s->nc, "%s", "");
 
         qapi_event_send_netdev_stream_disconnected(s->nc.name);
+        net_stream_arm_reconnect(s);
 
         return G_SOURCE_REMOVE;
     }
@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc,
 static void net_stream_cleanup(NetClientState *nc)
 {
     NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
+    if (s->timer_tag) {
+        g_source_remove(s->timer_tag);
+        s->timer_tag = 0;
+    }
+    if (s->addr) {
+        qapi_free_SocketAddress(s->addr);
+        s->addr = NULL;
+    }
     if (s->ioc) {
         if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) {
             if (s->ioc_read_tag) {
@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque)
 error:
     object_unref(OBJECT(s->ioc));
     s->ioc = NULL;
+    net_stream_arm_reconnect(s);
+}
+
+/*
+ * Timer callback: start a new asynchronous connection attempt to the
+ * stored address.  Returns G_SOURCE_REMOVE, so the timer fires only once.
+ */
+static gboolean net_stream_reconnect(gpointer data)
+{
+    NetStreamState *state = data;
+
+    /* this timer is done; clear the tag so that a new one can be armed */
+    state->timer_tag = 0;
+
+    QIOChannelSocket *sock = qio_channel_socket_new();
+    state->ioc = QIO_CHANNEL(sock);
+    qio_channel_socket_connect_async(sock, state->addr,
+                                     net_stream_client_connected, state,
+                                     NULL, NULL);
+
+    return G_SOURCE_REMOVE;
+}
+
+/*
+ * Schedule net_stream_reconnect() to run after s->reconnect seconds.
+ * Does nothing when reconnect is 0 or a timer is already pending.
+ */
+static void net_stream_arm_reconnect(NetStreamState *s)
+{
+    if (!s->reconnect || s->timer_tag != 0) {
+        return;
+    }
+
+    s->timer_tag = g_timeout_add_seconds(s->reconnect,
+                                         net_stream_reconnect, s);
+}
 
 static int net_stream_client_init(NetClientState *peer,
                                   const char *model,
                                   const char *name,
                                   SocketAddress *addr,
+                                  uint32_t reconnect,
                                   Error **errp)
 {
     NetStreamState *s;
@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer,
     s->ioc = QIO_CHANNEL(sioc);
     s->nc.link_down = true;
 
+    s->reconnect = reconnect;
+    if (reconnect) {
+        s->addr = QAPI_CLONE(SocketAddress, addr);
+    }
     qio_channel_socket_connect_async(sioc, addr,
                                      net_stream_client_connected, s,
                                      NULL, NULL);
@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name,
     sock = &netdev->u.stream;
 
     if (!sock->has_server || !sock->server) {
-        return net_stream_client_init(peer, "stream", name, sock->addr, errp);
+        return net_stream_client_init(peer, "stream", name, sock->addr,
+                                      sock->has_reconnect ? sock->reconnect : 0,
+                                      errp);
+    }
+    if (sock->has_reconnect) {
+        error_setg(errp, "'reconnect' option is incompatible with "
+                         "socket in server mode");
+        return -1;
     }
     return net_stream_server_init(peer, "stream", name, sock->addr, errp);
 }
index 1a13a34d35c0577bf0efb221c8b800decfcd144f..de5ed8ff22ca6a744b1ef2c89377193a8238c84a 100644 (file)
@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
             g_strerror(errno), errno);
         return -1;
     }
-    if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
+    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
         !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
         return 0;
     }
index 2cb60b9ddde7b32ef063cc9f23aec08e6b36e2ac..295828348501ba291ca88e5e3632c6156ff6fc10 100644 (file)
@@ -17,6 +17,7 @@
 #include "clients.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "sysemu/runstate.h"
 
 #include <vmnet/vmnet.h>
 #include <dispatch/dispatch.h>
@@ -242,6 +243,35 @@ static void vmnet_bufs_init(VmnetState *s)
     }
 }
 
+/**
+ * VM state change handler: registers the packets-available event callback
+ * while the VM is running and un-registers it otherwise.
+ */
+static void vmnet_vm_state_change_cb(void *opaque, bool running, RunState state)
+{
+    VmnetState *vs = opaque;
+
+    if (!running) {
+        /* un-register: NULL queue and NULL handler */
+        vmnet_interface_set_event_callback(
+            vs->vmnet_if,
+            VMNET_INTERFACE_PACKETS_AVAILABLE,
+            NULL,
+            NULL);
+        return;
+    }
+
+    vmnet_interface_set_event_callback(
+        vs->vmnet_if,
+        VMNET_INTERFACE_PACKETS_AVAILABLE,
+        vs->if_queue,
+        ^(interface_event_t event_id, xpc_object_t event) {
+            assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
+            /*
+             * The handler is invoked from a non qemu thread, so it only
+             * schedules a BH; the io completion handling itself is done
+             * in vmnet_send_bh(), which runs in a qemu context.
+             */
+            qemu_bh_schedule(vs->send_bh);
+        });
+}
 
 int vmnet_if_create(NetClientState *nc,
                     xpc_object_t if_desc,
@@ -329,19 +359,9 @@ int vmnet_if_create(NetClientState *nc,
     s->packets_send_current_pos = 0;
     s->packets_send_end_pos = 0;
 
-    vmnet_interface_set_event_callback(
-        s->vmnet_if,
-        VMNET_INTERFACE_PACKETS_AVAILABLE,
-        s->if_queue,
-        ^(interface_event_t event_id, xpc_object_t event) {
-            assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
-            /*
-             * This function is being called from a non qemu thread, so
-             * we only schedule a BH, and do the rest of the io completion
-             * handling from vmnet_send_bh() which runs in a qemu context.
-             */
-            qemu_bh_schedule(s->send_bh);
-        });
+    vmnet_vm_state_change_cb(s, 1, RUN_STATE_RUNNING);
+
+    s->change = qemu_add_vm_change_state_handler(vmnet_vm_state_change_cb, s);
 
     return 0;
 }
@@ -356,6 +376,8 @@ void vmnet_cleanup_common(NetClientState *nc)
         return;
     }
 
+    vmnet_vm_state_change_cb(s, 0, RUN_STATE_SHUTDOWN);
+    qemu_del_vm_change_state_handler(s->change);
     if_stopped_sem = dispatch_semaphore_create(0);
     vmnet_stop_interface(
         s->vmnet_if,
index d0b90594f293c4ece81e8ce9bfb3ba26d5374cae..a8a033dc9687da1390644566b2f180ba9ba7a85d 100644 (file)
@@ -45,6 +45,8 @@ typedef struct VmnetState {
     int packets_send_end_pos;
 
     struct iovec iov_buf[VMNET_PACKETS_LIMIT];
+
+    VMChangeStateEntry *change;
 } VmnetState;
 
 const char *vmnet_status_map_str(vmnet_return_t status);
index b94f3fb0811b16a2bb87e63a2c01f3a2a2c1980a..3702ed485c638b7499cbb6050057978128184164 100644 (file)
   -machine pseries,x-vof=on. When enabled, the firmware acts as a slim shim and
   QEMU implements parts of the IEEE 1275 Open Firmware interface.
 
-- sgabios (the Serial Graphics Adapter option ROM) provides a means for
-  legacy x86 software to communicate with an attached serial console as
-  if a video card were attached.  The master sources reside in a subversion
-  repository at http://sgabios.googlecode.com/svn/trunk.  A git mirror is
-  available at https://gitlab.com/qemu-project/sgabios.git.
-
 - The PXE roms come from the iPXE project. Built with BANNER_TIME 0.
   Sources available at http://ipxe.org.  Vendor:Device ID -> ROM mapping:
 
index 388e0db6e4035192bd2439ca2195b27efc3a8f71..a7224ef4699bc90b02b7d2d6e4485b6296089691 100644 (file)
@@ -28,7 +28,6 @@ blobs = [
   'bios-256k.bin',
   'bios-microvm.bin',
   'qboot.rom',
-  'sgabios.bin',
   'vgabios.bin',
   'vgabios-cirrus.bin',
   'vgabios-stdvga.bin',
diff --git a/pc-bios/sgabios.bin b/pc-bios/sgabios.bin
deleted file mode 100644 (file)
index 6308f2e..0000000
Binary files a/pc-bios/sgabios.bin and /dev/null differ
index 904f324bb11ac71b5091af55bfedb5649d227bc5..c3ceb1ca0ab173c1f74ddb1fb1bb9a23a3830fe2 100644 (file)
@@ -11,8 +11,8 @@ qemu.egg-info/
 .idea/
 .vscode/
 
-# virtual environments (pipenv et al)
-.venv/
+# virtual environments
+.min-venv/
 .tox/
 .dev-venv/
 
index b170708398abe257478e7729d614062d9e9cd143..c5bd6ff83ac90fe95b6ff88588034143f30eeaeb 100644 (file)
@@ -1,15 +1,16 @@
 QEMU_VENV_DIR=.dev-venv
+QEMU_MINVENV_DIR=.min-venv
 QEMU_TOX_EXTRA_ARGS ?=
 
 .PHONY: help
 help:
        @echo "python packaging help:"
        @echo ""
-       @echo "make check-pipenv:"
-       @echo "    Run tests in pipenv's virtual environment."
+       @echo "make check-minreqs:"
+       @echo "    Run tests in the minreqs virtual environment."
        @echo "    These tests use the oldest dependencies."
-       @echo "    Requires: Python 3.6 and pipenv."
-       @echo "    Hint (Fedora): 'sudo dnf install python3.6 pipenv'"
+       @echo "    Requires: Python 3.6"
+       @echo "    Hint (Fedora): 'sudo dnf install python3.6'"
        @echo ""
        @echo "make check-tox:"
        @echo "    Run tests against multiple python versions."
@@ -33,8 +34,8 @@ help:
        @echo "    and install the qemu package in editable mode."
        @echo "    (Can be used in or outside of a venv.)"
        @echo ""
-       @echo "make pipenv"
-       @echo "    Creates pipenv's virtual environment (.venv)"
+       @echo "make min-venv"
+       @echo "    Creates the minreqs virtual environment ($(QEMU_MINVENV_DIR))"
        @echo ""
        @echo "make dev-venv"
        @echo "    Creates a simple venv for check-dev. ($(QEMU_VENV_DIR))"
@@ -43,21 +44,38 @@ help:
        @echo "    Remove package build output."
        @echo ""
        @echo "make distclean:"
-       @echo "    remove pipenv/venv files, qemu package forwarder,"
+       @echo "    remove venv files, qemu package forwarder,"
        @echo "    built distribution files, and everything from 'make clean'."
        @echo ""
        @echo -e "Have a nice day ^_^\n"
 
-.PHONY: pipenv
-pipenv: .venv
-.venv: Pipfile.lock
-       @PIPENV_VENV_IN_PROJECT=1 pipenv sync --dev --keep-outdated
-       rm -f pyproject.toml
-       @touch .venv
+.PHONY: pipenv check-pipenv
+pipenv check-pipenv:
+       @echo "pipenv was dropped; try 'make check-minreqs' or 'make min-venv'"
+       @exit 1
+
+.PHONY: min-venv
+min-venv: $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate
+$(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate: setup.cfg tests/minreqs.txt
+       @echo "VENV $(QEMU_MINVENV_DIR)"
+       @python3.6 -m venv $(QEMU_MINVENV_DIR)
+       @(                                                              \
+               echo "ACTIVATE $(QEMU_MINVENV_DIR)";                    \
+               . $(QEMU_MINVENV_DIR)/bin/activate;                     \
+               echo "INSTALL -r tests/minreqs.txt $(QEMU_MINVENV_DIR)";\
+               pip install -r tests/minreqs.txt 1>/dev/null;           \
+               echo "INSTALL -e qemu $(QEMU_MINVENV_DIR)";             \
+               pip install -e . 1>/dev/null;                           \
+       )
+       @touch $(QEMU_MINVENV_DIR)
 
-.PHONY: check-pipenv
-check-pipenv: pipenv
-       @pipenv run make check
+.PHONY: check-minreqs
+check-minreqs: min-venv
+       @(                                                      \
+               echo "ACTIVATE $(QEMU_MINVENV_DIR)";            \
+               . $(QEMU_MINVENV_DIR)/bin/activate;             \
+               make check;                                     \
+       )
 
 .PHONY: dev-venv
 dev-venv: $(QEMU_VENV_DIR) $(QEMU_VENV_DIR)/bin/activate
@@ -106,6 +124,7 @@ clean:
 
 .PHONY: distclean
 distclean: clean
-       rm -rf qemu.egg-info/ .venv/ .tox/ $(QEMU_VENV_DIR) dist/
+       rm -rf qemu.egg-info/ .eggs/ dist/
+       rm -rf $(QEMU_VENV_DIR) $(QEMU_MINVENV_DIR) .tox/
        rm -f .coverage .coverage.*
        rm -rf htmlcov/
diff --git a/python/Pipfile b/python/Pipfile
deleted file mode 100644 (file)
index e7acb8c..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-[[source]]
-name = "pypi"
-url = "https://pypi.org/simple"
-verify_ssl = true
-
-[dev-packages]
-qemu = {editable = true, extras = ["devel"], path = "."}
-
-[packages]
-qemu = {editable = true,path = "."}
-
-[requires]
-python_version = "3.6"
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
deleted file mode 100644 (file)
index ce46404..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-{
-    "_meta": {
-        "hash": {
-            "sha256": "f1a25654d884a5b450e38d78b1f2e3ebb9073e421cc4358d4bbb83ac251a5670"
-        },
-        "pipfile-spec": 6,
-        "requires": {
-            "python_version": "3.6"
-        },
-        "sources": [
-            {
-                "name": "pypi",
-                "url": "https://pypi.org/simple",
-                "verify_ssl": true
-            }
-        ]
-    },
-    "default": {
-        "qemu": {
-            "editable": true,
-            "path": "."
-        }
-    },
-    "develop": {
-        "appdirs": {
-            "hashes": [
-                "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41",
-                "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"
-            ],
-            "version": "==1.4.4"
-        },
-        "astroid": {
-            "hashes": [
-                "sha256:09bdb456e02564731f8b5957cdd0c98a7f01d2db5e90eb1d794c353c28bfd705",
-                "sha256:6a8a51f64dae307f6e0c9db752b66a7951e282389d8362cc1d39a56f3feeb31d"
-            ],
-            "index": "pypi",
-            "version": "==2.6.0"
-        },
-        "avocado-framework": {
-            "hashes": [
-                "sha256:244cb569f8eb4e50a22ac82e1a2b2bba2458999f4281efbe2651bd415d59c65b",
-                "sha256:6f15998b67ecd0e7dde790c4de4dd249d6df52dfe6d5cc4e2dd6596df51c3583"
-            ],
-            "index": "pypi",
-            "version": "==90.0"
-        },
-        "distlib": {
-            "hashes": [
-                "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736",
-                "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"
-            ],
-            "index": "pypi",
-            "version": "==0.3.2"
-        },
-        "filelock": {
-            "hashes": [
-                "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
-                "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
-            ],
-            "index": "pypi",
-            "version": "==3.0.12"
-        },
-        "flake8": {
-            "hashes": [
-                "sha256:6a35f5b8761f45c5513e3405f110a86bea57982c3b75b766ce7b65217abe1670",
-                "sha256:c01f8a3963b3571a8e6bd7a4063359aff90749e160778e03817cd9b71c9e07d2"
-            ],
-            "index": "pypi",
-            "version": "==3.6.0"
-        },
-        "fusepy": {
-            "hashes": [
-                "sha256:10f5c7f5414241bffecdc333c4d3a725f1d6605cae6b4eaf86a838ff49cdaf6c",
-                "sha256:a9f3a3699080ddcf0919fd1eb2cf743e1f5859ca54c2018632f939bdfac269ee"
-            ],
-            "index": "pypi",
-            "version": "==2.0.4"
-        },
-        "importlib-metadata": {
-            "hashes": [
-                "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83",
-                "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"
-            ],
-            "markers": "python_version < '3.8'",
-            "version": "==1.7.0"
-        },
-        "importlib-resources": {
-            "hashes": [
-                "sha256:54161657e8ffc76596c4ede7080ca68cb02962a2e074a2586b695a93a925d36e",
-                "sha256:e962bff7440364183203d179d7ae9ad90cb1f2b74dcb84300e88ecc42dca3351"
-            ],
-            "index": "pypi",
-            "version": "==5.1.4"
-        },
-        "isort": {
-            "hashes": [
-                "sha256:408e4d75d84f51b64d0824894afee44469eba34a4caee621dc53799f80d71ccc",
-                "sha256:64022dea6a06badfa09b300b4dfe8ba968114a737919e8ed50aea1c288f078aa"
-            ],
-            "index": "pypi",
-            "version": "==5.1.2"
-        },
-        "lazy-object-proxy": {
-            "hashes": [
-                "sha256:17e0967ba374fc24141738c69736da90e94419338fd4c7c7bef01ee26b339653",
-                "sha256:1fee665d2638491f4d6e55bd483e15ef21f6c8c2095f235fef72601021e64f61",
-                "sha256:22ddd618cefe54305df49e4c069fa65715be4ad0e78e8d252a33debf00f6ede2",
-                "sha256:24a5045889cc2729033b3e604d496c2b6f588c754f7a62027ad4437a7ecc4837",
-                "sha256:410283732af311b51b837894fa2f24f2c0039aa7f220135192b38fcc42bd43d3",
-                "sha256:4732c765372bd78a2d6b2150a6e99d00a78ec963375f236979c0626b97ed8e43",
-                "sha256:489000d368377571c6f982fba6497f2aa13c6d1facc40660963da62f5c379726",
-                "sha256:4f60460e9f1eb632584c9685bccea152f4ac2130e299784dbaf9fae9f49891b3",
-                "sha256:5743a5ab42ae40caa8421b320ebf3a998f89c85cdc8376d6b2e00bd12bd1b587",
-                "sha256:85fb7608121fd5621cc4377a8961d0b32ccf84a7285b4f1d21988b2eae2868e8",
-                "sha256:9698110e36e2df951c7c36b6729e96429c9c32b3331989ef19976592c5f3c77a",
-                "sha256:9d397bf41caad3f489e10774667310d73cb9c4258e9aed94b9ec734b34b495fd",
-                "sha256:b579f8acbf2bdd9ea200b1d5dea36abd93cabf56cf626ab9c744a432e15c815f",
-                "sha256:b865b01a2e7f96db0c5d12cfea590f98d8c5ba64ad222300d93ce6ff9138bcad",
-                "sha256:bf34e368e8dd976423396555078def5cfc3039ebc6fc06d1ae2c5a65eebbcde4",
-                "sha256:c6938967f8528b3668622a9ed3b31d145fab161a32f5891ea7b84f6b790be05b",
-                "sha256:d1c2676e3d840852a2de7c7d5d76407c772927addff8d742b9808fe0afccebdf",
-                "sha256:d7124f52f3bd259f510651450e18e0fd081ed82f3c08541dffc7b94b883aa981",
-                "sha256:d900d949b707778696fdf01036f58c9876a0d8bfe116e8d220cfd4b15f14e741",
-                "sha256:ebfd274dcd5133e0afae738e6d9da4323c3eb021b3e13052d8cbd0e457b1256e",
-                "sha256:ed361bb83436f117f9917d282a456f9e5009ea12fd6de8742d1a4752c3017e93",
-                "sha256:f5144c75445ae3ca2057faac03fda5a902eff196702b0a24daf1d6ce0650514b"
-            ],
-            "index": "pypi",
-            "version": "==1.6.0"
-        },
-        "mccabe": {
-            "hashes": [
-                "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
-                "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
-            ],
-            "version": "==0.6.1"
-        },
-        "mypy": {
-            "hashes": [
-                "sha256:00cb1964a7476e871d6108341ac9c1a857d6bd20bf5877f4773ac5e9d92cd3cd",
-                "sha256:127de5a9b817a03a98c5ae8a0c46a20dc44442af6dcfa2ae7f96cb519b312efa",
-                "sha256:1f3976a945ad7f0a0727aafdc5651c2d3278e3c88dee94e2bf75cd3386b7b2f4",
-                "sha256:2f8c098f12b402c19b735aec724cc9105cc1a9eea405d08814eb4b14a6fb1a41",
-                "sha256:4ef13b619a289aa025f2273e05e755f8049bb4eaba6d703a425de37d495d178d",
-                "sha256:5d142f219bf8c7894dfa79ebfb7d352c4c63a325e75f10dfb4c3db9417dcd135",
-                "sha256:62eb5dd4ea86bda8ce386f26684f7f26e4bfe6283c9f2b6ca6d17faf704dcfad",
-                "sha256:64c36eb0936d0bfb7d8da49f92c18e312ad2e3ed46e5548ae4ca997b0d33bd59",
-                "sha256:75eed74d2faf2759f79c5f56f17388defd2fc994222312ec54ee921e37b31ad4",
-                "sha256:974bebe3699b9b46278a7f076635d219183da26e1a675c1f8243a69221758273",
-                "sha256:a5e5bb12b7982b179af513dddb06fca12285f0316d74f3964078acbfcf4c68f2",
-                "sha256:d31291df31bafb997952dc0a17ebb2737f802c754aed31dd155a8bfe75112c57",
-                "sha256:d3b4941de44341227ece1caaf5b08b23e42ad4eeb8b603219afb11e9d4cfb437",
-                "sha256:eadb865126da4e3c4c95bdb47fe1bb087a3e3ea14d39a3b13224b8a4d9f9a102"
-            ],
-            "index": "pypi",
-            "version": "==0.780"
-        },
-        "mypy-extensions": {
-            "hashes": [
-                "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
-                "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
-            ],
-            "version": "==0.4.3"
-        },
-        "packaging": {
-            "hashes": [
-                "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5",
-                "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"
-            ],
-            "index": "pypi",
-            "version": "==20.9"
-        },
-        "pluggy": {
-            "hashes": [
-                "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
-                "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
-            ],
-            "index": "pypi",
-            "version": "==0.13.1"
-        },
-        "py": {
-            "hashes": [
-                "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
-                "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
-            ],
-            "index": "pypi",
-            "version": "==1.10.0"
-        },
-        "pycodestyle": {
-            "hashes": [
-                "sha256:74abc4e221d393ea5ce1f129ea6903209940c1ecd29e002e8c6933c2b21026e0",
-                "sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83",
-                "sha256:cbfca99bd594a10f674d0cd97a3d802a1fdef635d4361e1a2658de47ed261e3a"
-            ],
-            "version": "==2.4.0"
-        },
-        "pyflakes": {
-            "hashes": [
-                "sha256:9a7662ec724d0120012f6e29d6248ae3727d821bba522a0e6b356eff19126a49",
-                "sha256:f661252913bc1dbe7fcfcbf0af0db3f42ab65aabd1a6ca68fe5d466bace94dae"
-            ],
-            "version": "==2.0.0"
-        },
-        "pygments": {
-            "hashes": [
-                "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f",
-                "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"
-            ],
-            "index": "pypi",
-            "version": "==2.9.0"
-        },
-        "pylint": {
-            "hashes": [
-                "sha256:082a6d461b54f90eea49ca90fff4ee8b6e45e8029e5dbd72f6107ef84f3779c0",
-                "sha256:a01cd675eccf6e25b3bdb42be184eb46aaf89187d612ba0fb5f93328ed6b0fd5"
-            ],
-            "index": "pypi",
-            "version": "==2.8.0"
-        },
-        "pyparsing": {
-            "hashes": [
-                "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
-                "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
-            ],
-            "index": "pypi",
-            "version": "==2.4.7"
-        },
-        "qemu": {
-            "editable": true,
-            "path": "."
-        },
-        "setuptools": {
-            "hashes": [
-                "sha256:22c7348c6d2976a52632c67f7ab0cdf40147db7789f9aed18734643fe9cf3373",
-                "sha256:4ce92f1e1f8f01233ee9952c04f6b81d1e02939d6e1b488428154974a4d0783e"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==59.6.0"
-        },
-        "six": {
-            "hashes": [
-                "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
-                "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==1.16.0"
-        },
-        "toml": {
-            "hashes": [
-                "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
-                "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
-            ],
-            "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==0.10.2"
-        },
-        "tox": {
-            "hashes": [
-                "sha256:c60692d92fe759f46c610ac04c03cf0169432d1ff8e981e8ae63e068d0954fc3",
-                "sha256:f179cb4043d7dc1339425dd49ab1dd8c916246b0d9173143c1b0af7498a03ab0"
-            ],
-            "index": "pypi",
-            "version": "==3.18.0"
-        },
-        "typed-ast": {
-            "hashes": [
-                "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace",
-                "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff",
-                "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266",
-                "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528",
-                "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6",
-                "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808",
-                "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4",
-                "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363",
-                "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341",
-                "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04",
-                "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41",
-                "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e",
-                "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3",
-                "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899",
-                "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805",
-                "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c",
-                "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c",
-                "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39",
-                "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a",
-                "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3",
-                "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7",
-                "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f",
-                "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075",
-                "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0",
-                "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40",
-                "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428",
-                "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927",
-                "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3",
-                "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
-                "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
-            ],
-            "markers": "python_version < '3.8' and implementation_name == 'cpython'",
-            "version": "==1.4.3"
-        },
-        "typing-extensions": {
-            "hashes": [
-                "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497",
-                "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342",
-                "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"
-            ],
-            "index": "pypi",
-            "version": "==3.10.0.0"
-        },
-        "urwid": {
-            "hashes": [
-                "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae"
-            ],
-            "index": "pypi",
-            "version": "==2.1.2"
-        },
-        "urwid-readline": {
-            "hashes": [
-                "sha256:018020cbc864bb5ed87be17dc26b069eae2755cb29f3a9c569aac3bded1efaf4"
-            ],
-            "index": "pypi",
-            "version": "==0.13"
-        },
-        "virtualenv": {
-            "hashes": [
-                "sha256:14fdf849f80dbb29a4eb6caa9875d476ee2a5cf76a5f5415fa2f1606010ab467",
-                "sha256:2b0126166ea7c9c3661f5b8e06773d28f83322de7a3ff7d06f0aed18c9de6a76"
-            ],
-            "index": "pypi",
-            "version": "==20.4.7"
-        },
-        "wrapt": {
-            "hashes": [
-                "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"
-            ],
-            "version": "==1.12.1"
-        },
-        "zipp": {
-            "hashes": [
-                "sha256:3607921face881ba3e026887d8150cca609d517579abe052ac81fc5aeffdbd76",
-                "sha256:51cb66cc54621609dd593d1787f286ee42a5c0adbb4b29abea5a63edc3e03098"
-            ],
-            "index": "pypi",
-            "version": "==3.4.1"
-        }
-    }
-}
index 9c1fceaee73b11cb313b71a9a558a0bc2e4a813a..d62e71528d246a3ac19ed45001822be3da04ab82 100644 (file)
@@ -77,9 +77,6 @@ Files in this directory
 - ``MANIFEST.in`` is read by python setuptools, it specifies additional files
   that should be included by a source distribution.
 - ``PACKAGE.rst`` is used as the README file that is visible on PyPI.org.
-- ``Pipfile`` is used by Pipenv to generate ``Pipfile.lock``.
-- ``Pipfile.lock`` is a set of pinned package dependencies that this package
-  is tested under in our CI suite. It is used by ``make check-pipenv``.
 - ``README.rst`` you are here!
 - ``VERSION`` contains the PEP-440 compliant version used to describe
   this package; it is referenced by ``setup.cfg``.
index 6d3d739daa76d9b2d0a399da23f19875f0c39c3f..22e60298d2807c0e71e28b7bed278c8c6c595543 100644 (file)
@@ -207,7 +207,7 @@ class AsyncProtocol(Generic[T]):
     logger = logging.getLogger(__name__)
 
     # Maximum allowable size of read buffer
-    _limit = (64 * 1024)
+    _limit = 64 * 1024
 
     # -------------------------
     # Section: Public interface
index b5772e7f32b36790e8df99cf0f124716a9756f90..9d73ae6e7adab3a326945076ffb5ec15aab2d947 100644 (file)
@@ -198,7 +198,7 @@ class QMPClient(AsyncProtocol[Message], Events):
     logger = logging.getLogger(__name__)
 
     # Read buffer limit; 10MB like libvirt default
-    _limit = (10 * 1024 * 1024)
+    _limit = 10 * 1024 * 1024
 
     # Type alias for pending execute() result items
     _PendingT = Union[Message, ExecInterruptedError]
index 8c38a7ac9c0ee7ea96e5146d897ad7e5849dc829..d8411bb2d0b5010387512ad35ba2e28dbe9b6ba1 100644 (file)
@@ -155,7 +155,7 @@ class QemuGuestAgentClient:
 
     def fsfreeze(self, cmd: str) -> object:
         if cmd not in ['status', 'freeze', 'thaw']:
-            raise Exception('Invalid command: ' + cmd)
+            raise ValueError('Invalid command: ' + cmd)
         # Can be int (freeze, thaw) or GuestFsfreezeStatus (status)
         return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
 
@@ -167,7 +167,7 @@ class QemuGuestAgentClient:
 
     def suspend(self, mode: str) -> None:
         if mode not in ['disk', 'ram', 'hybrid']:
-            raise Exception('Invalid mode: ' + mode)
+            raise ValueError('Invalid mode: ' + mode)
 
         try:
             getattr(self.qga, 'suspend' + '_' + mode)()
@@ -178,7 +178,7 @@ class QemuGuestAgentClient:
 
     def shutdown(self, mode: str = 'powerdown') -> None:
         if mode not in ['powerdown', 'halt', 'reboot']:
-            raise Exception('Invalid mode: ' + mode)
+            raise ValueError('Invalid mode: ' + mode)
 
         try:
             self.qga.shutdown(mode=mode)
index 5641815706548e294a9f9a2d6d89add92dcd3f55..9e923d97628f91e48408c73d1a02d0c171d9c45d 100644 (file)
@@ -33,9 +33,7 @@ packages =
 * = py.typed
 
 [options.extras_require]
-# For the devel group, When adding new dependencies or bumping the minimum
-# version, use e.g. "pipenv install --dev pylint==3.0.0".
-# Subsequently, edit 'Pipfile' to remove e.g. 'pylint = "==3.0.0'.
+# Remember to update tests/minreqs.txt if changing anything below:
 devel =
     avocado-framework >= 90.0
     flake8 >= 3.6.0
diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt
new file mode 100644 (file)
index 0000000..dfb8abb
--- /dev/null
@@ -0,0 +1,45 @@
+# This file lists the ***oldest possible dependencies*** needed to run
+# "make check" successfully under ***Python 3.6***. It is used primarily
+# by GitLab CI to ensure that our stated minimum versions in setup.cfg
+# are truthful and regularly validated.
+#
+# This file should not contain any dependencies that are not expressed
+# by the [devel] section of setup.cfg, except for transitive
+# dependencies which must be enumerated here explicitly to eliminate
+# dependency resolution ambiguity.
+#
+# When adding new dependencies, pin the very oldest non-yanked version
+# on PyPI that allows the test suite to pass.
+
+# Dependencies for the TUI addon (Required for successful linting)
+urwid==2.1.2
+urwid-readline==0.13
+Pygments==2.9.0
+
+# Dependencies for FUSE support for qom-fuse
+fusepy==2.0.4
+
+# Test-runners, utilities, etc.
+avocado-framework==90.0
+
+# Linters
+flake8==3.6.0
+isort==5.1.2
+mypy==0.780
+pylint==2.8.0
+
+# Transitive flake8 dependencies
+mccabe==0.6.0
+pycodestyle==2.4.0
+pyflakes==2.0.0
+
+# Transitive mypy dependencies
+mypy-extensions==0.4.3
+typed-ast==1.4.0
+typing-extensions==3.7.4
+
+# Transitive pylint dependencies
+astroid==2.5.4
+lazy-object-proxy==1.4.0
+toml==0.10.0
+wrapt==1.12.1
index 7f331eb8eaca7b70756da8a48d8ab784ca8e6c9a..c05ad0c07e3de01c45a8d575813ab9065c623e6f 100644 (file)
 ##
 # @RbdImageEncryptionFormat:
 #
+# @luks-any: Used for opening either luks or luks2 (Since 8.0)
+#
 # Since: 6.1
 ##
 { 'enum': 'RbdImageEncryptionFormat',
-  'data': [ 'luks', 'luks2' ] }
+  'data': [ 'luks', 'luks2', 'luks-any' ] }
 
 ##
 # @RbdEncryptionOptionsLUKSBase:
   'base': 'RbdEncryptionOptionsLUKSBase',
   'data': { } }
 
+##
+# @RbdEncryptionOptionsLUKSAny:
+#
+# Since: 8.0
+##
+{ 'struct': 'RbdEncryptionOptionsLUKSAny',
+  'base': 'RbdEncryptionOptionsLUKSBase',
+  'data': { } }
+
 ##
 # @RbdEncryptionCreateOptionsLUKS:
 #
 ##
 # @RbdEncryptionOptions:
 #
+# @format: Encryption format.
+#
+# @parent: Parent image encryption options (for cloned images).
+#          Can be left unspecified if this cloned image is encrypted
+#          using the same format and secret as its parent image (i.e.
+#          not explicitly formatted) or if its parent image is not
+#          encrypted. (Since 8.0)
+#
 # Since: 6.1
 ##
 { 'union': 'RbdEncryptionOptions',
-  'base': { 'format': 'RbdImageEncryptionFormat' },
+  'base': { 'format': 'RbdImageEncryptionFormat',
+            '*parent': 'RbdEncryptionOptions' },
   'discriminator': 'format',
   'data': { 'luks': 'RbdEncryptionOptionsLUKS',
-            'luks2': 'RbdEncryptionOptionsLUKS2' } }
+            'luks2': 'RbdEncryptionOptionsLUKS2',
+            'luks-any': 'RbdEncryptionOptionsLUKSAny'} }
 
 ##
 # @RbdEncryptionCreateOptions:
index 522ac582edebfd54eb2df8bafd33c72edfb30b49..d6eb30008be017ddc7b3f9f631c39ad78940e24e 100644 (file)
 # @addr: socket address to listen on (server=true)
 #        or connect to (server=false)
 # @server: create server socket (default: false)
+# @reconnect: For a client socket, if a socket is disconnected,
+#             then attempt a reconnect after the given number of seconds.
+#             Setting this to zero disables this function. (default: 0)
+#             (since 8.0)
 #
 # Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
 #
 { 'struct': 'NetdevStreamOptions',
   'data': {
     'addr':   'SocketAddress',
-    '*server': 'bool' } }
+    '*server': 'bool',
+    '*reconnect': 'uint32' } }
 
 ##
 # @NetdevDgramOptions:
index 7c059318669a9265356557850e990a12ca1cceb2..9aeac69fa66e52b5e56340d726d6ffe55a58b8b3 100644 (file)
@@ -1991,7 +1991,9 @@ static void coroutine_fn convert_co_do_copy(void *opaque)
             qemu_co_mutex_unlock(&s->lock);
             break;
         }
-        n = convert_iteration_sectors(s, s->sector_num);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            n = convert_iteration_sectors(s, s->sector_num);
+        }
         if (n < 0) {
             qemu_co_mutex_unlock(&s->lock);
             s->ret = n;
@@ -2039,7 +2041,9 @@ retry:
 
         if (s->ret == -EINPROGRESS) {
             if (copy_range) {
-                ret = convert_co_copy_range(s, sector_num, n);
+                WITH_GRAPH_RDLOCK_GUARD() {
+                    ret = convert_co_copy_range(s, sector_num, n);
+                }
                 if (ret) {
                     s->copy_range = false;
                     goto retry;
index a061031615da76be732c6b82c67d1961358176dd..e7a02f5b99ca81e0a1a7e1f603e8385e660f3f25 100644 (file)
@@ -338,7 +338,8 @@ static int parse_pattern(const char *arg)
  */
 
 #define MISALIGN_OFFSET     16
-static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern)
+static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern,
+                           bool register_buf)
 {
     void *buf;
 
@@ -347,16 +348,24 @@ static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern)
     }
     buf = blk_blockalign(blk, len);
     memset(buf, pattern, len);
+    if (register_buf) {
+        blk_register_buf(blk, buf, len, &error_abort);
+    }
     if (qemuio_misalign) {
         buf += MISALIGN_OFFSET;
     }
     return buf;
 }
 
-static void qemu_io_free(void *p)
+static void qemu_io_free(BlockBackend *blk, void *p, size_t len,
+                         bool unregister_buf)
 {
     if (qemuio_misalign) {
         p -= MISALIGN_OFFSET;
+        len += MISALIGN_OFFSET;
+    }
+    if (unregister_buf) {
+        blk_unregister_buf(blk, p, len);
     }
     qemu_vfree(p);
 }
@@ -371,14 +380,16 @@ static void qemu_io_free(void *p)
  * @blk - the block backend where the buffer content is going to be written to
  * @len - the buffer length
  * @file_name - the file to read the content from
+ * @register_buf - call blk_register_buf()
  *
  * Returns: the buffer pointer on success
  *          NULL on error
  */
 static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
-                                     const char *file_name)
+                                     const char *file_name, bool register_buf)
 {
-    char *buf, *buf_origin;
+    size_t alloc_len = len + (qemuio_misalign ? MISALIGN_OFFSET : 0);
+    char *alloc_buf, *buf, *end;
     FILE *f = fopen(file_name, "r");
     int pattern_len;
 
@@ -387,19 +398,13 @@ static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
         return NULL;
     }
 
-    if (qemuio_misalign) {
-        len += MISALIGN_OFFSET;
-    }
-
-    buf_origin = buf = blk_blockalign(blk, len);
+    alloc_buf = buf = blk_blockalign(blk, alloc_len);
 
     if (qemuio_misalign) {
-        buf_origin += MISALIGN_OFFSET;
         buf += MISALIGN_OFFSET;
-        len -= MISALIGN_OFFSET;
     }
 
-    pattern_len = fread(buf_origin, 1, len, f);
+    pattern_len = fread(buf, 1, len, f);
 
     if (ferror(f)) {
         perror(file_name);
@@ -414,24 +419,23 @@ static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
     fclose(f);
     f = NULL;
 
-    if (len > pattern_len) {
-        len -= pattern_len;
-        buf += pattern_len;
-
-        while (len > 0) {
-            size_t len_to_copy = MIN(pattern_len, len);
-
-            memcpy(buf, buf_origin, len_to_copy);
+    if (register_buf) {
+        blk_register_buf(blk, alloc_buf, alloc_len, &error_abort);
+    }
 
-            len -= len_to_copy;
-            buf += len_to_copy;
-        }
+    end = buf + len;
+    for (char *p = buf + pattern_len; p < end; p += pattern_len) {
+        memcpy(p, buf, MIN(pattern_len, end - p));
     }
 
-    return buf_origin;
+    return buf;
 
 error:
-    qemu_io_free(buf_origin);
+    /*
+     * This code path is only taken before blk_register_buf() is called, so
+     * hardcode the qemu_io_free() unregister_buf argument to false.
+     */
+    qemu_io_free(blk, alloc_buf, alloc_len, false);
     if (f) {
         fclose(f);
     }
@@ -490,7 +494,7 @@ static void print_report(const char *op, struct timespec *t, int64_t offset,
  */
 static void *
 create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov,
-             int pattern)
+             int pattern, bool register_buf)
 {
     size_t *sizes = g_new0(size_t, nr_iov);
     size_t count = 0;
@@ -526,7 +530,7 @@ create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov,
 
     qemu_iovec_init(qiov, nr_iov);
 
-    buf = p = qemu_io_alloc(blk, count, pattern);
+    buf = p = qemu_io_alloc(blk, count, pattern, register_buf);
 
     for (i = 0; i < nr_iov; i++) {
         qemu_iovec_add(qiov, p, sizes[i]);
@@ -539,7 +543,7 @@ fail:
 }
 
 static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
-                    int64_t bytes, int64_t *total)
+                    int64_t bytes, BdrvRequestFlags flags, int64_t *total)
 {
     int ret;
 
@@ -547,7 +551,7 @@ static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
         return -ERANGE;
     }
 
-    ret = blk_pread(blk, offset, bytes, (uint8_t *)buf, 0);
+    ret = blk_pread(blk, offset, bytes, (uint8_t *)buf, flags);
     if (ret < 0) {
         return ret;
     }
@@ -556,7 +560,7 @@ static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
 }
 
 static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset,
-                     int64_t bytes, int flags, int64_t *total)
+                     int64_t bytes, BdrvRequestFlags flags, int64_t *total)
 {
     int ret;
 
@@ -573,7 +577,8 @@ static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset,
 }
 
 static int do_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                               int64_t bytes, int flags, int64_t *total)
+                               int64_t bytes, BdrvRequestFlags flags,
+                               int64_t *total)
 {
     int ret = blk_pwrite_zeroes(blk, offset, bytes,
                                 flags | BDRV_REQ_ZERO_WRITE);
@@ -637,11 +642,11 @@ static void aio_rw_done(void *opaque, int ret)
 }
 
 static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov,
-                        int64_t offset, int *total)
+                        int64_t offset, BdrvRequestFlags flags, int *total)
 {
     int async_ret = NOT_DONE;
 
-    blk_aio_preadv(blk, offset, qiov, 0, aio_rw_done, &async_ret);
+    blk_aio_preadv(blk, offset, qiov, flags, aio_rw_done, &async_ret);
     while (async_ret == NOT_DONE) {
         main_loop_wait(false);
     }
@@ -651,7 +656,7 @@ static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov,
 }
 
 static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov,
-                         int64_t offset, int flags, int *total)
+                         int64_t offset, BdrvRequestFlags flags, int *total)
 {
     int async_ret = NOT_DONE;
 
@@ -681,6 +686,7 @@ static void read_help(void)
 " -p, -- ignored for backwards compatibility\n"
 " -P, -- use a pattern to verify read data\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
 " -s, -- start offset for pattern verification (only with -P)\n"
 " -v, -- dump buffer to standard output\n"
 "\n");
@@ -694,7 +700,7 @@ static const cmdinfo_t read_cmd = {
     .cfunc      = read_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-abCqv] [-P pattern [-s off] [-l len]] off len",
+    .args       = "[-abCqrv] [-P pattern [-s off] [-l len]] off len",
     .oneline    = "reads a number of bytes at a specified offset",
     .help       = read_help,
 };
@@ -712,8 +718,9 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
     int64_t total = 0;
     int pattern = 0;
     int64_t pattern_offset = 0, pattern_count = 0;
+    BdrvRequestFlags flags = 0;
 
-    while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) {
+    while ((c = getopt(argc, argv, "bCl:pP:qrs:v")) != -1) {
         switch (c) {
         case 'b':
             bflag = true;
@@ -742,6 +749,9 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 's':
             sflag = true;
             pattern_offset = cvtnum(optarg);
@@ -806,15 +816,20 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
                    count);
             return -EINVAL;
         }
+        if (flags & BDRV_REQ_REGISTERED_BUF) {
+            printf("I/O buffer registration is not supported when reading "
+                    "from vmstate\n");
+            return -EINVAL;
+        }
     }
 
-    buf = qemu_io_alloc(blk, count, 0xab);
+    buf = qemu_io_alloc(blk, count, 0xab, flags & BDRV_REQ_REGISTERED_BUF);
 
     clock_gettime(CLOCK_MONOTONIC, &t1);
     if (bflag) {
         ret = do_load_vmstate(blk, buf, offset, count, &total);
     } else {
-        ret = do_pread(blk, buf, offset, count, &total);
+        ret = do_pread(blk, buf, offset, count, flags, &total);
     }
     clock_gettime(CLOCK_MONOTONIC, &t2);
 
@@ -851,7 +866,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
     print_report("read", &t2, offset, count, total, cnt, Cflag);
 
 out:
-    qemu_io_free(buf);
+    qemu_io_free(blk, buf, count, flags & BDRV_REQ_REGISTERED_BUF);
     return ret;
 }
 
@@ -869,8 +884,9 @@ static void readv_help(void)
 " Uses multiple iovec buffers if more than one byte range is specified.\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -P, -- use a pattern to verify read data\n"
-" -v, -- dump buffer to standard output\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -v, -- dump buffer to standard output\n"
 "\n");
 }
 
@@ -881,7 +897,7 @@ static const cmdinfo_t readv_cmd = {
     .cfunc      = readv_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Cqv] [-P pattern] off len [len..]",
+    .args       = "[-Cqrv] [-P pattern] off len [len..]",
     .oneline    = "reads a number of bytes at a specified offset",
     .help       = readv_help,
 };
@@ -899,8 +915,9 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
     QEMUIOVector qiov;
     int pattern = 0;
     bool Pflag = false;
+    BdrvRequestFlags flags = 0;
 
-    while ((c = getopt(argc, argv, "CP:qv")) != -1) {
+    while ((c = getopt(argc, argv, "CP:qrv")) != -1) {
         switch (c) {
         case 'C':
             Cflag = true;
@@ -915,6 +932,9 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'v':
             vflag = true;
             break;
@@ -938,13 +958,14 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
     optind++;
 
     nr_iov = argc - optind;
-    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab);
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab,
+                       flags & BDRV_REQ_REGISTERED_BUF);
     if (buf == NULL) {
         return -EINVAL;
     }
 
     clock_gettime(CLOCK_MONOTONIC, &t1);
-    ret = do_aio_readv(blk, &qiov, offset, &total);
+    ret = do_aio_readv(blk, &qiov, offset, flags, &total);
     clock_gettime(CLOCK_MONOTONIC, &t2);
 
     if (ret < 0) {
@@ -979,8 +1000,8 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
     print_report("read", &t2, offset, qiov.size, total, cnt, Cflag);
 
 out:
+    qemu_io_free(blk, buf, qiov.size, flags & BDRV_REQ_REGISTERED_BUF);
     qemu_iovec_destroy(&qiov);
-    qemu_io_free(buf);
     return ret;
 }
 
@@ -997,13 +1018,14 @@ static void write_help(void)
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
 " -c, -- write compressed data with blk_write_compressed\n"
+" -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
 " -n, -- with -z, don't allow slow fallback\n"
 " -p, -- ignored for backwards compatibility\n"
 " -P, -- use different pattern to fill file\n"
-" -s, -- use a pattern file to fill the write buffer\n"
-" -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -s, -- use a pattern file to fill the write buffer\n"
 " -u, -- with -z, allow unmapping\n"
 " -z, -- write zeroes using blk_pwrite_zeroes\n"
 "\n");
@@ -1018,7 +1040,7 @@ static const cmdinfo_t write_cmd = {
     .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-bcCfnquz] [-P pattern | -s source_file] off len",
+    .args       = "[-bcCfnqruz] [-P pattern | -s source_file] off len",
     .oneline    = "writes a number of bytes at a specified offset",
     .help       = write_help,
 };
@@ -1028,7 +1050,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     struct timespec t1, t2;
     bool Cflag = false, qflag = false, bflag = false;
     bool Pflag = false, zflag = false, cflag = false, sflag = false;
-    int flags = 0;
+    BdrvRequestFlags flags = 0;
     int c, cnt, ret;
     char *buf = NULL;
     int64_t offset;
@@ -1038,7 +1060,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     int pattern = 0xcd;
     const char *file_name = NULL;
 
-    while ((c = getopt(argc, argv, "bcCfnpP:qs:uz")) != -1) {
+    while ((c = getopt(argc, argv, "bcCfnpP:qrs:uz")) != -1) {
         switch (c) {
         case 'b':
             bflag = true;
@@ -1068,6 +1090,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 's':
             sflag = true;
             file_name = optarg;
@@ -1147,14 +1172,21 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
         }
     }
 
-    if (!zflag) {
+    if (zflag) {
+        if (flags & BDRV_REQ_REGISTERED_BUF) {
+            printf("cannot combine zero write with registered I/O buffer\n");
+            return -EINVAL;
+        }
+    } else {
         if (sflag) {
-            buf = qemu_io_alloc_from_file(blk, count, file_name);
+            buf = qemu_io_alloc_from_file(blk, count, file_name,
+                                          flags & BDRV_REQ_REGISTERED_BUF);
             if (!buf) {
                 return -EINVAL;
             }
         } else {
-            buf = qemu_io_alloc(blk, count, pattern);
+            buf = qemu_io_alloc(blk, count, pattern,
+                                flags & BDRV_REQ_REGISTERED_BUF);
         }
     }
 
@@ -1188,7 +1220,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
 
 out:
     if (!zflag) {
-        qemu_io_free(buf);
+        qemu_io_free(blk, buf, count, flags & BDRV_REQ_REGISTERED_BUF);
     }
     return ret;
 }
@@ -1205,10 +1237,11 @@ writev_help(void)
 "\n"
 " Writes into a segment of the currently open file, using a buffer\n"
 " filled with a set pattern (0xcdcdcdcd).\n"
-" -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
+" -P, -- use different pattern to fill file\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
 "\n");
 }
 
@@ -1220,7 +1253,7 @@ static const cmdinfo_t writev_cmd = {
     .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Cfq] [-P pattern] off len [len..]",
+    .args       = "[-Cfqr] [-P pattern] off len [len..]",
     .oneline    = "writes a number of bytes at a specified offset",
     .help       = writev_help,
 };
@@ -1229,7 +1262,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
 {
     struct timespec t1, t2;
     bool Cflag = false, qflag = false;
-    int flags = 0;
+    BdrvRequestFlags flags = 0;
     int c, cnt, ret;
     char *buf;
     int64_t offset;
@@ -1239,7 +1272,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
     int pattern = 0xcd;
     QEMUIOVector qiov;
 
-    while ((c = getopt(argc, argv, "CfqP:")) != -1) {
+    while ((c = getopt(argc, argv, "CfP:qr")) != -1) {
         switch (c) {
         case 'C':
             Cflag = true;
@@ -1250,6 +1283,9 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'P':
             pattern = parse_pattern(optarg);
             if (pattern < 0) {
@@ -1275,7 +1311,8 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
     optind++;
 
     nr_iov = argc - optind;
-    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern);
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern,
+                       flags & BDRV_REQ_REGISTERED_BUF);
     if (buf == NULL) {
         return -EINVAL;
     }
@@ -1300,8 +1337,8 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
     t2 = tsub(t2, t1);
     print_report("wrote", &t2, offset, qiov.size, total, cnt, Cflag);
 out:
+    qemu_io_free(blk, buf, qiov.size, flags & BDRV_REQ_REGISTERED_BUF);
     qemu_iovec_destroy(&qiov);
-    qemu_io_free(buf);
     return ret;
 }
 
@@ -1317,6 +1354,7 @@ struct aio_ctx {
     bool zflag;
     BlockAcctCookie acct;
     int pattern;
+    BdrvRequestFlags flags;
     struct timespec t1;
 };
 
@@ -1346,7 +1384,8 @@ static void aio_write_done(void *opaque, int ret)
                  ctx->qiov.size, 1, ctx->Cflag);
 out:
     if (!ctx->zflag) {
-        qemu_io_free(ctx->buf);
+        qemu_io_free(ctx->blk, ctx->buf, ctx->qiov.size,
+                     ctx->flags & BDRV_REQ_REGISTERED_BUF);
         qemu_iovec_destroy(&ctx->qiov);
     }
     g_free(ctx);
@@ -1391,7 +1430,8 @@ static void aio_read_done(void *opaque, int ret)
     print_report("read", &t2, ctx->offset, ctx->qiov.size,
                  ctx->qiov.size, 1, ctx->Cflag);
 out:
-    qemu_io_free(ctx->buf);
+    qemu_io_free(ctx->blk, ctx->buf, ctx->qiov.size,
+                 ctx->flags & BDRV_REQ_REGISTERED_BUF);
     qemu_iovec_destroy(&ctx->qiov);
     g_free(ctx);
 }
@@ -1413,10 +1453,11 @@ static void aio_read_help(void)
 " considered successful once the request is submitted, independently\n"
 " of potential I/O errors or pattern mismatches.\n"
 " -C, -- report statistics in a machine parsable format\n"
-" -P, -- use a pattern to verify read data\n"
 " -i, -- treat request as invalid, for exercising stats\n"
-" -v, -- dump buffer to standard output\n"
+" -P, -- use a pattern to verify read data\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -v, -- dump buffer to standard output\n"
 "\n");
 }
 
@@ -1427,7 +1468,7 @@ static const cmdinfo_t aio_read_cmd = {
     .cfunc      = aio_read_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Ciqv] [-P pattern] off len [len..]",
+    .args       = "[-Ciqrv] [-P pattern] off len [len..]",
     .oneline    = "asynchronously reads a number of bytes",
     .help       = aio_read_help,
 };
@@ -1438,7 +1479,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
     struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
 
     ctx->blk = blk;
-    while ((c = getopt(argc, argv, "CP:iqv")) != -1) {
+    while ((c = getopt(argc, argv, "CiP:qrv")) != -1) {
         switch (c) {
         case 'C':
             ctx->Cflag = true;
@@ -1459,6 +1500,9 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             ctx->qflag = true;
             break;
+        case 'r':
+            ctx->flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'v':
             ctx->vflag = true;
             break;
@@ -1485,7 +1529,8 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
     optind++;
 
     nr_iov = argc - optind;
-    ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab);
+    ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab,
+                            ctx->flags & BDRV_REQ_REGISTERED_BUF);
     if (ctx->buf == NULL) {
         block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
         g_free(ctx);
@@ -1495,7 +1540,8 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
     clock_gettime(CLOCK_MONOTONIC, &ctx->t1);
     block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
                      BLOCK_ACCT_READ);
-    blk_aio_preadv(blk, ctx->offset, &ctx->qiov, 0, aio_read_done, ctx);
+    blk_aio_preadv(blk, ctx->offset, &ctx->qiov, ctx->flags, aio_read_done,
+                   ctx);
     return 0;
 }
 
@@ -1516,11 +1562,12 @@ static void aio_write_help(void)
 " Note that due to its asynchronous nature, this command will be\n"
 " considered successful once the request is submitted, independently\n"
 " of potential I/O errors or pattern mismatches.\n"
-" -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
 " -i, -- treat request as invalid, for exercising stats\n"
+" -P, -- use different pattern to fill file\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
 " -u, -- with -z, allow unmapping\n"
 " -z, -- write zeroes using blk_aio_pwrite_zeroes\n"
 "\n");
@@ -1534,7 +1581,7 @@ static const cmdinfo_t aio_write_cmd = {
     .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Cfiquz] [-P pattern] off len [len..]",
+    .args       = "[-Cfiqruz] [-P pattern] off len [len..]",
     .oneline    = "asynchronously writes a number of bytes",
     .help       = aio_write_help,
 };
@@ -1544,22 +1591,24 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
     int nr_iov, c;
     int pattern = 0xcd;
     struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
-    int flags = 0;
 
     ctx->blk = blk;
-    while ((c = getopt(argc, argv, "CfiqP:uz")) != -1) {
+    while ((c = getopt(argc, argv, "CfiP:qruz")) != -1) {
         switch (c) {
         case 'C':
             ctx->Cflag = true;
             break;
         case 'f':
-            flags |= BDRV_REQ_FUA;
+            ctx->flags |= BDRV_REQ_FUA;
             break;
         case 'q':
             ctx->qflag = true;
             break;
+        case 'r':
+            ctx->flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'u':
-            flags |= BDRV_REQ_MAY_UNMAP;
+            ctx->flags |= BDRV_REQ_MAY_UNMAP;
             break;
         case 'P':
             pattern = parse_pattern(optarg);
@@ -1595,7 +1644,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         return -EINVAL;
     }
 
-    if ((flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
+    if ((ctx->flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
         printf("-u requires -z to be specified\n");
         g_free(ctx);
         return -EINVAL;
@@ -1607,6 +1656,12 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         return -EINVAL;
     }
 
+    if (ctx->zflag && (ctx->flags & BDRV_REQ_REGISTERED_BUF)) {
+        printf("cannot combine zero write with registered I/O buffer\n");
+        g_free(ctx);
+        return -EINVAL;
+    }
+
     ctx->offset = cvtnum(argv[optind]);
     if (ctx->offset < 0) {
         int ret = ctx->offset;
@@ -1625,12 +1680,12 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         }
 
         ctx->qiov.size = count;
-        blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done,
-                              ctx);
+        blk_aio_pwrite_zeroes(blk, ctx->offset, count, ctx->flags,
+                              aio_write_done, ctx);
     } else {
         nr_iov = argc - optind;
         ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov,
-                                pattern);
+                                pattern, ctx->flags & BDRV_REQ_REGISTERED_BUF);
         if (ctx->buf == NULL) {
             block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
             g_free(ctx);
@@ -1641,8 +1696,8 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
                          BLOCK_ACCT_WRITE);
 
-        blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, flags, aio_write_done,
-                        ctx);
+        blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, ctx->flags,
+                        aio_write_done, ctx);
     }
 
     return 0;
index 88e93c610314be217528286912b0a826a1269dde..beeb4475ba168db867b2d3f627162d81d3a1fd79 100644 (file)
@@ -1892,8 +1892,8 @@ SRST
 ERST
 
 DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi,
-    "-iscsi [user=user][,password=password]\n"
-    "       [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n"
+    "-iscsi [user=user][,password=password][,password-secret=secret-id]\n"
+    "       [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE]\n"
     "       [,initiator-name=initiator-iqn][,id=target-iqn]\n"
     "       [,timeout=timeout]\n"
     "                iSCSI session parameters\n", QEMU_ARCH_ALL)
@@ -2135,7 +2135,7 @@ DEF("spice", HAS_ARG, QEMU_OPTION_spice,
     "       [,tls-channel=[main|display|cursor|inputs|record|playback]]\n"
     "       [,plaintext-channel=[main|display|cursor|inputs|record|playback]]\n"
     "       [,sasl=on|off][,disable-ticketing=on|off]\n"
-    "       [,password=<string>][,password-secret=<secret-id>]\n"
+    "       [,password-secret=<secret-id>]\n"
     "       [,image-compression=[auto_glz|auto_lz|quic|glz|lz|off]]\n"
     "       [,jpeg-wan-compression=[auto|never|always]]\n"
     "       [,zlib-glz-wan-compression=[auto|never|always]]\n"
@@ -2161,13 +2161,6 @@ SRST
     ``ipv4=on|off``; \ ``ipv6=on|off``; \ ``unix=on|off``
         Force using the specified IP version.
 
-    ``password=<string>``
-        Set the password you need to authenticate.
-
-        This option is deprecated and insecure because it leaves the
-        password visible in the process listing. Use ``password-secret``
-        instead.
-
     ``password-secret=<secret-id>``
         Set the ID of the ``secret`` object containing the password
         you need to authenticate.
@@ -2769,9 +2762,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
     "                configure a network backend to connect to another network\n"
     "                using an UDP tunnel\n"
-    "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n"
-    "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n"
-    "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
+    "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n"
+    "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n"
+    "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n"
     "                configure a network backend to connect to another network\n"
     "                using a socket connection in stream mode.\n"
     "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n"
index 360077364e39e863c931a0ba748a69a54582d174..172826f8f8edd1ed420d7ffe0478e73e818e2201 100644 (file)
@@ -32,9 +32,8 @@
 #define GUEST_FILE_READ_COUNT_MAX (48 * MiB)
 
 /* Note: in some situations, like with the fsfreeze, logging may be
- * temporarilly disabled. if it is necessary that a command be able
- * to log for accounting purposes, check ga_logging_enabled() beforehand,
- * and use the QERR_QGA_LOGGING_DISABLED to generate an error
+ * temporarily disabled. If it is necessary that a command be able
+ * to log for accounting purposes, check ga_logging_enabled() beforehand.
  */
 void slog(const gchar *fmt, ...)
 {
index 85b7d6ced504bb1ae1303371205b2cdd5ac65845..2b992a55b35b7e1ab2172945e7e39979bcda3623 100644 (file)
@@ -24,7 +24,6 @@
 #include "qapi/qmp/qjson.h"
 #include "guest-agent-core.h"
 #include "qga-qapi-init-commands.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/error.h"
 #include "channel.h"
 #include "qemu/cutils.h"
index 9a0dc1cf4447a54d4941a09150c782fe5417a053..c39156c52221bd3fd0a4e96abb4d5f7ecc64d80e 100644 (file)
@@ -376,8 +376,12 @@ void replay_finish(void)
     replay_mode = REPLAY_MODE_NONE;
 }
 
-void replay_add_blocker(Error *reason)
+void replay_add_blocker(const char *feature)
 {
+    Error *reason = NULL;
+
+    error_setg(&reason, "Record/replay feature is not supported for '%s'",
+               feature);
     replay_blockers = g_slist_prepend(replay_blockers, reason);
 }
 
index 5c262b08f156fc28ef3f9cb98f42b23db4edea67..50cefdb2d690ffdeb44c43e201d5f5b24c4eecf7 100644 (file)
@@ -12,7 +12,7 @@ void replay_input_sync_event(void)
     qemu_input_event_sync_impl();
 }
 
-void replay_add_blocker(Error *reason)
+void replay_add_blocker(const char *feature)
 {
 }
 void replay_audio_in(size_t *recorded, void *samples, size_t *wpos, size_t size)
index 5e44d9789001979a0e9ef3183f90beac528a6bf4..955f92286de0604d74c7a91096986ce64222e4a8 100644 (file)
@@ -57,7 +57,6 @@ default help:
        @echo "available build targets:"
        @echo "  bios               -- update bios.bin (seabios)"
        @echo "  vgabios            -- update vgabios binaries (seabios)"
-       @echo "  sgabios            -- update sgabios binaries"
        @echo "  pxerom             -- update nic roms (bios only)"
        @echo "  efirom             -- update nic roms (bios+efi)"
        @echo "  slof               -- update slof.bin"
@@ -102,11 +101,7 @@ build-seabios-config-%: config.%
                OUT=$(CURDIR)/seabios/builds/$*/ all
 
 
-.PHONY: sgabios skiboot qboot
-sgabios:
-       $(MAKE) -C sgabios
-       cp sgabios/sgabios.bin ../pc-bios
-
+.PHONY: skiboot qboot
 
 pxerom: $(patsubst %,pxe-rom-%,$(pxerom_variants))
 
@@ -199,8 +194,6 @@ npcm7xx_bootrom:
 
 clean:
        rm -rf seabios/.config seabios/out seabios/builds
-       $(MAKE) -C sgabios clean
-       rm -f sgabios/.depend
        $(MAKE) -C ipxe/src veryclean
        $(MAKE) -C edk2/BaseTools clean
        $(MAKE) -C SLOF clean
diff --git a/roms/sgabios b/roms/sgabios
deleted file mode 160000 (submodule)
index cbaee52..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit cbaee52287e5f32373181cff50a00b6c4ac9015a
index e82b6481277f0cd6c64586f463426074fa4b0c0b..60e9b3107c238af13479a16cfac736bb2cabcfa0 100644 (file)
@@ -63,8 +63,8 @@ class ParamDecl:
 
 
 class FuncDecl:
-    def __init__(self, return_type: str, name: str, args: str,
-                 variant: str) -> None:
+    def __init__(self, wrapper_type: str, return_type: str, name: str,
+                 args: str, variant: str) -> None:
         self.return_type = return_type.strip()
         self.name = name.strip()
         self.struct_name = snake_to_camel(self.name)
@@ -72,8 +72,21 @@ class FuncDecl:
         self.create_only_co = 'mixed' not in variant
         self.graph_rdlock = 'bdrv_rdlock' in variant
 
-        subsystem, subname = self.name.split('_', 1)
-        self.co_name = f'{subsystem}_co_{subname}'
+        self.wrapper_type = wrapper_type
+
+        if wrapper_type == 'co':
+            subsystem, subname = self.name.split('_', 1)
+            self.target_name = f'{subsystem}_co_{subname}'
+        else:
+            assert wrapper_type == 'no_co'
+            subsystem, co_infix, subname = self.name.split('_', 2)
+            if co_infix != 'co':
+                raise ValueError(f"Invalid no_co function name: {self.name}")
+            if not self.create_only_co:
+                raise ValueError(f"no_co function can't be mixed: {self.name}")
+            if self.graph_rdlock:
+                raise ValueError(f"no_co function can't be rdlock: {self.name}")
+            self.target_name = f'{subsystem}_{subname}'
 
         t = self.args[0].type
         if t == 'BlockDriverState *':
@@ -105,7 +118,8 @@ class FuncDecl:
 
 # Match wrappers declared with a co_wrapper mark
 func_decl_re = re.compile(r'^(?P<return_type>[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
-                          r'\s*co_wrapper'
+                          r'(\s*coroutine_fn)?'
+                          r'\s*(?P<wrapper_type>(no_)?co)_wrapper'
                           r'(?P<variant>(_[a-z][a-z0-9_]*)?)\s*'
                           r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
                           r'\((?P<args>[^)]*)\);$', re.MULTILINE)
@@ -113,7 +127,8 @@ func_decl_re = re.compile(r'^(?P<return_type>[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
 
 def func_decl_iter(text: str) -> Iterator:
     for m in func_decl_re.finditer(text):
-        yield FuncDecl(return_type=m.group('return_type'),
+        yield FuncDecl(wrapper_type=m.group('wrapper_type'),
+                       return_type=m.group('return_type'),
                        name=m.group('wrapper_name'),
                        args=m.group('args'),
                        variant=m.group('variant'))
@@ -133,7 +148,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
     """
     Checks if we are already in coroutine
     """
-    name = func.co_name
+    name = func.target_name
     struct_name = func.struct_name
     graph_assume_lock = 'assume_graph_lock();' if func.graph_rdlock else ''
 
@@ -163,7 +178,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
     """
     Assumes we are not in coroutine, and creates one
     """
-    name = func.co_name
+    name = func.target_name
     struct_name = func.struct_name
     return f"""\
 {func.return_type} {func.name}({ func.gen_list('{decl}') })
@@ -183,10 +198,11 @@ def create_co_wrapper(func: FuncDecl) -> str:
 }}"""
 
 
-def gen_wrapper(func: FuncDecl) -> str:
+def gen_co_wrapper(func: FuncDecl) -> str:
     assert not '_co_' in func.name
+    assert func.wrapper_type == 'co'
 
-    name = func.co_name
+    name = func.target_name
     struct_name = func.struct_name
 
     graph_lock=''
@@ -225,11 +241,56 @@ static void coroutine_fn {name}_entry(void *opaque)
 {creation_function(func)}"""
 
 
+def gen_no_co_wrapper(func: FuncDecl) -> str:
+    assert '_co_' in func.name
+    assert func.wrapper_type == 'no_co'
+
+    name = func.target_name
+    struct_name = func.struct_name
+
+    return f"""\
+/*
+ * Wrappers for {name}
+ */
+
+typedef struct {struct_name} {{
+    Coroutine *co;
+    {func.return_field}
+{ func.gen_block('    {decl};') }
+}} {struct_name};
+
+static void {name}_bh(void *opaque)
+{{
+    {struct_name} *s = opaque;
+
+    {func.get_result}{name}({ func.gen_list('s->{name}') });
+
+    aio_co_wake(s->co);
+}}
+
+{func.return_type} coroutine_fn {func.name}({ func.gen_list('{decl}') })
+{{
+    {struct_name} s = {{
+        .co = qemu_coroutine_self(),
+{ func.gen_block('        .{name} = {name},') }
+    }};
+    assert(qemu_in_coroutine());
+
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), {name}_bh, &s);
+    qemu_coroutine_yield();
+
+    {func.ret}
+}}"""
+
+
 def gen_wrappers(input_code: str) -> str:
     res = ''
     for func in func_decl_iter(input_code):
         res += '\n\n\n'
-        res += gen_wrapper(func)
+        if func.wrapper_type == 'co':
+            res += gen_co_wrapper(func)
+        else:
+            res += gen_no_co_wrapper(func)
 
     return res
 
index 65eacf3c56bbb274b63099baddfe8942577077c3..6e8983f39cd6a4f0e80a696030307eed1211a2fc 100755 (executable)
 --disable-vhost-vdpa \
 --disable-virglrenderer \
 --disable-virtfs \
---disable-virtiofsd \
 --disable-vnc \
 --disable-vnc-jpeg \
 --disable-png \
 --enable-tpm \
 --enable-trace-backends=dtrace \
 --enable-usb-redir \
---enable-virtiofsd \
 --enable-vhost-kernel \
 --enable-vhost-net \
 --enable-vhost-user \
index 7aeecbcfb8235bd2cc38c4ca4a43b3557b03bf96..f403e4e7ec1d2c887016b6a5bfcd2004c4483cc9 100755 (executable)
 # * Require machine type "x-remote":
 #   - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64
 #
-# * Needs superuser privileges:
-#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_virtiofsd_set_up
-#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_launch_set_up
-#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_launch_set_up
-#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_mount_set_up
-#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_two_runs
-#
 # * Requires display type "egl-headless":
 #   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl
 #   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl
index 0e6ab4936e5daf83701a975b04fe244640781ca9..639dcee45a01c5251d0a662a3d867e512b65d780 100644 (file)
@@ -132,9 +132,6 @@ util
 xen
   ~ (/qemu)?(.*/xen.*)
 
-virtiofsd
-  ~ (/qemu)?(/tools/virtiofsd/.*)
-
 (headers)
   ~ (/qemu)?(/include/.*)
 
index 3e2b4785388ffbb815882beb1173173d4449f79f..a04dcc70a5b7a52d8989bd8e9b7bd593d139b153 100755 (executable)
@@ -61,7 +61,10 @@ LINE_WIDTH = 76
 
 # Convert the default value of an option to the string used in
 # the help message
-def value_to_help(value):
+def get_help(opt):
+    if opt["name"] == "libdir":
+        return 'system default'
+    value = opt["value"]
     if isinstance(value, list):
         return ",".join(value)
     if isinstance(value, bool):
@@ -88,7 +91,7 @@ def sh_print(line=""):
 def help_line(left, opt, indent, long):
     right = f'{opt["description"]}'
     if long:
-        value = value_to_help(opt["value"])
+        value = get_help(opt)
         if value != "auto" and value != "":
             right += f" [{value}]"
     if "choices" in opt and long:
index 0f71e92dcba6692f35c2a4f2d587f2875894aeca..5d969a94c08e63aa4ba30a2b9a0e03fb7dc8cd59 100644 (file)
@@ -49,7 +49,7 @@ meson_options_help() {
   printf "%s\n" '  --includedir=VALUE       Header file directory [include]'
   printf "%s\n" '  --interp-prefix=VALUE    where to find shared libraries etc., use %M for'
   printf "%s\n" '                           cpu name [/usr/gnemul/qemu-%M]'
-  printf "%s\n" '  --libdir=VALUE           Library directory [lib64]'
+  printf "%s\n" '  --libdir=VALUE           Library directory [system default]'
   printf "%s\n" '  --libexecdir=VALUE       Library executable directory [libexec]'
   printf "%s\n" '  --localedir=VALUE        Locale data directory [share/locale]'
   printf "%s\n" '  --localstatedir=VALUE    Localstate data directory [/var/local]'
@@ -70,6 +70,7 @@ meson_options_help() {
   printf "%s\n" '  attr            attr/xattr support'
   printf "%s\n" '  auth-pam        PAM access control'
   printf "%s\n" '  avx2            AVX2 optimizations'
+  printf "%s\n" '  avx512bw        AVX512BW optimizations'
   printf "%s\n" '  avx512f         AVX512F optimizations'
   printf "%s\n" '  blkio           libblkio block device driver'
   printf "%s\n" '  bochs           bochs image format support'
@@ -108,6 +109,7 @@ meson_options_help() {
   printf "%s\n" '  kvm             KVM acceleration support'
   printf "%s\n" '  l2tpv3          l2tpv3 network backend support'
   printf "%s\n" '  libdaxctl       libdaxctl support'
+  printf "%s\n" '  libdw           debuginfo support'
   printf "%s\n" '  libiscsi        libiscsi userspace initiator'
   printf "%s\n" '  libnfs          libnfs block device driver'
   printf "%s\n" '  libpmem         libpmem support'
@@ -173,7 +175,6 @@ meson_options_help() {
   printf "%s\n" '  vhost-vdpa      vhost-vdpa kernel backend support'
   printf "%s\n" '  virglrenderer   virgl rendering support'
   printf "%s\n" '  virtfs          virtio-9p support'
-  printf "%s\n" '  virtiofsd       build virtiofs daemon (virtiofsd)'
   printf "%s\n" '  vmnet           vmnet.framework network backend support'
   printf "%s\n" '  vnc             VNC server'
   printf "%s\n" '  vnc-jpeg        JPEG lossy compression for VNC server'
@@ -198,6 +199,8 @@ _meson_option_parse() {
     --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
     --enable-avx2) printf "%s" -Davx2=enabled ;;
     --disable-avx2) printf "%s" -Davx2=disabled ;;
+    --enable-avx512bw) printf "%s" -Davx512bw=enabled ;;
+    --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
     --enable-avx512f) printf "%s" -Davx512f=enabled ;;
     --disable-avx512f) printf "%s" -Davx512f=disabled ;;
     --enable-gcov) printf "%s" -Db_coverage=true ;;
@@ -309,6 +312,8 @@ _meson_option_parse() {
     --enable-libdaxctl) printf "%s" -Dlibdaxctl=enabled ;;
     --disable-libdaxctl) printf "%s" -Dlibdaxctl=disabled ;;
     --libdir=*) quote_sh "-Dlibdir=$2" ;;
+    --enable-libdw) printf "%s" -Dlibdw=enabled ;;
+    --disable-libdw) printf "%s" -Dlibdw=disabled ;;
     --libexecdir=*) quote_sh "-Dlibexecdir=$2" ;;
     --enable-libiscsi) printf "%s" -Dlibiscsi=enabled ;;
     --disable-libiscsi) printf "%s" -Dlibiscsi=disabled ;;
@@ -455,8 +460,6 @@ _meson_option_parse() {
     --disable-virglrenderer) printf "%s" -Dvirglrenderer=disabled ;;
     --enable-virtfs) printf "%s" -Dvirtfs=enabled ;;
     --disable-virtfs) printf "%s" -Dvirtfs=disabled ;;
-    --enable-virtiofsd) printf "%s" -Dvirtiofsd=enabled ;;
-    --disable-virtiofsd) printf "%s" -Dvirtiofsd=disabled ;;
     --enable-vmnet) printf "%s" -Dvmnet=enabled ;;
     --disable-vmnet) printf "%s" -Dvmnet=disabled ;;
     --enable-vnc) printf "%s" -Dvnc=enabled ;;
index 6b158c68b84a4281ff7e5aadc2e6f84bd0df5198..a873ff6730924d2c1778b841bd49dfabea57a168 100644 (file)
@@ -1,2 +1,3 @@
 [flake8]
-extend-ignore = E722  # Prefer pylint's bare-except checks to flake8's
+# Prefer pylint's bare-except checks to flake8's
+extend-ignore = E722
index 5a1782b57eaee8251f832ab8578cd947ba961f8b..ca01ea6f4aade0fc145adacb34c1b32b1f366a18 100644 (file)
@@ -33,7 +33,6 @@ structures and contextual semantic validation.
 
 import re
 from typing import (
-    Collection,
     Dict,
     Iterable,
     List,
@@ -44,18 +43,10 @@ from typing import (
 
 from .common import c_name
 from .error import QAPISemError
-from .parser import QAPIDoc
+from .parser import QAPIExpression
 from .source import QAPISourceInfo
 
 
-# Deserialized JSON objects as returned by the parser.
-# The values of this mapping are not necessary to exhaustively type
-# here (and also not practical as long as mypy lacks recursive
-# types), because the purpose of this module is to interrogate that
-# type.
-_JSONObject = Dict[str, object]
-
-
 # See check_name_str(), below.
 valid_name = re.compile(r'(__[a-z0-9.-]+_)?'
                         r'(x-)?'
@@ -192,11 +183,11 @@ def check_defn_name_str(name: str, info: QAPISourceInfo, meta: str) -> None:
                 info, "%s name should not end in 'List'" % meta)
 
 
-def check_keys(value: _JSONObject,
+def check_keys(value: Dict[str, object],
                info: QAPISourceInfo,
                source: str,
-               required: Collection[str],
-               optional: Collection[str]) -> None:
+               required: List[str],
+               optional: List[str]) -> None:
     """
     Ensure that a dict has a specific set of keys.
 
@@ -229,12 +220,11 @@ def check_keys(value: _JSONObject,
                pprint(unknown), pprint(allowed)))
 
 
-def check_flags(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_flags(expr: QAPIExpression) -> None:
     """
     Ensure flag members (if present) have valid values.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError:
         When certain flags have an invalid value, or when
@@ -243,21 +233,22 @@ def check_flags(expr: _JSONObject, info: QAPISourceInfo) -> None:
     for key in ('gen', 'success-response'):
         if key in expr and expr[key] is not False:
             raise QAPISemError(
-                info, "flag '%s' may only use false value" % key)
+                expr.info, "flag '%s' may only use false value" % key)
     for key in ('boxed', 'allow-oob', 'allow-preconfig', 'coroutine'):
         if key in expr and expr[key] is not True:
             raise QAPISemError(
-                info, "flag '%s' may only use true value" % key)
+                expr.info, "flag '%s' may only use true value" % key)
     if 'allow-oob' in expr and 'coroutine' in expr:
         # This is not necessarily a fundamental incompatibility, but
         # we don't have a use case and the desired semantics isn't
         # obvious.  The simplest solution is to forbid it until we get
         # a use case for it.
-        raise QAPISemError(info, "flags 'allow-oob' and 'coroutine' "
-                                 "are incompatible")
+        raise QAPISemError(
+            expr.info, "flags 'allow-oob' and 'coroutine' are incompatible")
 
 
-def check_if(expr: _JSONObject, info: QAPISourceInfo, source: str) -> None:
+def check_if(expr: Dict[str, object],
+             info: QAPISourceInfo, source: str) -> None:
     """
     Validate the ``if`` member of an object.
 
@@ -447,12 +438,11 @@ def check_features(features: Optional[object],
         check_if(feat, info, source)
 
 
-def check_enum(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_enum(expr: QAPIExpression) -> None:
     """
     Normalize and validate this expression as an ``enum`` definition.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError: When ``expr`` is not a valid ``enum``.
     :return: None, ``expr`` is normalized in-place as needed.
@@ -460,6 +450,7 @@ def check_enum(expr: _JSONObject, info: QAPISourceInfo) -> None:
     name = expr['enum']
     members = expr['data']
     prefix = expr.get('prefix')
+    info = expr.info
 
     if not isinstance(members, list):
         raise QAPISemError(info, "'data' must be an array")
@@ -486,12 +477,11 @@ def check_enum(expr: _JSONObject, info: QAPISourceInfo) -> None:
         check_features(member.get('features'), info)
 
 
-def check_struct(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_struct(expr: QAPIExpression) -> None:
     """
     Normalize and validate this expression as a ``struct`` definition.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError: When ``expr`` is not a valid ``struct``.
     :return: None, ``expr`` is normalized in-place as needed.
@@ -499,16 +489,15 @@ def check_struct(expr: _JSONObject, info: QAPISourceInfo) -> None:
     name = cast(str, expr['struct'])  # Checked in check_exprs
     members = expr['data']
 
-    check_type(members, info, "'data'", allow_dict=name)
-    check_type(expr.get('base'), info, "'base'")
+    check_type(members, expr.info, "'data'", allow_dict=name)
+    check_type(expr.get('base'), expr.info, "'base'")
 
 
-def check_union(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_union(expr: QAPIExpression) -> None:
     """
     Normalize and validate this expression as a ``union`` definition.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError: when ``expr`` is not a valid ``union``.
     :return: None, ``expr`` is normalized in-place as needed.
@@ -517,6 +506,7 @@ def check_union(expr: _JSONObject, info: QAPISourceInfo) -> None:
     base = expr['base']
     discriminator = expr['discriminator']
     members = expr['data']
+    info = expr.info
 
     check_type(base, info, "'base'", allow_dict=name)
     check_name_is_str(discriminator, info, "'discriminator'")
@@ -531,17 +521,17 @@ def check_union(expr: _JSONObject, info: QAPISourceInfo) -> None:
         check_type(value['type'], info, source, allow_array=not base)
 
 
-def check_alternate(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_alternate(expr: QAPIExpression) -> None:
     """
     Normalize and validate this expression as an ``alternate`` definition.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError: When ``expr`` is not a valid ``alternate``.
     :return: None, ``expr`` is normalized in-place as needed.
     """
     members = expr['data']
+    info = expr.info
 
     if not members:
         raise QAPISemError(info, "'data' must not be empty")
@@ -557,12 +547,11 @@ def check_alternate(expr: _JSONObject, info: QAPISourceInfo) -> None:
         check_type(value['type'], info, source, allow_array=True)
 
 
-def check_command(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_command(expr: QAPIExpression) -> None:
     """
     Normalize and validate this expression as a ``command`` definition.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError: When ``expr`` is not a valid ``command``.
     :return: None, ``expr`` is normalized in-place as needed.
@@ -572,17 +561,16 @@ def check_command(expr: _JSONObject, info: QAPISourceInfo) -> None:
     boxed = expr.get('boxed', False)
 
     if boxed and args is None:
-        raise QAPISemError(info, "'boxed': true requires 'data'")
-    check_type(args, info, "'data'", allow_dict=not boxed)
-    check_type(rets, info, "'returns'", allow_array=True)
+        raise QAPISemError(expr.info, "'boxed': true requires 'data'")
+    check_type(args, expr.info, "'data'", allow_dict=not boxed)
+    check_type(rets, expr.info, "'returns'", allow_array=True)
 
 
-def check_event(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_event(expr: QAPIExpression) -> None:
     """
     Normalize and validate this expression as an ``event`` definition.
 
     :param expr: The expression to validate.
-    :param info: QAPI schema source file information.
 
     :raise QAPISemError: When ``expr`` is not a valid ``event``.
     :return: None, ``expr`` is normalized in-place as needed.
@@ -591,11 +579,11 @@ def check_event(expr: _JSONObject, info: QAPISourceInfo) -> None:
     boxed = expr.get('boxed', False)
 
     if boxed and args is None:
-        raise QAPISemError(info, "'boxed': true requires 'data'")
-    check_type(args, info, "'data'", allow_dict=not boxed)
+        raise QAPISemError(expr.info, "'boxed': true requires 'data'")
+    check_type(args, expr.info, "'data'", allow_dict=not boxed)
 
 
-def check_exprs(exprs: List[_JSONObject]) -> List[_JSONObject]:
+def check_exprs(exprs: List[QAPIExpression]) -> List[QAPIExpression]:
     """
     Validate and normalize a list of parsed QAPI schema expressions.
 
@@ -607,21 +595,9 @@ def check_exprs(exprs: List[_JSONObject]) -> List[_JSONObject]:
     :raise QAPISemError: When any expression fails validation.
     :return: The same list of expressions (now modified).
     """
-    for expr_elem in exprs:
-        # Expression
-        assert isinstance(expr_elem['expr'], dict)
-        for key in expr_elem['expr'].keys():
-            assert isinstance(key, str)
-        expr: _JSONObject = expr_elem['expr']
-
-        # QAPISourceInfo
-        assert isinstance(expr_elem['info'], QAPISourceInfo)
-        info: QAPISourceInfo = expr_elem['info']
-
-        # Optional[QAPIDoc]
-        tmp = expr_elem.get('doc')
-        assert tmp is None or isinstance(tmp, QAPIDoc)
-        doc: Optional[QAPIDoc] = tmp
+    for expr in exprs:
+        info = expr.info
+        doc = expr.doc
 
         if 'include' in expr:
             continue
@@ -653,24 +629,24 @@ def check_exprs(exprs: List[_JSONObject]) -> List[_JSONObject]:
         if meta == 'enum':
             check_keys(expr, info, meta,
                        ['enum', 'data'], ['if', 'features', 'prefix'])
-            check_enum(expr, info)
+            check_enum(expr)
         elif meta == 'union':
             check_keys(expr, info, meta,
                        ['union', 'base', 'discriminator', 'data'],
                        ['if', 'features'])
             normalize_members(expr.get('base'))
             normalize_members(expr['data'])
-            check_union(expr, info)
+            check_union(expr)
         elif meta == 'alternate':
             check_keys(expr, info, meta,
                        ['alternate', 'data'], ['if', 'features'])
             normalize_members(expr['data'])
-            check_alternate(expr, info)
+            check_alternate(expr)
         elif meta == 'struct':
             check_keys(expr, info, meta,
                        ['struct', 'data'], ['base', 'if', 'features'])
             normalize_members(expr['data'])
-            check_struct(expr, info)
+            check_struct(expr)
         elif meta == 'command':
             check_keys(expr, info, meta,
                        ['command'],
@@ -678,17 +654,17 @@ def check_exprs(exprs: List[_JSONObject]) -> List[_JSONObject]:
                         'gen', 'success-response', 'allow-oob',
                         'allow-preconfig', 'coroutine'])
             normalize_members(expr.get('data'))
-            check_command(expr, info)
+            check_command(expr)
         elif meta == 'event':
             check_keys(expr, info, meta,
                        ['event'], ['data', 'boxed', 'if', 'features'])
             normalize_members(expr.get('data'))
-            check_event(expr, info)
+            check_event(expr)
         else:
             assert False, 'unexpected meta type'
 
         check_if(expr, info, meta)
         check_features(expr.get('features'), info)
-        check_flags(expr, info)
+        check_flags(expr)
 
     return exprs
index 1b006cdc133be4b60fe54ceca8fa2d1f52bc6a71..878f90b458302d2112dd8d5d1d76b258c82687c2 100644 (file)
@@ -21,6 +21,7 @@ from typing import (
     TYPE_CHECKING,
     Dict,
     List,
+    Mapping,
     Optional,
     Set,
     Union,
@@ -37,15 +38,19 @@ if TYPE_CHECKING:
     from .schema import QAPISchemaFeature, QAPISchemaMember
 
 
-#: Represents a single Top Level QAPI schema expression.
-TopLevelExpr = Dict[str, object]
-
 # Return value alias for get_expr().
 _ExprValue = Union[List[object], Dict[str, object], str, bool]
 
-# FIXME: Consolidate and centralize definitions for TopLevelExpr,
-# _ExprValue, _JSONValue, and _JSONObject; currently scattered across
-# several modules.
+
+class QAPIExpression(Dict[str, object]):
+    # pylint: disable=too-few-public-methods
+    def __init__(self,
+                 data: Mapping[str, object],
+                 info: QAPISourceInfo,
+                 doc: Optional['QAPIDoc'] = None):
+        super().__init__(data)
+        self.info = info
+        self.doc: Optional['QAPIDoc'] = doc
 
 
 class QAPIParseError(QAPISourceError):
@@ -100,7 +105,7 @@ class QAPISchemaParser:
         self.line_pos = 0
 
         # Parser output:
-        self.exprs: List[Dict[str, object]] = []
+        self.exprs: List[QAPIExpression] = []
         self.docs: List[QAPIDoc] = []
 
         # Showtime!
@@ -147,8 +152,7 @@ class QAPISchemaParser:
                                        "value of 'include' must be a string")
                 incl_fname = os.path.join(os.path.dirname(self._fname),
                                           include)
-                self.exprs.append({'expr': {'include': incl_fname},
-                                   'info': info})
+                self._add_expr(OrderedDict({'include': incl_fname}), info)
                 exprs_include = self._include(include, info, incl_fname,
                                               self._included)
                 if exprs_include:
@@ -165,17 +169,18 @@ class QAPISchemaParser:
                 for name, value in pragma.items():
                     self._pragma(name, value, info)
             else:
-                expr_elem = {'expr': expr,
-                             'info': info}
-                if cur_doc:
-                    if not cur_doc.symbol:
-                        raise QAPISemError(
-                            cur_doc.info, "definition documentation required")
-                    expr_elem['doc'] = cur_doc
-                self.exprs.append(expr_elem)
+                if cur_doc and not cur_doc.symbol:
+                    raise QAPISemError(
+                        cur_doc.info, "definition documentation required")
+                self._add_expr(expr, info, cur_doc)
             cur_doc = None
         self.reject_expr_doc(cur_doc)
 
+    def _add_expr(self, expr: Mapping[str, object],
+                  info: QAPISourceInfo,
+                  doc: Optional['QAPIDoc'] = None) -> None:
+        self.exprs.append(QAPIExpression(expr, info, doc))
+
     @staticmethod
     def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
         if doc and doc.symbol:
@@ -784,7 +789,7 @@ class QAPIDoc:
                                % feature.name)
         self.features[feature.name].connect(feature)
 
-    def check_expr(self, expr: TopLevelExpr) -> None:
+    def check_expr(self, expr: QAPIExpression) -> None:
         if self.has_section('Returns') and 'command' not in expr:
             raise QAPISemError(self.info,
                                "'Returns:' is only valid for commands")
index a72462820308c0b72c5c070937332e9137514a2c..90546df534589f10e15c1e67353f40d79faf44df 100644 (file)
@@ -23,6 +23,7 @@ disable=fixme,
         too-many-statements,
         too-many-instance-attributes,
         consider-using-f-string,
+        useless-option-value,
 
 [REPORTS]
 
index cd8661125cd274878709a37648dce44b530aa1a6..207e4d71f39a6d16b3a46445d2d2516bc173cfeb 100644 (file)
@@ -17,7 +17,7 @@
 from collections import OrderedDict
 import os
 import re
-from typing import Optional
+from typing import List, Optional
 
 from .common import (
     POINTER_SUFFIX,
@@ -29,7 +29,7 @@ from .common import (
 )
 from .error import QAPIError, QAPISemError, QAPISourceError
 from .expr import check_exprs
-from .parser import QAPISchemaParser
+from .parser import QAPIExpression, QAPISchemaParser
 
 
 class QAPISchemaIfCond:
@@ -964,10 +964,11 @@ class QAPISchema:
         name = self._module_name(fname)
         return self._module_dict[name]
 
-    def _def_include(self, expr, info, doc):
+    def _def_include(self, expr: QAPIExpression):
         include = expr['include']
-        assert doc is None
-        self._def_entity(QAPISchemaInclude(self._make_module(include), info))
+        assert expr.doc is None
+        self._def_entity(
+            QAPISchemaInclude(self._make_module(include), expr.info))
 
     def _def_builtin_type(self, name, json_type, c_type):
         self._def_entity(QAPISchemaBuiltinType(name, json_type, c_type))
@@ -1045,14 +1046,15 @@ class QAPISchema:
                 name, info, None, ifcond, None, None, members, None))
         return name
 
-    def _def_enum_type(self, expr, info, doc):
+    def _def_enum_type(self, expr: QAPIExpression):
         name = expr['enum']
         data = expr['data']
         prefix = expr.get('prefix')
         ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         self._def_entity(QAPISchemaEnumType(
-            name, info, doc, ifcond, features,
+            name, info, expr.doc, ifcond, features,
             self._make_enum_members(data, info), prefix))
 
     def _make_member(self, name, typ, ifcond, features, info):
@@ -1072,14 +1074,15 @@ class QAPISchema:
                                   value.get('features'), info)
                 for (key, value) in data.items()]
 
-    def _def_struct_type(self, expr, info, doc):
+    def _def_struct_type(self, expr: QAPIExpression):
         name = expr['struct']
         base = expr.get('base')
         data = expr['data']
+        info = expr.info
         ifcond = QAPISchemaIfCond(expr.get('if'))
         features = self._make_features(expr.get('features'), info)
         self._def_entity(QAPISchemaObjectType(
-            name, info, doc, ifcond, features, base,
+            name, info, expr.doc, ifcond, features, base,
             self._make_members(data, info),
             None))
 
@@ -1089,11 +1092,13 @@ class QAPISchema:
             typ = self._make_array_type(typ[0], info)
         return QAPISchemaVariant(case, info, typ, ifcond)
 
-    def _def_union_type(self, expr, info, doc):
+    def _def_union_type(self, expr: QAPIExpression):
         name = expr['union']
         base = expr['base']
         tag_name = expr['discriminator']
         data = expr['data']
+        assert isinstance(data, dict)
+        info = expr.info
         ifcond = QAPISchemaIfCond(expr.get('if'))
         features = self._make_features(expr.get('features'), info)
         if isinstance(base, dict):
@@ -1105,17 +1110,19 @@ class QAPISchema:
                                QAPISchemaIfCond(value.get('if')),
                                info)
             for (key, value) in data.items()]
-        members = []
+        members: List[QAPISchemaObjectTypeMember] = []
         self._def_entity(
-            QAPISchemaObjectType(name, info, doc, ifcond, features,
+            QAPISchemaObjectType(name, info, expr.doc, ifcond, features,
                                  base, members,
                                  QAPISchemaVariants(
                                      tag_name, info, None, variants)))
 
-    def _def_alternate_type(self, expr, info, doc):
+    def _def_alternate_type(self, expr: QAPIExpression):
         name = expr['alternate']
         data = expr['data']
+        assert isinstance(data, dict)
         ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         variants = [
             self._make_variant(key, value['type'],
@@ -1124,11 +1131,11 @@ class QAPISchema:
             for (key, value) in data.items()]
         tag_member = QAPISchemaObjectTypeMember('type', info, 'QType', False)
         self._def_entity(
-            QAPISchemaAlternateType(name, info, doc, ifcond, features,
-                                    QAPISchemaVariants(
-                                        None, info, tag_member, variants)))
+            QAPISchemaAlternateType(
+                name, info, expr.doc, ifcond, features,
+                QAPISchemaVariants(None, info, tag_member, variants)))
 
-    def _def_command(self, expr, info, doc):
+    def _def_command(self, expr: QAPIExpression):
         name = expr['command']
         data = expr.get('data')
         rets = expr.get('returns')
@@ -1139,6 +1146,7 @@ class QAPISchema:
         allow_preconfig = expr.get('allow-preconfig', False)
         coroutine = expr.get('coroutine', False)
         ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         if isinstance(data, OrderedDict):
             data = self._make_implicit_object_type(
@@ -1147,44 +1155,42 @@ class QAPISchema:
         if isinstance(rets, list):
             assert len(rets) == 1
             rets = self._make_array_type(rets[0], info)
-        self._def_entity(QAPISchemaCommand(name, info, doc, ifcond, features,
-                                           data, rets,
+        self._def_entity(QAPISchemaCommand(name, info, expr.doc, ifcond,
+                                           features, data, rets,
                                            gen, success_response,
                                            boxed, allow_oob, allow_preconfig,
                                            coroutine))
 
-    def _def_event(self, expr, info, doc):
+    def _def_event(self, expr: QAPIExpression):
         name = expr['event']
         data = expr.get('data')
         boxed = expr.get('boxed', False)
         ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         if isinstance(data, OrderedDict):
             data = self._make_implicit_object_type(
                 name, info, ifcond,
                 'arg', self._make_members(data, info))
-        self._def_entity(QAPISchemaEvent(name, info, doc, ifcond, features,
-                                         data, boxed))
+        self._def_entity(QAPISchemaEvent(name, info, expr.doc, ifcond,
+                                         features, data, boxed))
 
     def _def_exprs(self, exprs):
-        for expr_elem in exprs:
-            expr = expr_elem['expr']
-            info = expr_elem['info']
-            doc = expr_elem.get('doc')
+        for expr in exprs:
             if 'enum' in expr:
-                self._def_enum_type(expr, info, doc)
+                self._def_enum_type(expr)
             elif 'struct' in expr:
-                self._def_struct_type(expr, info, doc)
+                self._def_struct_type(expr)
             elif 'union' in expr:
-                self._def_union_type(expr, info, doc)
+                self._def_union_type(expr)
             elif 'alternate' in expr:
-                self._def_alternate_type(expr, info, doc)
+                self._def_alternate_type(expr)
             elif 'command' in expr:
-                self._def_command(expr, info, doc)
+                self._def_command(expr)
             elif 'event' in expr:
-                self._def_event(expr, info, doc)
+                self._def_event(expr)
             elif 'include' in expr:
-                self._def_include(expr, info, doc)
+                self._def_include(expr)
             else:
                 assert False
 
index 7820fec54c4960c2ec3c54d6b35903f2f1462b75..2463964805648ed23b41284b68cc94033a89101b 100644 (file)
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
 static void dma_blk_cb(void *opaque, int ret)
 {
     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+    AioContext *ctx = dbs->ctx;
     dma_addr_t cur_addr, cur_len;
     void *mem;
 
     trace_dma_blk_cb(dbs, ret);
 
+    aio_context_acquire(ctx);
     dbs->acb = NULL;
     dbs->offset += dbs->iov.size;
 
     if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
         dma_complete(dbs, ret);
-        return;
+        goto out;
     }
     dma_blk_unmap(dbs);
 
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
 
     if (dbs->iov.size == 0) {
         trace_dma_map_wait(dbs);
-        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
+        dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
         cpu_register_map_client(dbs->bh);
-        return;
+        goto out;
     }
 
     if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
                                 QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
     }
 
-    aio_context_acquire(dbs->ctx);
     dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                             dma_blk_cb, dbs, dbs->io_func_opaque);
-    aio_context_release(dbs->ctx);
     assert(dbs->acb);
+out:
+    aio_context_release(ctx);
 }
 
 static void dma_aio_cancel(BlockAIOCB *acb)
index d3e0ab4eda2650d5183e2b234bee929e94c0a08b..34bd2a33a76ae3b6e9edb35e6bead9578d6bed0e 100644 (file)
@@ -28,7 +28,6 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/cutils.h"
-#include "qapi/qmp/qerror.h"
 #include "qom/object_interfaces.h"
 #include CONFIG_DEVICES
 #ifdef CONFIG_PSERIES
index f7114bed7d51985f94ff6c8ed8ddfd9c8cf04b5e..4b2bf75dd678c3f847122ba9727745f687a1e15c 100644 (file)
@@ -152,11 +152,8 @@ void configure_rtc(QemuOpts *opts)
         if (!strcmp(value, "utc")) {
             rtc_base_type = RTC_BASE_UTC;
         } else if (!strcmp(value, "localtime")) {
-            Error *blocker = NULL;
             rtc_base_type = RTC_BASE_LOCALTIME;
-            error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED,
-                      "-rtc base=localtime");
-            replay_add_blocker(blocker);
+            replay_add_blocker("-rtc base=localtime");
         } else {
             rtc_base_type = RTC_BASE_DATETIME;
             configure_rtc_base_datetime(value);
index b2ee3fee3f0631b0fa58f299689e16a457beb6b0..6e526d95bbe0a21328f1d379523d7273ddeb761f 100644 (file)
@@ -1852,9 +1852,7 @@ static void qemu_apply_machine_options(QDict *qdict)
     }
 
     if (current_machine->smp.cpus > 1) {
-        Error *blocker = NULL;
-        error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
-        replay_add_blocker(blocker);
+        replay_add_blocker("smp");
     }
 }
 
@@ -2204,14 +2202,18 @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
     int ret;
     bool qtest_with_kvm;
 
+    if (!acc) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "accel");
+        goto bad;
+    }
+
     qtest_with_kvm = g_str_equal(acc, "kvm") && qtest_chrdev != NULL;
 
     if (!ac) {
-        *p_init_failed = true;
         if (!qtest_with_kvm) {
             error_report("invalid accelerator %s", acc);
         }
-        return 0;
+        goto bad;
     }
     accel = ACCEL(object_new_with_class(OBJECT_CLASS(ac)));
     object_apply_compat_props(OBJECT(accel));
@@ -2221,14 +2223,17 @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
 
     ret = accel_init_machine(accel, current_machine);
     if (ret < 0) {
-        *p_init_failed = true;
         if (!qtest_with_kvm || ret != -ENOENT) {
             error_report("failed to initialize %s: %s", acc, strerror(-ret));
         }
-        return 0;
+        goto bad;
     }
 
     return 1;
+
+bad:
+    *p_init_failed = true;
+    return 0;
 }
 
 static void configure_accelerators(const char *progname)
@@ -2767,13 +2772,8 @@ void qemu_init(int argc, char **argv)
                 drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS);
                 break;
             case QEMU_OPTION_snapshot:
-                {
-                    Error *blocker = NULL;
-                    snapshot = 1;
-                    error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED,
-                               "-snapshot");
-                    replay_add_blocker(blocker);
-                }
+                snapshot = 1;
+                replay_add_blocker("-snapshot");
                 break;
             case QEMU_OPTION_numa:
                 opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
index 981585cbdf0e312533546425556fd37fa96e6f3a..7657467a5d7e187b2fc058bf4bbc7b632159df14 100644 (file)
@@ -45,7 +45,6 @@ stub_ss.add(files('target-get-monitor-def.c'))
 stub_ss.add(files('target-monitor-defs.c'))
 stub_ss.add(files('trace-control.c'))
 stub_ss.add(files('uuid.c'))
-stub_ss.add(files('vmgenid.c'))
 stub_ss.add(files('vmstate.c'))
 stub_ss.add(files('vm-stop.c'))
 stub_ss.add(files('win32-kbd-hook.c'))
diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c
deleted file mode 100644 (file)
index bfad656..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
-#include "qapi/qmp/qerror.h"
-
-GuidInfo *qmp_query_vm_generation_id(Error **errp)
-{
-    error_setg(errp, QERR_UNSUPPORTED);
-    return NULL;
-}
index efcf9181b972cd11ee2f85e3f1ac5ddc3ecc207c..1ee64e99de807612cf5ab247cf7139c5234121d5 100644 (file)
@@ -120,6 +120,104 @@ enum {
     ARM_CP_SME                   = 1 << 19,
 };
 
+/*
+ * Interface for defining coprocessor registers.
+ * Registers are defined in tables of arm_cp_reginfo structs
+ * which are passed to define_arm_cp_regs().
+ */
+
+/*
+ * When looking up a coprocessor register we look for it
+ * via an integer which encodes all of:
+ *  coprocessor number
+ *  Crn, Crm, opc1, opc2 fields
+ *  32 or 64 bit register (ie is it accessed via MRC/MCR
+ *    or via MRRC/MCRR?)
+ *  non-secure/secure bank (AArch32 only)
+ * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
+ * (In this case crn and opc2 should be zero.)
+ * For AArch64, there is no 32/64 bit size distinction;
+ * instead all registers have a 2 bit op0, 3 bit op1 and op2,
+ * and 4 bit CRn and CRm. The encoding patterns are chosen
+ * to be easy to convert to and from the KVM encodings, and also
+ * so that the hashtable can contain both AArch32 and AArch64
+ * registers (to allow for interprocessing where we might run
+ * 32 bit code on a 64 bit core).
+ */
+/*
+ * This bit is private to our hashtable cpreg; in KVM register
+ * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
+ * in the upper bits of the 64 bit ID.
+ */
+#define CP_REG_AA64_SHIFT 28
+#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
+
+/*
+ * To enable banking of coprocessor registers depending on ns-bit we
+ * add a bit to distinguish between secure and non-secure cpregs in the
+ * hashtable.
+ */
+#define CP_REG_NS_SHIFT 29
+#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
+
+#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2)   \
+    ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) |   \
+     ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
+
+#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
+    (CP_REG_AA64_MASK |                                 \
+     ((cp) << CP_REG_ARM_COPROC_SHIFT) |                \
+     ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) |         \
+     ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) |         \
+     ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) |         \
+     ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) |         \
+     ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
+/*
+ * Convert a full 64 bit KVM register ID to the truncated 32 bit
+ * version used as a key for the coprocessor register hashtable
+ */
+static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
+{
+    uint32_t cpregid = kvmid;
+    if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
+        cpregid |= CP_REG_AA64_MASK;
+    } else {
+        if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+            cpregid |= (1 << 15);
+        }
+
+        /*
+         * KVM is always non-secure so add the NS flag on AArch32 register
+         * entries.
+         */
+         cpregid |= 1 << CP_REG_NS_SHIFT;
+    }
+    return cpregid;
+}
+
+/*
+ * Convert a truncated 32 bit hashtable key into the full
+ * 64 bit KVM register ID.
+ */
+static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
+{
+    uint64_t kvmid;
+
+    if (cpregid & CP_REG_AA64_MASK) {
+        kvmid = cpregid & ~CP_REG_AA64_MASK;
+        kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
+    } else {
+        kvmid = cpregid & ~(1 << 15);
+        if (cpregid & (1 << 15)) {
+            kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+        } else {
+            kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+        }
+    }
+    return kvmid;
+}
+
 /*
  * Valid values for ARMCPRegInfo state field, indicating which of
  * the AArch32 and AArch64 execution states this register is visible in.
index 5f63316dbf22450c73c18f50ccbf8fb4a85dc88d..876ab8f3bf8a2cb813fcf463a9c40c775a62f797 100644 (file)
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/loader.h"
 #include "hw/boards.h"
-#endif
+#ifdef CONFIG_TCG
+#include "hw/intc/armv7m_nvic.h"
+#endif /* CONFIG_TCG */
+#endif /* !CONFIG_USER_ONLY */
 #include "sysemu/tcg.h"
 #include "sysemu/qtest.h"
 #include "sysemu/hw_accel.h"
index 7bc97fece973ab3749e42b378b18feee75efd3a9..12b1082537c546ce5694191f9a2bd1596aca9267 100644 (file)
@@ -227,6 +227,8 @@ typedef struct CPUARMTBFlags {
 
 typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
 
+typedef struct NVICState NVICState;
+
 typedef struct CPUArchState {
     /* Regs for current mode.  */
     uint32_t regs[16];
@@ -721,11 +723,6 @@ typedef struct CPUArchState {
     ARMVectorReg zarray[ARM_MAX_VQ * 16];
 #endif
 
-#if defined(CONFIG_USER_ONLY)
-    /* For usermode syscall translation.  */
-    int eabi;
-#endif
-
     struct CPUBreakpoint *cpu_breakpoint[16];
     struct CPUWatchpoint *cpu_watchpoint[16];
 
@@ -772,10 +769,15 @@ typedef struct CPUArchState {
         uint32_t ctrl;
     } sau;
 
-    void *nvic;
+#if !defined(CONFIG_USER_ONLY)
+    NVICState *nvic;
     const struct arm_boot_info *boot_info;
     /* Store GICv3CPUState to access from this struct */
     void *gicv3state;
+#else /* CONFIG_USER_ONLY */
+    /* For usermode syscall translation.  */
+    bool eabi;
+#endif /* CONFIG_USER_ONLY */
 
 #ifdef TARGET_TAGGED_ADDRESSES
     /* Linux syscall tagged address support */
@@ -2557,220 +2559,6 @@ void arm_cpu_list(void);
 uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
                                  uint32_t cur_el, bool secure);
 
-/* Interface between CPU and Interrupt controller.  */
-#ifndef CONFIG_USER_ONLY
-bool armv7m_nvic_can_take_pending_exception(void *opaque);
-#else
-static inline bool armv7m_nvic_can_take_pending_exception(void *opaque)
-{
-    return true;
-}
-#endif
-/**
- * armv7m_nvic_set_pending: mark the specified exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Marks the specified exception as pending. Note that we will assert()
- * if @secure is true and @irq does not specify one of the fixed set
- * of architecturally banked exceptions.
- */
-void armv7m_nvic_set_pending(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_set_pending_derived: mark this derived exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Similar to armv7m_nvic_set_pending(), but specifically for derived
- * exceptions (exceptions generated in the course of trying to take
- * a different exception).
- */
-void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_set_pending_lazyfp: mark this lazy FP exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Similar to armv7m_nvic_set_pending(), but specifically for exceptions
- * generated in the course of lazy stacking of FP registers.
- */
-void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_get_pending_irq_info: return highest priority pending
- *    exception, and whether it targets Secure state
- * @opaque: the NVIC
- * @pirq: set to pending exception number
- * @ptargets_secure: set to whether pending exception targets Secure
- *
- * This function writes the number of the highest priority pending
- * exception (the one which would be made active by
- * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
- * to true if the current highest priority pending exception should
- * be taken to Secure state, false for NS.
- */
-void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq,
-                                      bool *ptargets_secure);
-/**
- * armv7m_nvic_acknowledge_irq: make highest priority pending exception active
- * @opaque: the NVIC
- *
- * Move the current highest priority pending exception from the pending
- * state to the active state, and update v7m.exception to indicate that
- * it is the exception currently being handled.
- */
-void armv7m_nvic_acknowledge_irq(void *opaque);
-/**
- * armv7m_nvic_complete_irq: complete specified interrupt or exception
- * @opaque: the NVIC
- * @irq: the exception number to complete
- * @secure: true if this exception was secure
- *
- * Returns: -1 if the irq was not active
- *           1 if completing this irq brought us back to base (no active irqs)
- *           0 if there is still an irq active after this one was completed
- * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
- */
-int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure)
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Return whether an exception is "ready", i.e. whether the exception is
- * enabled and is configured at a priority which would allow it to
- * interrupt the current execution priority. This controls whether the
- * RDY bit for it in the FPCCR is set.
- */
-bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_raw_execution_priority: return the raw execution priority
- * @opaque: the NVIC
- *
- * Returns: the raw execution priority as defined by the v8M architecture.
- * This is the execution priority minus the effects of AIRCR.PRIS,
- * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting.
- * (v8M ARM ARM I_PKLD.)
- */
-int armv7m_nvic_raw_execution_priority(void *opaque);
-/**
- * armv7m_nvic_neg_prio_requested: return true if the requested execution
- * priority is negative for the specified security state.
- * @opaque: the NVIC
- * @secure: the security state to test
- * This corresponds to the pseudocode IsReqExecPriNeg().
- */
-#ifndef CONFIG_USER_ONLY
-bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure);
-#else
-static inline bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
-{
-    return false;
-}
-#endif
-
-/* Interface for defining coprocessor registers.
- * Registers are defined in tables of arm_cp_reginfo structs
- * which are passed to define_arm_cp_regs().
- */
-
-/* When looking up a coprocessor register we look for it
- * via an integer which encodes all of:
- *  coprocessor number
- *  Crn, Crm, opc1, opc2 fields
- *  32 or 64 bit register (ie is it accessed via MRC/MCR
- *    or via MRRC/MCRR?)
- *  non-secure/secure bank (AArch32 only)
- * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
- * (In this case crn and opc2 should be zero.)
- * For AArch64, there is no 32/64 bit size distinction;
- * instead all registers have a 2 bit op0, 3 bit op1 and op2,
- * and 4 bit CRn and CRm. The encoding patterns are chosen
- * to be easy to convert to and from the KVM encodings, and also
- * so that the hashtable can contain both AArch32 and AArch64
- * registers (to allow for interprocessing where we might run
- * 32 bit code on a 64 bit core).
- */
-/* This bit is private to our hashtable cpreg; in KVM register
- * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
- * in the upper bits of the 64 bit ID.
- */
-#define CP_REG_AA64_SHIFT 28
-#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
-
-/* To enable banking of coprocessor registers depending on ns-bit we
- * add a bit to distinguish between secure and non-secure cpregs in the
- * hashtable.
- */
-#define CP_REG_NS_SHIFT 29
-#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
-
-#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2)   \
-    ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) |   \
-     ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
-
-#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
-    (CP_REG_AA64_MASK |                                 \
-     ((cp) << CP_REG_ARM_COPROC_SHIFT) |                \
-     ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) |         \
-     ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) |         \
-     ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) |         \
-     ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) |         \
-     ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
-
-/* Convert a full 64 bit KVM register ID to the truncated 32 bit
- * version used as a key for the coprocessor register hashtable
- */
-static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
-{
-    uint32_t cpregid = kvmid;
-    if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
-        cpregid |= CP_REG_AA64_MASK;
-    } else {
-        if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
-            cpregid |= (1 << 15);
-        }
-
-        /* KVM is always non-secure so add the NS flag on AArch32 register
-         * entries.
-         */
-         cpregid |= 1 << CP_REG_NS_SHIFT;
-    }
-    return cpregid;
-}
-
-/* Convert a truncated 32 bit hashtable key into the full
- * 64 bit KVM register ID.
- */
-static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
-{
-    uint64_t kvmid;
-
-    if (cpregid & CP_REG_AA64_MASK) {
-        kvmid = cpregid & ~CP_REG_AA64_MASK;
-        kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
-    } else {
-        kvmid = cpregid & ~(1 << 15);
-        if (cpregid & (1 << 15)) {
-            kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
-        } else {
-            kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
-        }
-    }
-    return kvmid;
-}
-
 /* Return the highest implemented Exception Level */
 static inline int arm_highest_el(CPUARMState *env)
 {
index ccde5080eb708bbddca51fde502c2dba58c1af85..df0c45e523b1860042975dda2ef0193457c130e0 100644 (file)
@@ -19,6 +19,9 @@
 #include "hw/boards.h"
 #endif
 #include "cpregs.h"
+#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
+#include "hw/intc/armv7m_nvic.h"
+#endif
 
 
 /* Share AArch32 -cpu max features with AArch64. */
index c62ed05c1229d047d85eb9e429f02ea47a84ef98..07d410036545fcb85c2a84866a83d41f396d16ba 100644 (file)
@@ -22,6 +22,7 @@
 #include "hw/irq.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
 #include "qapi/qapi-commands-machine-target.h"
 #include "qapi/error.h"
 #include "qemu/guest-random.h"
@@ -7021,6 +7022,7 @@ static void define_pmu_regs(ARMCPU *cpu)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
 /*
  * We don't know until after realize whether there's a GICv3
  * attached, and that is what registers the gicv3 sysregs.
@@ -7038,7 +7040,6 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
     return pfr1;
 }
 
-#ifndef CONFIG_USER_ONLY
 static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     ARMCPU *cpu = env_archcpu(env);
@@ -7998,8 +7999,16 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_NO_RAW,
               .accessfn = access_aa32_tid3,
+#ifdef CONFIG_USER_ONLY
+              .type = ARM_CP_CONST,
+              .resetvalue = cpu->isar.id_pfr1,
+#else
+              .type = ARM_CP_NO_RAW,
+              .accessfn = access_aa32_tid3,
               .readfn = id_pfr1_read,
-              .writefn = arm_cp_write_ignore },
+              .writefn = arm_cp_write_ignore
+#endif
+            },
             { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -10818,11 +10827,13 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
     unsigned int cur_el = arm_current_el(env);
     int rt;
 
-    /*
-     * Note that new_el can never be 0.  If cur_el is 0, then
-     * el0_a64 is is_a64(), else el0_a64 is ignored.
-     */
-    aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+    if (tcg_enabled()) {
+        /*
+         * Note that new_el can never be 0.  If cur_el is 0, then
+         * el0_a64 is is_a64(), else el0_a64 is ignored.
+         */
+        aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+    }
 
     if (cur_el < new_el) {
         /*
@@ -11006,7 +11017,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
  * trapped to the hypervisor in KVM.
  */
 #ifdef CONFIG_TCG
-static void handle_semihosting(CPUState *cs)
+static void tcg_handle_semihosting(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
@@ -11055,7 +11066,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
                       env->exception.syndrome);
     }
 
-    if (arm_is_psci_call(cpu, cs->exception_index)) {
+    if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) {
         arm_handle_psci_call(cpu);
         qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
         return;
@@ -11068,7 +11079,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
      */
 #ifdef CONFIG_TCG
     if (cs->exception_index == EXCP_SEMIHOST) {
-        handle_semihosting(cs);
+        tcg_handle_semihosting(cs);
         return;
     }
 #endif
index e1e018da4631b579916cec69c35b161dd60858f4..759b70c646f83dbcdcb30cb5b592a9dd7a950425 100644 (file)
@@ -597,20 +597,6 @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx)
 
 int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx);
 
-/*
- * Return the MMU index for a v7M CPU with all relevant information
- * manually specified.
- */
-ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
-                              bool secstate, bool priv, bool negpri);
-
-/*
- * Return the MMU index for a v7M CPU in the specified security and
- * privilege state.
- */
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
-                                                bool secstate, bool priv);
-
 /* Return the MMU index for a v7M CPU in the specified security state */
 ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate);
 
index e7e746ea182687c391b6b5b848ca17e9fd51b4b2..f94e87e7289fa6581e4b6d4960b37888f7b53d66 100644 (file)
@@ -18,6 +18,9 @@
 #include "exec/cpu_ldst.h"
 #include "semihosting/common-semi.h"
 #endif
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/intc/armv7m_nvic.h"
+#endif
 
 static void v7m_msr_xpsr(CPUARMState *env, uint32_t mask,
                          uint32_t reg, uint32_t val)
@@ -150,7 +153,49 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
     return 0;
 }
 
-#else
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+{
+    return ARMMMUIdx_MUser;
+}
+
+#else /* !CONFIG_USER_ONLY */
+
+static ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
+                                     bool secstate, bool priv, bool negpri)
+{
+    ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
+
+    if (priv) {
+        mmu_idx |= ARM_MMU_IDX_M_PRIV;
+    }
+
+    if (negpri) {
+        mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
+    }
+
+    if (secstate) {
+        mmu_idx |= ARM_MMU_IDX_M_S;
+    }
+
+    return mmu_idx;
+}
+
+static ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
+                                                       bool secstate, bool priv)
+{
+    bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate);
+
+    return arm_v7m_mmu_idx_all(env, secstate, priv, negpri);
+}
+
+/* Return the MMU index for a v7M CPU in the specified security state */
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+{
+    bool priv = arm_v7m_is_handler_mode(env) ||
+        !(env->v7m.control[secstate] & 1);
+
+    return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
+}
 
 /*
  * What kind of stack write are we doing? This affects how exceptions
@@ -973,7 +1018,7 @@ static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr,
      * that we will need later in order to do lazy FP reg stacking.
      */
     bool is_secure = env->v7m.secure;
-    void *nvic = env->nvic;
+    NVICState *nvic = env->nvic;
     /*
      * Some bits are unbanked and live always in fpccr[M_REG_S]; some bits
      * are banked and we want to update the bit in the bank for the
@@ -2855,40 +2900,3 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
 }
 
 #endif /* !CONFIG_USER_ONLY */
-
-ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
-                              bool secstate, bool priv, bool negpri)
-{
-    ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
-
-    if (priv) {
-        mmu_idx |= ARM_MMU_IDX_M_PRIV;
-    }
-
-    if (negpri) {
-        mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
-    }
-
-    if (secstate) {
-        mmu_idx |= ARM_MMU_IDX_M_S;
-    }
-
-    return mmu_idx;
-}
-
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
-                                                bool secstate, bool priv)
-{
-    bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate);
-
-    return arm_v7m_mmu_idx_all(env, secstate, priv, negpri);
-}
-
-/* Return the MMU index for a v7M CPU in the specified security state */
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
-{
-    bool priv = arm_v7m_is_handler_mode(env) ||
-        !(env->v7m.control[secstate] & 1);
-
-    return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
-}
index 5f261526525635f90e88c6346e80957f2f879a03..b4c3850570cbb8bdedf441e11580f7b6499d1fa2 100644 (file)
@@ -839,6 +839,15 @@ static int cpu_post_load(void *opaque, int version_id)
         }
     }
 
+    /*
+     * Misaligned thumb pc is architecturally impossible. Fail the
+     * incoming migration. For TCG it would trigger the assert in
+     * thumb_tr_translate_insn().
+     */
+    if (!is_a64(env) && env->thumb && (env->regs[15] & 1)) {
+        return -1;
+    }
+
     hw_breakpoint_update_all(cpu);
     hw_watchpoint_update_all(cpu);
 
@@ -856,15 +865,6 @@ static int cpu_post_load(void *opaque, int version_id)
         }
     }
 
-    /*
-     * Misaligned thumb pc is architecturally impossible.
-     * We have an assert in thumb_tr_translate_insn to verify this.
-     * Fail an incoming migrate to avoid this assert.
-     */
-    if (!is_a64(env) && env->thumb && (env->regs[15] & 1)) {
-        return -1;
-    }
-
     if (!kvm_enabled()) {
         pmu_op_finish(&cpu->env);
     }
index c3a2cf6f28250f7d0a7cd68fa1b0d3d42ed09897..786971284ae7ea6a79339bc14b390bcc127db37f 100644 (file)
@@ -121,7 +121,9 @@ int x86_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
             return gdb_get_reg32(mem_buf, env->regs[gpr_map32[n]]);
         }
     } else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) {
-        floatx80 *fp = (floatx80 *) &env->fpregs[n - IDX_FP_REGS];
+        int st_index = n - IDX_FP_REGS;
+        int r_index = (st_index + env->fpstt) % 8;
+        floatx80 *fp = &env->fpregs[r_index].d;
         int len = gdb_get_reg64(mem_buf, cpu_to_le64(fp->low));
         len += gdb_get_reg16(mem_buf, cpu_to_le16(fp->high));
         return len;
index ad5b7b8bb5b87c6e06e2bcdd889668c1ffaba6ef..65128463270e141c919c314b54a1062cfc5b64ff 100644 (file)
@@ -28,7 +28,6 @@
 #include "monitor/hmp-target.h"
 #include "monitor/hmp.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
 #include "sysemu/kvm.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc-target.h"
index 7a29295d1edc74884b6efbd1b41c5aecdd2ba865..96e1c15cc3fa13b3d0dcf542632ab4074fee3501 100644 (file)
@@ -15,7 +15,6 @@
 #include "monitor/monitor.h"
 #include "monitor/hmp-target.h"
 #include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/error.h"
 #include "sev.h"
 
index 32f7dbac4efa4c0a4a141a4f2962db5a497868a5..0ec970496e5dd4e49f7e7a3feef5c9a1ba39398e 100644 (file)
@@ -34,7 +34,6 @@
 #include "monitor/monitor.h"
 #include "monitor/hmp-target.h"
 #include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qerror.h"
 #include "exec/confidential-guest-support.h"
 #include "hw/i386/pc.h"
 #include "exec/address-spaces.h"
index 7037ff91c612b3f03b151dbc2eec56558a9f75ca..e61ae9a2e92d2bab4de9b04278372a0c24a10a99 100644 (file)
@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq)
 
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
+    int opposite_cc_op;
     TCGv carry_in = NULL;
     TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
     TCGv zero;
@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
     if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
         /* Re-use the carry-out from a previous round.  */
         carry_in = carry_out;
-        cc_op = s->cc_op;
-    } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
-        /* Merge with the carry-out from the opposite instruction.  */
-        cc_op = CC_OP_ADCOX;
-    }
-
-    /* If we don't have a carry-in, get it out of EFLAGS.  */
-    if (!carry_in) {
+    } else {
+        /* We don't have a carry-in, get it out of EFLAGS.  */
         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
             gen_compute_eflags(s);
         }
@@ -1042,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 #ifdef TARGET_X86_64
     case MO_32:
         /* If TL is 64-bit just do everything in 64-bit arithmetic.  */
+        tcg_gen_ext32u_tl(s->T0, s->T0);
+        tcg_gen_ext32u_tl(s->T1, s->T1);
         tcg_gen_add_i64(s->T0, s->T0, s->T1);
         tcg_gen_add_i64(s->T0, s->T0, carry_in);
         tcg_gen_shri_i64(carry_out, s->T0, 32);
@@ -1053,7 +1050,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
         tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
         break;
     }
-    set_cc_op(s, cc_op);
+
+    opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
+    if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
+        /* Merge with the carry-out from the opposite instruction.  */
+        set_cc_op(s, CC_OP_ADCOX);
+    } else {
+        set_cc_op(s, cc_op);
+    }
 }
 
 static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -1078,30 +1082,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
-    TCGv bound, zero;
+    TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+    TCGv zero = tcg_constant_tl(0);
+    TCGv mone = tcg_constant_tl(-1);
 
     /*
      * Extract START, and shift the operand.
      * Shifts larger than operand size get zeros.
      */
     tcg_gen_ext8u_tl(s->A0, s->T1);
+    if (TARGET_LONG_BITS == 64 && ot == MO_32) {
+        tcg_gen_ext32u_tl(s->T0, s->T0);
+    }
     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
 
-    bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
-    zero = tcg_constant_tl(0);
     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
 
     /*
-     * Extract the LEN into a mask.  Lengths larger than
-     * operand size get all ones.
+     * Extract the LEN into an inverse mask.  Lengths larger than
+     * operand size get all zeros, length 0 gets all ones.
      */
     tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
-    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
-
-    tcg_gen_movi_tl(s->T1, 1);
-    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
-    tcg_gen_subi_tl(s->T1, s->T1, 1);
-    tcg_gen_and_tl(s->T0, s->T0, s->T1);
+    tcg_gen_shl_tl(s->T1, mone, s->A0);
+    tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
+    tcg_gen_andc_tl(s->T0, s->T0, s->T1);
 
     gen_op_update1_cc(s);
     set_cc_op(s, CC_OP_LOGICB + ot);
@@ -1111,6 +1115,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
+    tcg_gen_mov_tl(cpu_cc_src, s->T0);
     tcg_gen_neg_tl(s->T1, s->T0);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1121,6 +1126,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
 {
     MemOp ot = decode->op[0].ot;
 
+    tcg_gen_mov_tl(cpu_cc_src, s->T0);
     tcg_gen_subi_tl(s->T1, s->T0, 1);
     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1131,6 +1137,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
+    tcg_gen_mov_tl(cpu_cc_src, s->T0);
     tcg_gen_subi_tl(s->T1, s->T0, 1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
index 817681f9b218138409cbddce04c1d3066efeb90b..a2d2f5c34090f463a923c968165867969234b635 100644 (file)
@@ -28,6 +28,7 @@
 #include "qemu/module.h"
 #include "hw/qdev-properties.h"
 #include "exec/exec-all.h"
+#include "exec/gdbstub.h"
 #include "fpu/softfloat-helpers.h"
 
 static const struct {
@@ -294,6 +295,9 @@ static void mb_cpu_initfn(Object *obj)
     CPUMBState *env = &cpu->env;
 
     cpu_set_cpustate_pointers(cpu);
+    gdb_register_coprocessor(CPU(cpu), mb_cpu_gdb_read_stack_protect,
+                             mb_cpu_gdb_write_stack_protect, 2,
+                             "microblaze-stack-protect.xml", 0);
 
     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
 
@@ -422,7 +426,8 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
     cc->sysemu_ops = &mb_sysemu_ops;
 #endif
     device_class_set_props(dc, mb_properties);
-    cc->gdb_num_core_regs = 32 + 27;
+    cc->gdb_num_core_regs = 32 + 25;
+    cc->gdb_core_xml_file = "microblaze-core.xml";
 
     cc->disas_set_info = mb_disas_set_info;
     cc->tcg_ops = &mb_tcg_ops;
index 1e84dd8f47bbfaadf96b79e45867cc9bd0e14243..e541fbb0b36637710c60e0f10ada62d58452a9cf 100644 (file)
@@ -367,6 +367,8 @@ hwaddr mb_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
                                         MemTxAttrs *attrs);
 int mb_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int mb_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int mb_cpu_gdb_read_stack_protect(CPUArchState *cpu, GByteArray *buf, int reg);
+int mb_cpu_gdb_write_stack_protect(CPUArchState *cpu, uint8_t *buf, int reg);
 
 static inline uint32_t mb_cpu_read_msr(const CPUMBState *env)
 {
index 2e6e070051ffa6b6df44c8dd33b94221259f773a..8143fcae88aee5f903d577b9f55c4e23d30f0de8 100644 (file)
@@ -39,8 +39,11 @@ enum {
     GDB_PVR0  = 32 + 6,
     GDB_PVR11 = 32 + 17,
     GDB_EDR   = 32 + 18,
-    GDB_SLR   = 32 + 25,
-    GDB_SHR   = 32 + 26,
+};
+
+enum {
+    GDB_SP_SHL,
+    GDB_SP_SHR,
 };
 
 int mb_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
@@ -83,16 +86,27 @@ int mb_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
     case GDB_EDR:
         val = env->edr;
         break;
-    case GDB_SLR:
+    default:
+        /* Other SRegs aren't modeled, so report a value of 0 */
+        val = 0;
+        break;
+    }
+    return gdb_get_reg32(mem_buf, val);
+}
+
+int mb_cpu_gdb_read_stack_protect(CPUMBState *env, GByteArray *mem_buf, int n)
+{
+    uint32_t val;
+
+    switch (n) {
+    case GDB_SP_SHL:
         val = env->slr;
         break;
-    case GDB_SHR:
+    case GDB_SP_SHR:
         val = env->shr;
         break;
     default:
-        /* Other SRegs aren't modeled, so report a value of 0 */
-        val = 0;
-        break;
+        return 0;
     }
     return gdb_get_reg32(mem_buf, val);
 }
@@ -135,12 +149,21 @@ int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     case GDB_EDR:
         env->edr = tmp;
         break;
-    case GDB_SLR:
-        env->slr = tmp;
+    }
+    return 4;
+}
+
+/*
+ * Write stack-protect register @n from @mem_buf (loaded with ldl_p()):
+ * GDB_SP_SHL sets env->slr and GDB_SP_SHR sets env->shr.  Returns 4 when
+ * a register was written, or 0 when @n is not one of those two registers.
+ */
+int mb_cpu_gdb_write_stack_protect(CPUMBState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+    case GDB_SP_SHL:
+        env->slr = ldl_p(mem_buf);
         break;
-    case GDB_SHR:
-        env->shr = tmp;
+    case GDB_SP_SHR:
+        env->shr = ldl_p(mem_buf);
         break;
+    default:
+        return 0;
     }
     return 4;
 }
index 25a546842fab23e79c24e2099d2a02551a894f24..a313e88c07e3ccf8bf027e98f8097c671b520a5f 100644 (file)
@@ -274,6 +274,10 @@ class QemuSystemTest(QemuBaseTest):
 
         super().setUp('qemu-system-')
 
+        accel_required = self._get_unique_tag_val('accel')
+        if accel_required:
+            self.require_accelerator(accel_required)
+
         self.machine = self.params.get('machine',
                                        default=self._get_unique_tag_val('machine'))
 
index b3e58fa309363e42ff1f57d53f6142c1d82ca393..fe0bb180d9075dd73aef336158de7df7337e0cb3 100644 (file)
@@ -58,52 +58,16 @@ class BootLinuxX8664(LinuxTest):
         self.launch_and_wait(set_up_ssh_connection=False)
 
 
-# For Aarch64 we only boot KVM tests in CI as the TCG tests are very
-# heavyweight. There are lighter weight distros which we use in the
-# machine_aarch64_virt.py tests.
+# For Aarch64 we only boot KVM tests in CI as booting the current
+# Fedora OS in TCG tests is very heavyweight. There are lighter weight
+# distros which we use in the machine_aarch64_virt.py tests.
 class BootLinuxAarch64(LinuxTest):
     """
     :avocado: tags=arch:aarch64
     :avocado: tags=machine:virt
-    :avocado: tags=machine:gic-version=2
     """
     timeout = 720
 
-    def add_common_args(self):
-        self.vm.add_args('-bios',
-                         os.path.join(BUILD_DIR, 'pc-bios',
-                                      'edk2-aarch64-code.fd'))
-        self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
-        self.vm.add_args('-object', 'rng-random,id=rng0,filename=/dev/urandom')
-
-    @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
-    def test_fedora_cloud_tcg_gicv2(self):
-        """
-        :avocado: tags=accel:tcg
-        :avocado: tags=cpu:max
-        :avocado: tags=device:gicv2
-        """
-        self.require_accelerator("tcg")
-        self.vm.add_args("-accel", "tcg")
-        self.vm.add_args("-cpu", "max,lpa2=off")
-        self.vm.add_args("-machine", "virt,gic-version=2")
-        self.add_common_args()
-        self.launch_and_wait(set_up_ssh_connection=False)
-
-    @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
-    def test_fedora_cloud_tcg_gicv3(self):
-        """
-        :avocado: tags=accel:tcg
-        :avocado: tags=cpu:max
-        :avocado: tags=device:gicv3
-        """
-        self.require_accelerator("tcg")
-        self.vm.add_args("-accel", "tcg")
-        self.vm.add_args("-cpu", "max,lpa2=off")
-        self.vm.add_args("-machine", "virt,gic-version=3")
-        self.add_common_args()
-        self.launch_and_wait(set_up_ssh_connection=False)
-
     def test_virt_kvm(self):
         """
         :avocado: tags=accel:kvm
@@ -112,7 +76,11 @@ class BootLinuxAarch64(LinuxTest):
         self.require_accelerator("kvm")
         self.vm.add_args("-accel", "kvm")
         self.vm.add_args("-machine", "virt,gic-version=host")
-        self.add_common_args()
+        self.vm.add_args('-bios',
+                         os.path.join(BUILD_DIR, 'pc-bios',
+                                      'edk2-aarch64-code.fd'))
+        self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
+        self.vm.add_args('-object', 'rng-random,id=rng0,filename=/dev/urandom')
         self.launch_and_wait(set_up_ssh_connection=False)
 
 
index be60f8cda9aa3da0f024e70ba5c3432a2bba9b99..574609bf43baa950d37bf5026f218302af1897f3 100644 (file)
@@ -997,6 +997,7 @@ class BootLinuxConsole(LinuxKernelTest):
 
     def test_aarch64_raspi3_atf(self):
         """
+        :avocado: tags=accel:tcg
         :avocado: tags=arch:aarch64
         :avocado: tags=machine:raspi3b
         :avocado: tags=cpu:cortex-a53
index c2b2ba2cf8724f9ab72fb349efceb6cf0340a6d8..25dab8dc00a3d26eed72b3db9d19f06615b0054d 100644 (file)
 
 import time
 import os
+import logging
 
 from avocado_qemu import QemuSystemTest
 from avocado_qemu import wait_for_console_pattern
 from avocado_qemu import exec_command
 from avocado_qemu import BUILD_DIR
+from avocado.utils import process
+from avocado.utils.path import find_command
 
 class Aarch64VirtMachine(QemuSystemTest):
     KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 '
@@ -65,16 +68,15 @@ class Aarch64VirtMachine(QemuSystemTest):
         self.wait_for_console_pattern('Welcome to Alpine Linux 3.16')
 
 
-    def test_aarch64_virt(self):
+    def common_aarch64_virt(self, machine):
         """
-        :avocado: tags=arch:aarch64
-        :avocado: tags=machine:virt
-        :avocado: tags=accel:tcg
-        :avocado: tags=cpu:max
+        Common code to launch basic virt machine with kernel+initrd
+        and a scratch disk.
         """
+        logger = logging.getLogger('aarch64_virt')
+
         kernel_url = ('https://fileserver.linaro.org/s/'
                       'z6B2ARM7DQT3HWN/download')
-
         kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347'
         kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
 
@@ -83,13 +85,62 @@ class Aarch64VirtMachine(QemuSystemTest):
                                'console=ttyAMA0')
         self.require_accelerator("tcg")
         self.vm.add_args('-cpu', 'max,pauth-impdef=on',
+                         '-machine', machine,
                          '-accel', 'tcg',
                          '-kernel', kernel_path,
                          '-append', kernel_command_line)
+
+        # A RNG offers an easy way to generate a few IRQs
+        self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
+        self.vm.add_args('-object',
+                         'rng-random,id=rng0,filename=/dev/urandom')
+
+        # Also add a scratch block device
+        logger.info('creating scratch qcow2 image')
+        image_path = os.path.join(self.workdir, 'scratch.qcow2')
+        qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
+        if not os.path.exists(qemu_img):
+            qemu_img = find_command('qemu-img', False)
+        if qemu_img is False:
+            self.cancel('Could not find "qemu-img", which is required to '
+                        'create the temporary qcow2 image')
+        cmd = '%s create -f qcow2 %s 8M' % (qemu_img, image_path)
+        process.run(cmd)
+
+        # Add the device
+        self.vm.add_args('-blockdev',
+                         f"driver=qcow2,file.driver=file,file.filename={image_path},node-name=scratch")
+        self.vm.add_args('-device',
+                         'virtio-blk-device,drive=scratch')
+
         self.vm.launch()
         self.wait_for_console_pattern('Welcome to Buildroot')
         time.sleep(0.1)
         exec_command(self, 'root')
         time.sleep(0.1)
+        exec_command(self, 'dd if=/dev/hwrng of=/dev/vda bs=512 count=4')
+        time.sleep(0.1)
+        exec_command(self, 'md5sum /dev/vda')
+        time.sleep(0.1)
+        exec_command(self, 'cat /proc/interrupts')
+        time.sleep(0.1)
         exec_command(self, 'cat /proc/self/maps')
         time.sleep(0.1)
+
+    def test_aarch64_virt_gicv3(self):
+        """
+        Run the common virt machine test with GIC version 3.
+
+        :avocado: tags=arch:aarch64
+        :avocado: tags=machine:virt
+        :avocado: tags=accel:tcg
+        :avocado: tags=cpu:max
+        """
+        # Use the same dash-separated property spelling as the GICv2 test.
+        self.common_aarch64_virt("virt,gic-version=3")
+
+    def test_aarch64_virt_gicv2(self):
+        """
+        Run the common virt machine test with GIC version 2.
+
+        :avocado: tags=arch:aarch64
+        :avocado: tags=machine:virt
+        :avocado: tags=accel:tcg
+        :avocado: tags=cpu:max
+        """
+        self.common_aarch64_virt("virt,gic-version=2")
index d2921e70c3b4bc472684b430b9eb85ad6101bf7e..680c314cfccfd668bd95ff1fa1991251cda9c4a8 100644 (file)
@@ -173,6 +173,10 @@ class ReverseDebugging(LinuxKernelTest):
         vm.shutdown()
 
 class ReverseDebugging_X86_64(ReverseDebugging):
+    """
+    :avocado: tags=accel:tcg
+    """
+
     REG_PC = 0x10
     REG_CS = 0x12
     def get_pc(self, g):
@@ -190,6 +194,10 @@ class ReverseDebugging_X86_64(ReverseDebugging):
         self.reverse_debugging()
 
 class ReverseDebugging_AArch64(ReverseDebugging):
+    """
+    :avocado: tags=accel:tcg
+    """
+
     REG_PC = 32
 
     # unidentified gitlab timeout problem
diff --git a/tests/avocado/virtiofs_submounts.py b/tests/avocado/virtiofs_submounts.py
deleted file mode 100644 (file)
index e6dc32f..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-import logging
-import re
-import os
-import subprocess
-import time
-
-from avocado import skipUnless
-from avocado_qemu import LinuxTest, BUILD_DIR
-from avocado_qemu import has_cmds
-from avocado_qemu import run_cmd
-from avocado_qemu import wait_for_console_pattern
-from avocado.utils import ssh
-
-
-class VirtiofsSubmountsTest(LinuxTest):
-    """
-    :avocado: tags=arch:x86_64
-    :avocado: tags=accel:kvm
-    """
-
-    def run(self, args, ignore_error=False):
-        stdout, stderr, ret = run_cmd(args)
-
-        if ret != 0:
-            cmdline = ' '.join(args)
-            if not ignore_error:
-                self.fail(f'{cmdline}: Returned {ret}: {stderr}')
-            else:
-                self.log.warn(f'{cmdline}: Returned {ret}: {stderr}')
-
-        return (stdout, stderr, ret)
-
-    def set_up_shared_dir(self):
-        self.shared_dir = os.path.join(self.workdir, 'virtiofs-shared')
-
-        os.mkdir(self.shared_dir)
-
-        self.run(('cp', self.get_data('guest.sh'),
-                 os.path.join(self.shared_dir, 'check.sh')))
-
-        self.run(('cp', self.get_data('guest-cleanup.sh'),
-                 os.path.join(self.shared_dir, 'cleanup.sh')))
-
-    def set_up_virtiofs(self):
-        attmp = os.getenv('AVOCADO_TESTS_COMMON_TMPDIR')
-        self.vfsdsock = os.path.join(attmp, 'vfsdsock')
-
-        self.run(('sudo', '-n', 'rm', '-f', self.vfsdsock), ignore_error=True)
-
-        self.virtiofsd = \
-            subprocess.Popen(('sudo', '-n',
-                              'tools/virtiofsd/virtiofsd',
-                              f'--socket-path={self.vfsdsock}',
-                              '-o', f'source={self.shared_dir}',
-                              '-o', 'cache=always',
-                              '-o', 'xattr',
-                              '-o', 'announce_submounts',
-                              '-f'),
-                             stdout=subprocess.DEVNULL,
-                             stderr=subprocess.PIPE,
-                             universal_newlines=True)
-
-        while not os.path.exists(self.vfsdsock):
-            if self.virtiofsd.poll() is not None:
-                self.fail('virtiofsd exited prematurely: ' +
-                          self.virtiofsd.communicate()[1])
-            time.sleep(0.1)
-
-        self.run(('sudo', '-n', 'chmod', 'go+rw', self.vfsdsock))
-
-        self.vm.add_args('-chardev',
-                         f'socket,id=vfsdsock,path={self.vfsdsock}',
-                         '-device',
-                         'vhost-user-fs-pci,queue-size=1024,chardev=vfsdsock' \
-                             ',tag=host',
-                         '-object',
-                         'memory-backend-file,id=mem,size=1G,' \
-                             'mem-path=/dev/shm,share=on',
-                         '-numa',
-                         'node,memdev=mem')
-
-    def set_up_nested_mounts(self):
-        scratch_dir = os.path.join(self.shared_dir, 'scratch')
-        try:
-            os.mkdir(scratch_dir)
-        except FileExistsError:
-            pass
-
-        args = ['bash', self.get_data('host.sh'), scratch_dir]
-        if self.seed:
-            args += [self.seed]
-
-        out, _, _ = self.run(args)
-        seed = re.search(r'^Seed: \d+', out)
-        self.log.info(seed[0])
-
-    def mount_in_guest(self):
-        self.ssh_command('mkdir -p /mnt/host')
-        self.ssh_command('mount -t virtiofs host /mnt/host')
-
-    def check_in_guest(self):
-        self.ssh_command('bash /mnt/host/check.sh /mnt/host/scratch/share')
-
-    def live_cleanup(self):
-        self.ssh_command('bash /mnt/host/cleanup.sh /mnt/host/scratch')
-
-        # It would be nice if the above was sufficient to make virtiofsd clear
-        # all references to the mounted directories (so they can be unmounted
-        # on the host), but unfortunately it is not.  To do so, we have to
-        # resort to a remount.
-        self.ssh_command('mount -o remount /mnt/host')
-
-        scratch_dir = os.path.join(self.shared_dir, 'scratch')
-        self.run(('bash', self.get_data('cleanup.sh'), scratch_dir))
-
-    @skipUnless(*has_cmds(('sudo -n', ('sudo', '-n', 'true')),
-                          'ssh-keygen', 'bash', 'losetup', 'mkfs.xfs', 'mount'))
-    def setUp(self):
-        vmlinuz = self.params.get('vmlinuz')
-        if vmlinuz is None:
-            """
-            The Linux kernel supports FUSE auto-submounts only as of 5.10.
-            boot_linux.py currently provides Fedora 31, whose kernel is too
-            old, so this test cannot pass with the on-image kernel (you are
-            welcome to try, hence the option to force such a test with
-            -p vmlinuz='').  Therefore, for now the user must provide a
-            sufficiently new custom kernel, or effectively explicitly
-            request failure with -p vmlinuz=''.
-            Once an image with a sufficiently new kernel is available
-            (probably Fedora 34), we can make -p vmlinuz='' the default, so
-            that this parameter no longer needs to be specified.
-            """
-            self.cancel('vmlinuz parameter not set; you must point it to a '
-                        'Linux kernel binary to test (to run this test with ' \
-                        'the on-image kernel, set it to an empty string)')
-
-        self.seed = self.params.get('seed')
-
-        self.ssh_key = os.path.join(self.workdir, 'id_ed25519')
-
-        self.run(('ssh-keygen', '-N', '', '-t', 'ed25519', '-f', self.ssh_key))
-
-        pubkey = self.ssh_key + '.pub'
-
-        super(VirtiofsSubmountsTest, self).setUp(pubkey)
-
-        if vmlinuz:
-            self.vm.add_args('-kernel', vmlinuz,
-                             '-append', 'console=ttyS0 root=/dev/sda1')
-
-        self.require_accelerator("kvm")
-        self.vm.add_args('-accel', 'kvm')
-
-    def tearDown(self):
-        try:
-            self.vm.shutdown()
-        except:
-            pass
-
-        scratch_dir = os.path.join(self.shared_dir, 'scratch')
-        self.run(('bash', self.get_data('cleanup.sh'), scratch_dir),
-                 ignore_error=True)
-
-    def test_pre_virtiofsd_set_up(self):
-        self.set_up_shared_dir()
-
-        self.set_up_nested_mounts()
-
-        self.set_up_virtiofs()
-        self.launch_and_wait()
-        self.mount_in_guest()
-        self.check_in_guest()
-
-    def test_pre_launch_set_up(self):
-        self.set_up_shared_dir()
-        self.set_up_virtiofs()
-
-        self.set_up_nested_mounts()
-
-        self.launch_and_wait()
-        self.mount_in_guest()
-        self.check_in_guest()
-
-    def test_post_launch_set_up(self):
-        self.set_up_shared_dir()
-        self.set_up_virtiofs()
-        self.launch_and_wait()
-
-        self.set_up_nested_mounts()
-
-        self.mount_in_guest()
-        self.check_in_guest()
-
-    def test_post_mount_set_up(self):
-        self.set_up_shared_dir()
-        self.set_up_virtiofs()
-        self.launch_and_wait()
-        self.mount_in_guest()
-
-        self.set_up_nested_mounts()
-
-        self.check_in_guest()
-
-    def test_two_runs(self):
-        self.set_up_shared_dir()
-
-        self.set_up_nested_mounts()
-
-        self.set_up_virtiofs()
-        self.launch_and_wait()
-        self.mount_in_guest()
-        self.check_in_guest()
-
-        self.live_cleanup()
-        self.set_up_nested_mounts()
-
-        self.check_in_guest()
index 279a8fcc33903caaa9dcd9efe0cd63f5c7a037a8..7477a1f40101ec9284296882d5e05d8e89b6f47e 100644 (file)
@@ -3,6 +3,12 @@ qht_bench = executable('qht-bench',
                        sources: 'qht-bench.c',
                        dependencies: [qemuutil])
 
+if have_system
+xbzrle_bench = executable('xbzrle-bench',
+                       sources: 'xbzrle-bench.c',
+                       dependencies: [qemuutil,migration])
+endif
+
 executable('atomic_add-bench',
            sources: files('atomic_add-bench.c'),
            dependencies: [qemuutil],
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
new file mode 100644 (file)
index 0000000..8848a3a
--- /dev/null
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding benchmark (scalar vs. AVX-512 encoder).
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Orit Wasserman  <owasserm@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+/*
+ * Constructor that probes the host CPU and sets is_cpu_support_avx512bw.
+ *
+ * The flag becomes true only when CPUID leaf 1 reports OSXSAVE and AVX,
+ * the XCR0 value read with xgetbv has all bits of 0xe6 set, and CPUID
+ * leaf 7 reports AVX512BW.  Otherwise it stays false.
+ */
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    is_cpu_support_avx512bw = false;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            /* Read XCR0 (ecx = 0): low half into bv, high half into d */
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                is_cpu_support_avx512bw = true;
+            }
+        }
+    }
+    return ;
+}
+
+/* Timing result of one benchmark iteration, filled in by encode_decode_*(). */
+struct ResTime {
+    float t_raw;    /* time taken by xbzrle_encode_buffer() */
+    float t_512;    /* time taken by xbzrle_encode_buffer_avx512() */
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+*/
+/*
+ * Time one xbzrle_encode_buffer() call and one xbzrle_encode_buffer_avx512()
+ * call, each encoding a page against itself (expected encoded length 0).
+ * The CPU time of each call, in milliseconds, is stored in @res.
+ */
+static void encode_decode_zero(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        buffer512[1000 + i] = i;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    buffer[1000 + diff_len + 5] = 105;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    buffer512[1000 + diff_len + 5] = 105;
+
+    /* encode zero page */
+    /* clock() yields clock_t ticks, so convert with CLOCKS_PER_SEC */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+                       XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(buffer512);
+    g_free(compressed512);
+
+}
+
+/* Run encode_decode_zero() 10000 times and print the accumulated times. */
+static void test_encode_decode_zero_avx512(void)
+{
+    struct ResTime sample;
+    float total_raw = 0.0, total_512 = 0.0;
+
+    for (int round = 0; round < 10000; round++) {
+        encode_decode_zero(&sample);
+        total_raw += sample.t_raw;
+        total_512 += sample.t_512;
+    }
+
+    printf("Zero test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", total_raw);
+    printf("512 xbzrle_encode time is %f ms\n", total_512);
+}
+
+/*
+ * Time both encoders on an unchanged page: the same buffer is passed as old
+ * and new data, so the expected encoded length is 0.  The CPU time of each
+ * call, in milliseconds, is stored in @res.
+ */
+static void encode_decode_unchanged(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        test[1000 + i] = i + 4;
+        test512[1000 + i] = i + 4;
+    }
+
+    test[1000 + diff_len + 3] = 107;
+    test[1000 + diff_len + 5] = 109;
+
+    test512[1000 + diff_len + 3] = 107;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test unchanged buffer */
+    /* clock() yields clock_t ticks, so convert with CLOCKS_PER_SEC */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(test);
+    g_free(compressed);
+    g_free(test512);
+    g_free(compressed512);
+
+}
+
+/* Run encode_decode_unchanged() 10000 times and print the accumulated times. */
+static void test_encode_decode_unchanged_avx512(void)
+{
+    struct ResTime sample;
+    float total_raw = 0.0, total_512 = 0.0;
+
+    for (int round = 0; round < 10000; round++) {
+        encode_decode_unchanged(&sample);
+        total_raw += sample.t_raw;
+        total_512 += sample.t_512;
+    }
+
+    printf("Unchanged test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", total_raw);
+    printf("512 xbzrle_encode time is %f ms\n", total_512);
+}
+
+/*
+ * Time both encoders on a page whose only difference from the old page is
+ * its last byte, then decode each result and check it reproduces the new
+ * page.  The CPU time of each encode call, in milliseconds, is stored in
+ * @res.
+ */
+static void encode_decode_1_byte(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+    uint8_t buf[2];
+    uint8_t buf512[2];
+
+    test[XBZRLE_PAGE_SIZE - 1] = 1;
+    test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+    /* clock() yields clock_t ticks, so convert with CLOCKS_PER_SEC */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                       XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+    g_assert(rc == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+/* Run encode_decode_1_byte() 10000 times and print the accumulated times. */
+static void test_encode_decode_1_byte_avx512(void)
+{
+    struct ResTime sample;
+    float total_raw = 0.0, total_512 = 0.0;
+
+    for (int round = 0; round < 10000; round++) {
+        encode_decode_1_byte(&sample);
+        total_raw += sample.t_raw;
+        total_512 += sample.t_512;
+    }
+
+    printf("1 byte test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", total_raw);
+    printf("512 xbzrle_encode time is %f ms\n", total_512);
+}
+
+/*
+ * Time both encoders on a page where every other byte differs from the old
+ * page; both calls are expected to return -1.  The CPU time of each call,
+ * in milliseconds, is stored in @res.
+ */
+static void encode_decode_overflow(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+
+    for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+        test[i * 2] = 1;
+        test512[i * 2] = 1;
+    }
+
+    /* encode overflow */
+    /* clock() yields clock_t ticks, so convert with CLOCKS_PER_SEC */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                              XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(rc == -1);
+
+    t_start512 = clock();
+    rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                     compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    g_assert(rc512 == -1);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+/* Run encode_decode_overflow() 10000 times and print the accumulated times. */
+static void test_encode_decode_overflow_avx512(void)
+{
+    struct ResTime sample;
+    float total_raw = 0.0, total_512 = 0.0;
+
+    for (int round = 0; round < 10000; round++) {
+        encode_decode_overflow(&sample);
+        total_raw += sample.t_raw;
+        total_512 += sample.t_512;
+    }
+
+    printf("Overflow test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", total_raw);
+    printf("512 xbzrle_encode time is %f ms\n", total_512);
+}
+
+/*
+ * Time both encoders on a page that differs from the old page in a random
+ * length run starting at offset 1001 plus two nearby bytes, then decode each
+ * result and check it matches the new page.  The CPU time of each encode
+ * call, in milliseconds, is stored in @res.
+ */
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        test[1000 + i] = i + 4;
+        buffer512[1000 + i] = i;
+        test512[1000 + i] = i + 4;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    test[1000 + diff_len + 3] = 107;
+
+    buffer[1000 + diff_len + 5] = 105;
+    test[1000 + diff_len + 5] = 109;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    test512[1000 + diff_len + 3] = 107;
+
+    buffer512[1000 + diff_len + 5] = 105;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test encode/decode */
+    /* clock() yields clock_t ticks, so convert with CLOCKS_PER_SEC */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+/* Run encode_decode_range_avx512() 10000 times and print the accumulated times. */
+static void test_encode_decode_avx512(void)
+{
+    struct ResTime sample;
+    float total_raw = 0.0, total_512 = 0.0;
+
+    for (int round = 0; round < 10000; round++) {
+        encode_decode_range_avx512(&sample);
+        total_raw += sample.t_raw;
+        total_512 += sample.t_512;
+    }
+
+    printf("Encode decode test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", total_raw);
+    printf("512 xbzrle_encode time is %f ms\n", total_512);
+}
+
+/*
+ * Time both encoders on a page that differs from the old page at a random
+ * number of randomly chosen offsets, then decode each result.  The CPU time
+ * of each encode call, in milliseconds, is stored in @res.
+ */
+static void encode_decode_random(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    /*
+     * store the index of diff
+     *
+     * diff_len can be 0 and a zero-length VLA is undefined behaviour, so
+     * use a fixed array; diff_len is always below XBZRLE_PAGE_SIZE.
+     */
+    int dirty_index[XBZRLE_PAGE_SIZE];
+    for (int j = 0; j < diff_len; j++) {
+        dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    }
+    for (i = diff_len - 1; i >= 0; i--) {
+        buffer[dirty_index[i]] = i;
+        test[dirty_index[i]] = i + 4;
+        buffer512[dirty_index[i]] = i;
+        test512[dirty_index[i]] = i + 4;
+    }
+
+    /* clock() yields clock_t ticks, so convert with CLOCKS_PER_SEC */
+    clock_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = (t_end - t_start) * 1000.0 / CLOCKS_PER_SEC;
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = (t_end512 - t_start512) * 1000.0 / CLOCKS_PER_SEC;
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+/* Run encode_decode_random() 10000 times and print the accumulated times. */
+static void test_encode_decode_random_avx512(void)
+{
+    struct ResTime sample;
+    float total_raw = 0.0, total_512 = 0.0;
+
+    for (int round = 0; round < 10000; round++) {
+        encode_decode_random(&sample);
+        total_raw += sample.t_raw;
+        total_512 += sample.t_512;
+    }
+
+    printf("Random test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", total_raw);
+    printf("512 xbzrle_encode time is %f ms\n", total_512);
+}
+#endif
+
+/*
+ * Benchmark entry point: initialise the GLib test harness and, when built
+ * with CONFIG_AVX512BW_OPT and the host CPU check passed, register every
+ * benchmark before running them.
+ */
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_rand_int();
+#ifdef CONFIG_AVX512BW_OPT
+    if (likely(is_cpu_support_avx512bw)) {
+        g_test_add_func("/xbzrle/encode_decode_zero",
+                        test_encode_decode_zero_avx512);
+        g_test_add_func("/xbzrle/encode_decode_unchanged",
+                        test_encode_decode_unchanged_avx512);
+        g_test_add_func("/xbzrle/encode_decode_1_byte",
+                        test_encode_decode_1_byte_avx512);
+        g_test_add_func("/xbzrle/encode_decode_overflow",
+                        test_encode_decode_overflow_avx512);
+        g_test_add_func("/xbzrle/encode_decode",
+                        test_encode_decode_avx512);
+        g_test_add_func("/xbzrle/encode_decode_random",
+                        test_encode_decode_random_avx512);
+    }
+#endif
+    return g_test_run();
+}
index 56d88417df4d11d96608c5b03171d80c6f8bf873..175c10a34e89108e35b0523a56baa71938e3f380 100644 (file)
@@ -7,7 +7,6 @@ MAINTAINER John Snow <jsnow@redhat.com>
 ENV PACKAGES \
     gcc \
     make \
-    pipenv \
     python3 \
     python3-pip \
     python3-tox \
index cc06fac5925cdbbbfb78a14968e925cac3c67916..e69d16a62c8b1d2fbadbf2fcf81ce0813dc1cca8 100644 (file)
@@ -337,7 +337,7 @@ class Engine(object):
         argv.extend(self._get_qemu_serial_args())
 
         if self._debug:
-            argv.extend(["-device", "sga"])
+            argv.extend(["-machine", "graphics=off"])
 
         if hardware._prealloc_pages:
             argv_source += ["-mem-path", "/dev/shm",
index 072e54e62bd2c2f73d3ae3f2abf4744cc64f3cb4..eaf13c7a3340623513bb81d95b0e56a251bb137f 100755 (executable)
@@ -40,6 +40,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow2
 _supported_proto file fuse
 _require_drivers null-co
+_require_devices virtio-scsi-pci
 
 if [ "$QEMU_DEFAULT_MACHINE" != "pc" ]; then
     _notrun "Requires a PC machine"
index 94aeb3f3b2000d0e24bbfdd2cf5d719961a3c68f..3e82c634cfef9ccaeb624d5e804897e6cee0bf9d 100644 (file)
@@ -720,7 +720,7 @@ class Timeout:
         signal.setitimer(signal.ITIMER_REAL, 0)
         return False
     def timeout(self, signum, frame):
-        raise Exception(self.errmsg)
+        raise TimeoutError(self.errmsg)
 
 def file_pattern(name):
     return "{0}-{1}".format(os.getpid(), name)
@@ -804,7 +804,7 @@ def remote_filename(path):
     elif imgproto == 'ssh':
         return "ssh://%s@127.0.0.1:22%s" % (os.environ.get('USER'), path)
     else:
-        raise Exception("Protocol %s not supported" % (imgproto))
+        raise ValueError("Protocol %s not supported" % (imgproto))
 
 class VM(qtest.QEMUQtestMachine):
     '''A QEMU VM'''
diff --git a/tests/qemu-iotests/tests/detect-zeroes-registered-buf b/tests/qemu-iotests/tests/detect-zeroes-registered-buf
new file mode 100755 (executable)
index 0000000..edb5f2c
--- /dev/null
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# group: rw auto quick
+#
+# Check that detect-zeroes=unmap works on writes with registered I/O buffers.
+# This is a regression test for
+# https://gitlab.com/qemu-project/qemu/-/issues/1404 where I/O requests failed
+# unexpectedly.
+#
+# Copyright Red Hat
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+# Name of this test; printed in the banner below and used for the
+# "$seq.full" file removed at the end.
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1       # failure is the default!
+
+# Exit-time hook: hands cleanup over to the harness helper.
+_cleanup()
+{
+       _cleanup_test_img
+}
+# Run _cleanup and exit with $status on normal exit (0) and on signals
+# 1, 2, 3 and 15 (HUP, INT, QUIT, TERM).  The dollar sign is escaped so that
+# $status is expanded when the trap fires, not when it is installed.
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+# (common.rc and common.filter are sourced from the parent directory)
+cd ..
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto generic
+
+size=128M
+_make_test_img $size
+# Open the image through explicit image options so that discard=unmap and
+# detect-zeroes=unmap are both set on the format node.
+IMGSPEC="driver=$IMGFMT,file.filename=$TEST_IMG,discard=unmap,detect-zeroes=unmap"
+
+echo
+echo "== writing zero buffer to image =="
+# Write 4k of zero bytes (-P 0) at offset 0.  NOTE(review): "-r" presumably
+# selects a registered I/O buffer, as described in the header -- confirm
+# against qemu-io's "write" help.  The output is passed through
+# _filter_qemu_io before being compared with the reference output.
+QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO -c "write -r -P 0 0 4k" --image-opts "$IMGSPEC" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+# Picked up by the exit trap installed above.
+status=0
diff --git a/tests/qemu-iotests/tests/detect-zeroes-registered-buf.out b/tests/qemu-iotests/tests/detect-zeroes-registered-buf.out
new file mode 100644 (file)
index 0000000..42c56fc
--- /dev/null
@@ -0,0 +1,7 @@
+QA output created by detect-zeroes-registered-buf
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+
+== writing zero buffer to image ==
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
index fc9c4b4ef411dc93e1abba0e58d2cbb0b122731d..dda55fad2840acf189b294952109493f27d57f6f 100755 (executable)
@@ -84,7 +84,7 @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):
                 e['vm'] = 'SRC'
             for e in self.vm_b_events:
                 e['vm'] = 'DST'
-            events = (self.vm_a_events + self.vm_b_events)
+            events = self.vm_a_events + self.vm_b_events
             events = [(e['timestamp']['seconds'],
                        e['timestamp']['microseconds'],
                        e['vm'],
index 8691802950ca53b3653b0909c8b39438a4fd9b29..1cb08138ad1cd168d001d9a233608570fac3c363 100644 (file)
@@ -21,7 +21,7 @@
 #define SVE_MAX_VQ 16
 
 #define MACHINE     "-machine virt,gic-version=max -accel tcg "
-#define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "
+#define MACHINE_KVM "-machine virt,gic-version=max -accel kvm "
 #define QUERY_HEAD  "{ 'execute': 'query-cpu-model-expansion', " \
                     "  'arguments': { 'type': 'full', "
 #define QUERY_TAIL  "}}"
@@ -607,31 +607,39 @@ int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
 
-    qtest_add_data_func("/arm/query-cpu-model-expansion",
-                        NULL, test_query_cpu_model_expansion);
+    if (qtest_has_accel("tcg")) {
+        qtest_add_data_func("/arm/query-cpu-model-expansion",
+                            NULL, test_query_cpu_model_expansion);
+    }
+
+    if (!g_str_equal(qtest_get_arch(), "aarch64")) {
+        goto out;
+    }
 
     /*
      * For now we only run KVM specific tests with AArch64 QEMU in
      * order avoid attempting to run an AArch32 QEMU with KVM on
      * AArch64 hosts. That won't work and isn't easy to detect.
      */
-    if (g_str_equal(qtest_get_arch(), "aarch64") && qtest_has_accel("kvm")) {
+    if (qtest_has_accel("kvm")) {
         /*
          * This tests target the 'host' CPU type, so register it only if
          * KVM is available.
          */
         qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
                             NULL, test_query_cpu_model_expansion_kvm);
+
+        qtest_add_data_func("/arm/kvm/query-cpu-model-expansion/sve-off",
+                            NULL, sve_tests_sve_off_kvm);
     }
 
-    if (g_str_equal(qtest_get_arch(), "aarch64")) {
+    if (qtest_has_accel("tcg")) {
         qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-max-vq-8",
                             NULL, sve_tests_sve_max_vq_8);
         qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-off",
                             NULL, sve_tests_sve_off);
-        qtest_add_data_func("/arm/kvm/query-cpu-model-expansion/sve-off",
-                            NULL, sve_tests_sve_off_kvm);
     }
 
+out:
     return g_test_run();
 }
index d8c8cda58e633f45e4238821581ba4e2b663875a..d29a4e47afffd1c038acb548148b08227353c3f2 100644 (file)
@@ -1008,6 +1008,12 @@ static void test_acpi_q35_multif_bridge(void)
         .machine = MACHINE_Q35,
         .variant = ".multi-bridge",
     };
+
+    if (!qtest_has_device("pcie-root-port")) {
+        g_test_skip("Device pcie-root-port is not available");
+        goto out;
+    }
+
     test_vm_prepare("-S"
         " -device virtio-balloon,id=balloon0,addr=0x4.0x2"
         " -device pcie-root-port,id=rp0,multifunction=on,"
@@ -1043,6 +1049,7 @@ static void test_acpi_q35_multif_bridge(void)
     /* check that reboot/reset doesn't change any ACPI tables  */
     qtest_qmp_send(data.qts, "{'execute':'system_reset' }");
     process_acpi_tables(&data);
+out:
     free_test_data(&data);
 }
 
@@ -1396,6 +1403,11 @@ static void test_acpi_tcg_dimm_pxm(const char *machine)
 {
     test_data data;
 
+    if (!qtest_has_device("nvdimm")) {
+        g_test_skip("Device nvdimm is not available");
+        return;
+    }
+
     memset(&data, 0, sizeof(data));
     data.machine = machine;
     data.variant = ".dimmpxm";
@@ -1444,6 +1456,11 @@ static void test_acpi_virt_tcg_memhp(void)
         .scan_len = 256ULL * 1024 * 1024,
     };
 
+    if (!qtest_has_device("nvdimm")) {
+        g_test_skip("Device nvdimm is not available");
+        goto out;
+    }
+
     data.variant = ".memhp";
     test_acpi_one(" -machine nvdimm=on"
                   " -cpu cortex-a57"
@@ -1457,7 +1474,7 @@ static void test_acpi_virt_tcg_memhp(void)
                   " -device pc-dimm,id=dimm0,memdev=ram2,node=0"
                   " -device nvdimm,id=dimm1,memdev=nvm0,node=1",
                   &data);
-
+out:
     free_test_data(&data);
 
 }
@@ -1475,6 +1492,11 @@ static void test_acpi_microvm_tcg(void)
 {
     test_data data;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=off",
                   &data);
@@ -1485,6 +1507,11 @@ static void test_acpi_microvm_usb_tcg(void)
 {
     test_data data;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
     data.variant = ".usb";
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,usb=on,rtc=off",
@@ -1496,6 +1523,11 @@ static void test_acpi_microvm_rtc_tcg(void)
 {
     test_data data;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
     data.variant = ".rtc";
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=on",
@@ -1507,6 +1539,11 @@ static void test_acpi_microvm_pcie_tcg(void)
 {
     test_data data;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
     data.variant = ".pcie";
     data.tcg_only = true; /* need constant host-phys-bits */
@@ -1519,6 +1556,11 @@ static void test_acpi_microvm_ioapic2_tcg(void)
 {
     test_data data;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
     data.variant = ".ioapic2";
     test_acpi_one(" -machine microvm,acpi=on,ioapic2=on,rtc=off",
@@ -1558,6 +1600,12 @@ static void test_acpi_virt_tcg_pxb(void)
         .ram_start = 0x40000000ULL,
         .scan_len = 128ULL * 1024 * 1024,
     };
+
+    if (!qtest_has_device("pcie-root-port")) {
+        g_test_skip("Device pcie-root-port is not available");
+        goto out;
+    }
+
     /*
      * While using -cdrom, the cdrom would auto plugged into pxb-pcie,
      * the reason is the bus of pxb-pcie is also root bus, it would lead
@@ -1576,7 +1624,7 @@ static void test_acpi_virt_tcg_pxb(void)
                   " -cpu cortex-a57"
                   " -device pxb-pcie,bus_nr=128",
                   &data);
-
+out:
     free_test_data(&data);
 }
 
@@ -1764,6 +1812,12 @@ static void test_acpi_microvm_acpi_erst(void)
     gchar *params;
     test_data data;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        g_free(tmp_path);
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
     data.variant = ".pcie";
     data.tcg_only = true; /* need constant host-phys-bits */
@@ -1824,6 +1878,11 @@ static void test_acpi_q35_viot(void)
         .variant = ".viot",
     };
 
+    if (!qtest_has_device("virtio-iommu")) {
+        g_test_skip("Device virtio-iommu is not available");
+        goto out;
+    }
+
     /*
      * To keep things interesting, two buses bypass the IOMMU.
      * VIOT should only describes the other two buses.
@@ -1834,6 +1893,7 @@ static void test_acpi_q35_viot(void)
                   "-device pxb-pcie,bus_nr=0x20,id=pcie.200,bus=pcie.0,bypass_iommu=on "
                   "-device pxb-pcie,bus_nr=0x30,id=pcie.300,bus=pcie.0",
                   &data);
+out:
     free_test_data(&data);
 }
 
@@ -1894,8 +1954,10 @@ static void test_acpi_virt_viot(void)
         .scan_len = 128ULL * 1024 * 1024,
     };
 
-    test_acpi_one("-cpu cortex-a57 "
-                  "-device virtio-iommu-pci", &data);
+    if (qtest_has_device("virtio-iommu")) {
+        test_acpi_one("-cpu cortex-a57 "
+                       "-device virtio-iommu-pci", &data);
+    }
     free_test_data(&data);
 }
 
@@ -2004,6 +2066,11 @@ static void test_acpi_microvm_oem_fields(void)
     test_data data;
     char *args;
 
+    if (!qtest_has_device("virtio-blk-device")) {
+        g_test_skip("Device virtio-blk-device is not available");
+        return;
+    }
+
     test_acpi_microvm_prepare(&data);
 
     args = test_acpi_create_args(&data,
index 5a6afa2b57fa4e9976d6d47470c27a0882ce040d..01cecd6e202b6fbe04fdab9aaf2638c207502bea 100644 (file)
@@ -64,15 +64,21 @@ static void process_device_remove(QTestState *qtest, const char *id)
 
 static void test_pci_unplug_request(void)
 {
+    QTestState *qtest;
     const char *arch = qtest_get_arch();
     const char *machine_addition = "";
 
+    if (!qtest_has_device("virtio-mouse-pci")) {
+        g_test_skip("Device virtio-mouse-pci not available");
+        return;
+    }
+
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         machine_addition = "-machine pc";
     }
 
-    QTestState *qtest = qtest_initf("%s -device virtio-mouse-pci,id=dev0",
-                                    machine_addition);
+    qtest = qtest_initf("%s -device virtio-mouse-pci,id=dev0",
+                        machine_addition);
 
     process_device_remove(qtest, "dev0");
 
@@ -81,11 +87,17 @@ static void test_pci_unplug_request(void)
 
 static void test_q35_pci_unplug_request(void)
 {
+    QTestState *qtest;
+
+    if (!qtest_has_device("virtio-mouse-pci")) {
+        g_test_skip("Device virtio-mouse-pci not available");
+        return;
+    }
 
-    QTestState *qtest = qtest_initf("-machine q35 "
-                                    "-device pcie-root-port,id=p1 "
-                                    "-device pcie-pci-bridge,bus=p1,id=b1 "
-                                    "-device virtio-mouse-pci,bus=b1,id=dev0");
+    qtest = qtest_initf("-machine q35 "
+                        "-device pcie-root-port,id=p1 "
+                        "-device pcie-pci-bridge,bus=p1,id=b1 "
+                        "-device virtio-mouse-pci,bus=b1,id=dev0");
 
     process_device_remove(qtest, "dev0");
 
@@ -94,14 +106,20 @@ static void test_q35_pci_unplug_request(void)
 
 static void test_pci_unplug_json_request(void)
 {
+    QTestState *qtest;
     const char *arch = qtest_get_arch();
     const char *machine_addition = "";
 
+    if (!qtest_has_device("virtio-mouse-pci")) {
+        g_test_skip("Device virtio-mouse-pci not available");
+        return;
+    }
+
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         machine_addition = "-machine pc";
     }
 
-    QTestState *qtest = qtest_initf(
+    qtest = qtest_initf(
         "%s -device \"{'driver': 'virtio-mouse-pci', 'id': 'dev0'}\"",
         machine_addition);
 
@@ -112,6 +130,7 @@ static void test_pci_unplug_json_request(void)
 
 static void test_q35_pci_unplug_json_request(void)
 {
+    QTestState *qtest;
     const char *port = "-device \"{'driver': 'pcie-root-port', "
                                   "'id': 'p1'}\"";
 
@@ -123,8 +142,12 @@ static void test_q35_pci_unplug_json_request(void)
                                     "'bus': 'b1', "
                                     "'id': 'dev0'}\"";
 
-    QTestState *qtest = qtest_initf("-machine q35 %s %s %s",
-                                    port, bridge, device);
+    if (!qtest_has_device("virtio-mouse-pci")) {
+        g_test_skip("Device virtio-mouse-pci not available");
+        return;
+    }
+
+    qtest = qtest_initf("-machine q35 %s %s %s", port, bridge, device);
 
     process_device_remove(qtest, "dev0");
 
index 9a750395a9d8d921675a2f652090b7e2c86a0742..8a6f3ac963da2046c1c8bd7a5f07562866f3464c 100644 (file)
@@ -16,6 +16,8 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
 
+static const char *qvirtio_get_dev_type(void);
+
 static bool look_for_drive0(QTestState *qts, const char *command, const char *key)
 {
     QDict *response;
@@ -40,6 +42,19 @@ static bool look_for_drive0(QTestState *qts, const char *command, const char *ke
     return found;
 }
 
+/*
+ * Tell whether the device named "<dev>-<type>" is available, where <type>
+ * is the suffix returned by qvirtio_get_dev_type() and availability is
+ * whatever qtest_has_device() reports.  A device may be absent because of
+ * the options QEMU was built with.
+ */
+static bool has_device_builtin(const char *dev)
+{
+    g_autofree gchar *full_name = NULL;
+
+    full_name = g_strdup_printf("%s-%s", dev, qvirtio_get_dev_type());
+    return qtest_has_device(full_name);
+}
+
 static bool has_drive(QTestState *qts)
 {
     return look_for_drive0(qts, "query-block", "device");
@@ -208,6 +223,11 @@ static void test_drive_del_device_del(void)
 {
     QTestState *qts;
 
+    if (!has_device_builtin("virtio-scsi")) {
+        g_test_skip("Device virtio-scsi is not available");
+        return;
+    }
+
     /* Start with a drive used by a device that unplugs instantaneously */
     qts = qtest_initf("-drive if=none,id=drive0,file=null-co://,"
                       "file.read-zeroes=on,format=raw"
@@ -232,6 +252,11 @@ static void test_cli_device_del(void)
     const char *arch = qtest_get_arch();
     const char *machine_addition = "";
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         machine_addition = "-machine pc";
     }
@@ -256,6 +281,11 @@ static void test_cli_device_del_q35(void)
 {
     QTestState *qts;
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     /*
      * -drive/-device and device_del.  Start with a drive used by a
      * device that unplugs after reset.
@@ -277,6 +307,11 @@ static void test_empty_device_del(void)
 {
     QTestState *qts;
 
+    if (!has_device_builtin("virtio-scsi")) {
+        g_test_skip("Device virtio-scsi is not available");
+        return;
+    }
+
     /* device_del with no drive plugged.  */
     qts = qtest_initf("-device virtio-scsi-%s -device scsi-cd,id=dev0",
                       qvirtio_get_dev_type());
@@ -291,6 +326,11 @@ static void test_device_add_and_del(void)
     const char *arch = qtest_get_arch();
     const char *machine_addition = "";
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         machine_addition = "-machine pc";
     }
@@ -330,6 +370,11 @@ static void test_device_add_and_del_q35(void)
 {
     QTestState *qts;
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     /*
      * -drive/device_add and device_del.  Start with a drive used by a
      * device that unplugs after reset.
@@ -352,6 +397,11 @@ static void test_drive_add_device_add_and_del(void)
     const char *arch = qtest_get_arch();
     const char *machine_addition = "";
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         machine_addition = "-machine pc";
     }
@@ -374,6 +424,11 @@ static void test_drive_add_device_add_and_del_q35(void)
 {
     QTestState *qts;
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 "
                      "-device pcie-pci-bridge,bus=p1,id=b1");
 
@@ -395,6 +450,11 @@ static void test_blockdev_add_device_add_and_del(void)
     const char *arch = qtest_get_arch();
     const char *machine_addition = "";
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         machine_addition = "-machine pc";
     }
@@ -417,6 +477,11 @@ static void test_blockdev_add_device_add_and_del_q35(void)
 {
     QTestState *qts;
 
+    if (!has_device_builtin("virtio-blk")) {
+        g_test_skip("Device virtio-blk is not available");
+        return;
+    }
+
     qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 "
                      "-device pcie-pci-bridge,bus=p1,id=b1");
 
index 392a7ae7edec44e3bd1a339b4286e1f7859f1d63..a9254b455d1b86379326907c0cdee112d61db6ad 100644 (file)
@@ -112,6 +112,10 @@ static void test_lsi_do_dma_empty_queue(void)
 
 int main(int argc, char **argv)
 {
+    if (!qtest_has_device("lsi53c895a")) {
+        return 0;
+    }
+
     g_test_init(&argc, &argv, NULL);
 
     qtest_add_func("fuzz/lsi53c895a/lsi_do_dma_empty_queue",
diff --git a/tests/qtest/fuzz/fork_fuzz.c b/tests/qtest/fuzz/fork_fuzz.c
deleted file mode 100644 (file)
index 6ffb2a7..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Fork-based fuzzing helpers
- *
- * Copyright Red Hat Inc., 2019
- *
- * Authors:
- *  Alexander Bulekov   <alxndr@bu.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "fork_fuzz.h"
-
-
-void counter_shm_init(void)
-{
-    /* Copy what's in the counter region to a temporary buffer.. */
-    void *copy = malloc(&__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START);
-    memcpy(copy,
-           &__FUZZ_COUNTERS_START,
-           &__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START);
-
-    /* Map a shared region over the counter region */
-    if (mmap(&__FUZZ_COUNTERS_START,
-             &__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START,
-             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS,
-             0, 0) == MAP_FAILED) {
-        perror("Error: ");
-        exit(1);
-    }
-
-    /* Copy the original data back to the counter-region */
-    memcpy(&__FUZZ_COUNTERS_START, copy,
-           &__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START);
-    free(copy);
-}
-
-
diff --git a/tests/qtest/fuzz/fork_fuzz.h b/tests/qtest/fuzz/fork_fuzz.h
deleted file mode 100644 (file)
index 9ecb8b5..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Fork-based fuzzing helpers
- *
- * Copyright Red Hat Inc., 2019
- *
- * Authors:
- *  Alexander Bulekov   <alxndr@bu.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef FORK_FUZZ_H
-#define FORK_FUZZ_H
-
-extern uint8_t __FUZZ_COUNTERS_START;
-extern uint8_t __FUZZ_COUNTERS_END;
-
-void counter_shm_init(void);
-
-#endif
-
diff --git a/tests/qtest/fuzz/fork_fuzz.ld b/tests/qtest/fuzz/fork_fuzz.ld
deleted file mode 100644 (file)
index cfb88b7..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * We adjust linker script modification to place all of the stuff that needs to
- * persist across fuzzing runs into a contiguous section of memory. Then, it is
- * easy to re-map the counter-related memory as shared.
- */
-
-SECTIONS
-{
-  .data.fuzz_start : ALIGN(4K)
-  {
-      __FUZZ_COUNTERS_START = .;
-      __start___sancov_cntrs = .;
-      *(_*sancov_cntrs);
-      __stop___sancov_cntrs = .;
-
-      /* Lowest stack counter */
-      *(__sancov_lowest_stack);
-  }
-}
-INSERT AFTER .data;
-
-SECTIONS
-{
-  .data.fuzz_ordered :
-  {
-      /*
-       * Coverage counters. They're not necessary for fuzzing, but are useful
-       * for analyzing the fuzzing performance
-       */
-      __start___llvm_prf_cnts = .;
-      *(*llvm_prf_cnts);
-      __stop___llvm_prf_cnts = .;
-
-      /* Internal Libfuzzer TracePC object which contains the ValueProfileMap */
-      FuzzerTracePC*(.bss*);
-      /*
-       * In case the above line fails, explicitly specify the (mangled) name of
-       * the object we care about
-       */
-       *(.bss._ZN6fuzzer3TPCE);
-  }
-}
-INSERT AFTER .data.fuzz_start;
-
-SECTIONS
-{
-  .data.fuzz_end : ALIGN(4K)
-  {
-      __FUZZ_COUNTERS_END = .;
-  }
-}
-/*
- * Don't overwrite the SECTIONS in the default linker script. Instead insert the
- * above into the default script
- */
-INSERT AFTER .data.fuzz_ordered;
index eb7520544b803ade36df24c0dc72cf6cc0665352..3bedb81b32b2d6ab56e665e3fb6668e804da638d 100644 (file)
@@ -51,6 +51,12 @@ void flush_events(QTestState *s)
     }
 }
 
+/*
+ * Reset the emulated machine from within the fuzzer process.
+ *
+ * Issues qemu_system_reset() with SHUTDOWN_CAUSE_GUEST_RESET and then runs
+ * one main_loop_wait(true) pass.  The QTestState argument is not used by
+ * the body.
+ *
+ * NOTE(review): presumably meant to be called once an input has been
+ * processed so that the next one starts from a freshly reset machine --
+ * confirm against the callers.
+ */
+void fuzz_reset(QTestState *s)
+{
+    qemu_system_reset(SHUTDOWN_CAUSE_GUEST_RESET);
+    main_loop_wait(true);
+}
+
 static QTestState *qtest_setup(void)
 {
     qtest_server_set_send_handler(&qtest_client_inproc_recv, &fuzz_qts);
index 327c1c5a55b2881e3fb6fc534ec15ef9bdde304d..21d1362d655ecae87646a7b7557b0861c546a588 100644 (file)
@@ -103,7 +103,7 @@ typedef struct FuzzTarget {
 } FuzzTarget;
 
 void flush_events(QTestState *);
-void reboot(QTestState *);
+void fuzz_reset(QTestState *);
 
 /* Use the QTest ASCII protocol or call address_space API directly?*/
 void fuzz_qtest_set_serialize(bool option);
index 7326f6840b4f7e3e4b307ca9c46056f830c87b06..c525d22951e315251cf18b643f397cad6dd8ab3c 100644 (file)
@@ -18,7 +18,6 @@
 #include "tests/qtest/libqtest.h"
 #include "tests/qtest/libqos/pci-pc.h"
 #include "fuzz.h"
-#include "fork_fuzz.h"
 #include "string.h"
 #include "exec/memory.h"
 #include "exec/ramblock.h"
@@ -29,6 +28,8 @@
 #include "generic_fuzz_configs.h"
 #include "hw/mem/sparse-mem.h"
 
+static void pci_enum(gpointer pcidev, gpointer bus);
+
 /*
  * SEPARATOR is used to separate "operations" in the fuzz input
  */
@@ -47,10 +48,10 @@ enum cmds {
     OP_CLOCK_STEP,
 };
 
-#define DEFAULT_TIMEOUT_US 100000
 #define USEC_IN_SEC 1000000000
 
 #define MAX_DMA_FILL_SIZE 0x10000
+#define MAX_TOTAL_DMA_SIZE 0x10000000
 
 #define PCI_HOST_BRIDGE_CFG 0xcf8
 #define PCI_HOST_BRIDGE_DATA 0xcfc
@@ -60,9 +61,8 @@ typedef struct {
     ram_addr_t size; /* The number of bytes until the end of the I/O region */
 } address_range;
 
-static useconds_t timeout = DEFAULT_TIMEOUT_US;
-
 static bool qtest_log_enabled;
+size_t dma_bytes_written;
 
 MemoryRegion *sparse_mem_mr;
 
@@ -196,6 +196,7 @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr)
      */
     if (dma_patterns->len == 0
         || len == 0
+        || dma_bytes_written + len > MAX_TOTAL_DMA_SIZE
         || (mr != current_machine->ram && mr != sparse_mem_mr)) {
         return;
     }
@@ -268,6 +269,7 @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr)
                 fflush(stderr);
             }
             qtest_memwrite(qts_global, addr, buf, l);
+            dma_bytes_written += l;
         }
         len -= l;
         buf += l;
@@ -589,30 +591,6 @@ static void op_disable_pci(QTestState *s, const unsigned char *data, size_t len)
     pci_disabled = true;
 }
 
-static void handle_timeout(int sig)
-{
-    if (qtest_log_enabled) {
-        fprintf(stderr, "[Timeout]\n");
-        fflush(stderr);
-    }
-
-    /*
-     * If there is a crash, libfuzzer/ASAN forks a child to run an
-     * "llvm-symbolizer" process for printing out a pretty stacktrace. It
-     * communicates with this child using a pipe.  If we timeout+Exit, while
-     * libfuzzer is still communicating with the llvm-symbolizer child, we will
-     * be left with an orphan llvm-symbolizer process. Sometimes, this appears
-     * to lead to a deadlock in the forkserver. Use waitpid to check if there
-     * are any waitable children. If so, exit out of the signal-handler, and
-     * let libfuzzer finish communicating with the child, and exit, on its own.
-     */
-    if (waitpid(-1, NULL, WNOHANG) == 0) {
-        return;
-    }
-
-    _Exit(0);
-}
-
 /*
  * Here, we interpret random bytes from the fuzzer, as a sequence of commands.
  * Some commands can be variable-width, so we use a separator, SEPARATOR, to
@@ -669,64 +647,33 @@ static void generic_fuzz(QTestState *s, const unsigned char *Data, size_t Size)
     size_t cmd_len;
     uint8_t op;
 
-    if (fork() == 0) {
-        struct sigaction sact;
-        struct itimerval timer;
-        sigset_t set;
-        /*
-         * Sometimes the fuzzer will find inputs that take quite a long time to
-         * process. Often times, these inputs do not result in new coverage.
-         * Even if these inputs might be interesting, they can slow down the
-         * fuzzer, overall. Set a timeout for each command to avoid hurting
-         * performance, too much
-         */
-        if (timeout) {
-
-            sigemptyset(&sact.sa_mask);
-            sact.sa_flags   = SA_NODEFER;
-            sact.sa_handler = handle_timeout;
-            sigaction(SIGALRM, &sact, NULL);
+    op_clear_dma_patterns(s, NULL, 0);
+    pci_disabled = false;
+    dma_bytes_written = 0;
 
-            sigemptyset(&set);
-            sigaddset(&set, SIGALRM);
-            pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
-            memset(&timer, 0, sizeof(timer));
-            timer.it_value.tv_sec = timeout / USEC_IN_SEC;
-            timer.it_value.tv_usec = timeout % USEC_IN_SEC;
-        }
-
-        op_clear_dma_patterns(s, NULL, 0);
-        pci_disabled = false;
-
-        while (cmd && Size) {
-            /* Reset the timeout, each time we run a new command */
-            if (timeout) {
-                setitimer(ITIMER_REAL, &timer, NULL);
-            }
+    QPCIBus *pcibus = qpci_new_pc(s, NULL);
+    g_ptr_array_foreach(fuzzable_pci_devices, pci_enum, pcibus);
+    qpci_free_pc(pcibus);
 
-            /* Get the length until the next command or end of input */
-            nextcmd = memmem(cmd, Size, SEPARATOR, strlen(SEPARATOR));
-            cmd_len = nextcmd ? nextcmd - cmd : Size;
+    while (cmd && Size) {
+        /* Get the length until the next command or end of input */
+        nextcmd = memmem(cmd, Size, SEPARATOR, strlen(SEPARATOR));
+        cmd_len = nextcmd ? nextcmd - cmd : Size;
 
-            if (cmd_len > 0) {
-                /* Interpret the first byte of the command as an opcode */
-                op = *cmd % (sizeof(ops) / sizeof((ops)[0]));
-                ops[op](s, cmd + 1, cmd_len - 1);
+        if (cmd_len > 0) {
+            /* Interpret the first byte of the command as an opcode */
+            op = *cmd % (sizeof(ops) / sizeof((ops)[0]));
+            ops[op](s, cmd + 1, cmd_len - 1);
 
-                /* Run the main loop */
-                flush_events(s);
-            }
-            /* Advance to the next command */
-            cmd = nextcmd ? nextcmd + sizeof(SEPARATOR) - 1 : nextcmd;
-            Size = Size - (cmd_len + sizeof(SEPARATOR) - 1);
-            g_array_set_size(dma_regions, 0);
+            /* Run the main loop */
+            flush_events(s);
         }
-        _Exit(0);
-    } else {
-        flush_events(s);
-        wait(0);
+        /* Advance to the next command */
+        cmd = nextcmd ? nextcmd + sizeof(SEPARATOR) - 1 : nextcmd;
+        Size = Size - (cmd_len + sizeof(SEPARATOR) - 1);
+        g_array_set_size(dma_regions, 0);
     }
+    fuzz_reset(s);
 }
 
 static void usage(void)
@@ -738,8 +685,6 @@ static void usage(void)
     printf("Optionally: QEMU_AVOID_DOUBLE_FETCH= "
             "Try to avoid racy DMA double fetch bugs? %d by default\n",
             avoid_double_fetches);
-    printf("Optionally: QEMU_FUZZ_TIMEOUT= Specify a custom timeout (us). "
-            "0 to disable. %d by default\n", timeout);
     exit(0);
 }
 
@@ -825,7 +770,6 @@ static void generic_pre_fuzz(QTestState *s)
 {
     GHashTableIter iter;
     MemoryRegion *mr;
-    QPCIBus *pcibus;
     char **result;
     GString *name_pattern;
 
@@ -838,9 +782,6 @@ static void generic_pre_fuzz(QTestState *s)
     if (getenv("QEMU_AVOID_DOUBLE_FETCH")) {
         avoid_double_fetches = 1;
     }
-    if (getenv("QEMU_FUZZ_TIMEOUT")) {
-        timeout = g_ascii_strtoll(getenv("QEMU_FUZZ_TIMEOUT"), NULL, 0);
-    }
     qts_global = s;
 
     /*
@@ -883,12 +824,6 @@ static void generic_pre_fuzz(QTestState *s)
         printf("No fuzzable memory regions found...\n");
         exit(1);
     }
-
-    pcibus = qpci_new_pc(s, NULL);
-    g_ptr_array_foreach(fuzzable_pci_devices, pci_enum, pcibus);
-    qpci_free_pc(pcibus);
-
-    counter_shm_init();
 }
 
 /*
index b17fc725dfd6be08b79c08ef22b309ef9a5d302f..155fe018f80b4b1f8e323785b9562b64cab2fce5 100644 (file)
@@ -18,7 +18,6 @@
 #include "tests/qtest/libqos/pci-pc.h"
 #include "fuzz.h"
 #include "qos_fuzz.h"
-#include "fork_fuzz.h"
 
 
 #define I440FX_PCI_HOST_BRIDGE_CFG 0xcf8
@@ -89,6 +88,7 @@ static void i440fx_fuzz_qtest(QTestState *s,
                               size_t Size)
 {
     ioport_fuzz_qtest(s, Data, Size);
+    fuzz_reset(s);
 }
 
 static void pciconfig_fuzz_qos(QTestState *s, QPCIBus *bus,
@@ -145,17 +145,6 @@ static void i440fx_fuzz_qos(QTestState *s,
     pciconfig_fuzz_qos(s, bus, Data, Size);
 }
 
-static void i440fx_fuzz_qos_fork(QTestState *s,
-        const unsigned char *Data, size_t Size) {
-    if (fork() == 0) {
-        i440fx_fuzz_qos(s, Data, Size);
-        _Exit(0);
-    } else {
-        flush_events(s);
-        wait(NULL);
-    }
-}
-
 static const char *i440fx_qtest_argv = TARGET_NAME " -machine accel=qtest"
                                        " -m 0 -display none";
 static GString *i440fx_argv(FuzzTarget *t)
@@ -163,10 +152,6 @@ static GString *i440fx_argv(FuzzTarget *t)
     return g_string_new(i440fx_qtest_argv);
 }
 
-static void fork_init(void)
-{
-    counter_shm_init();
-}
 
 static void register_pci_fuzz_targets(void)
 {
@@ -178,16 +163,6 @@ static void register_pci_fuzz_targets(void)
                 .get_init_cmdline = i440fx_argv,
                 .fuzz = i440fx_fuzz_qtest});
 
-    /* Uses libqos and forks to prevent state leakage */
-    fuzz_add_qos_target(&(FuzzTarget){
-                .name = "i440fx-qos-fork-fuzz",
-                .description = "Fuzz the i440fx using raw qtest commands and "
-                               "rebooting after each run",
-                .pre_vm_init = &fork_init,
-                .fuzz = i440fx_fuzz_qos_fork,},
-                "i440FX-pcihost",
-                &(QOSGraphTestOptions){}
-                );
 
     /*
      * Uses libqos. Doesn't do anything to reset state. Note that if we were to
index 189901d4a26ff19eceb4082161faabf084cf413f..4d10b47b8f9092dbeb73b94e016e9338820c9ba1 100644 (file)
@@ -2,7 +2,7 @@ if not get_option('fuzzing')
   subdir_done()
 endif
 
-specific_fuzz_ss.add(files('fuzz.c', 'fork_fuzz.c', 'qos_fuzz.c',
+specific_fuzz_ss.add(files('fuzz.c', 'qos_fuzz.c',
                            'qtest_wrappers.c'), qos)
 
 # Targets
@@ -12,7 +12,7 @@ specific_fuzz_ss.add(when: 'CONFIG_VIRTIO_SCSI', if_true: files('virtio_scsi_fuz
 specific_fuzz_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio_blk_fuzz.c'))
 specific_fuzz_ss.add(files('generic_fuzz.c'))
 
-fork_fuzz = declare_dependency(
+fuzz_ld = declare_dependency(
   link_args: fuzz_exe_ldflags +
              ['-Wl,-wrap,qtest_inb',
               '-Wl,-wrap,qtest_inw',
@@ -35,4 +35,4 @@ fork_fuzz = declare_dependency(
               '-Wl,-wrap,qtest_memset']
 )
 
-specific_fuzz_ss.add(fork_fuzz)
+specific_fuzz_ss.add(fuzz_ld)
index a9fb9ecf6c1ada5c87a15c7cbf6f2fc6526d7bc9..651fd4f04350695f5ff708d10aaadd8966b780f4 100644 (file)
@@ -19,7 +19,6 @@
 #include "standard-headers/linux/virtio_pci.h"
 #include "standard-headers/linux/virtio_blk.h"
 #include "fuzz.h"
-#include "fork_fuzz.h"
 #include "qos_fuzz.h"
 
 #define TEST_IMAGE_SIZE         (64 * 1024 * 1024)
@@ -128,48 +127,24 @@ static void virtio_blk_fuzz(QTestState *s, QVirtioBlkQueues* queues,
     }
 }
 
-static void virtio_blk_fork_fuzz(QTestState *s,
-        const unsigned char *Data, size_t Size)
-{
-    QVirtioBlk *blk = fuzz_qos_obj;
-    static QVirtioBlkQueues *queues;
-    if (!queues) {
-        queues = qvirtio_blk_init(blk->vdev, 0);
-    }
-    if (fork() == 0) {
-        virtio_blk_fuzz(s, queues, Data, Size);
-        flush_events(s);
-        _Exit(0);
-    } else {
-        flush_events(s);
-        wait(NULL);
-    }
-}
-
 static void virtio_blk_with_flag_fuzz(QTestState *s,
         const unsigned char *Data, size_t Size)
 {
     QVirtioBlk *blk = fuzz_qos_obj;
     static QVirtioBlkQueues *queues;
 
-    if (fork() == 0) {
-        if (Size >= sizeof(uint64_t)) {
-            queues = qvirtio_blk_init(blk->vdev, *(uint64_t *)Data);
-            virtio_blk_fuzz(s, queues,
-                             Data + sizeof(uint64_t), Size - sizeof(uint64_t));
-            flush_events(s);
-        }
-        _Exit(0);
-    } else {
+    if (Size >= sizeof(uint64_t)) {
+        queues = qvirtio_blk_init(blk->vdev, *(uint64_t *)Data);
+        virtio_blk_fuzz(s, queues,
+                Data + sizeof(uint64_t), Size - sizeof(uint64_t));
         flush_events(s);
-        wait(NULL);
     }
+    fuzz_reset(s);
 }
 
 static void virtio_blk_pre_fuzz(QTestState *s)
 {
     qos_init_path(s);
-    counter_shm_init();
 }
 
 static void drive_destroy(void *path)
@@ -208,22 +183,10 @@ static void *virtio_blk_test_setup(GString *cmd_line, void *arg)
 
 static void register_virtio_blk_fuzz_targets(void)
 {
-    fuzz_add_qos_target(&(FuzzTarget){
-                .name = "virtio-blk-fuzz",
-                .description = "Fuzz the virtio-blk virtual queues, forking "
-                                "for each fuzz run",
-                .pre_vm_init = &counter_shm_init,
-                .pre_fuzz = &virtio_blk_pre_fuzz,
-                .fuzz = virtio_blk_fork_fuzz,},
-                "virtio-blk",
-                &(QOSGraphTestOptions){.before = virtio_blk_test_setup}
-                );
-
     fuzz_add_qos_target(&(FuzzTarget){
                 .name = "virtio-blk-flags-fuzz",
-                .description = "Fuzz the virtio-blk virtual queues, forking "
-                "for each fuzz run (also fuzzes the virtio flags)",
-                .pre_vm_init = &counter_shm_init,
+                .description = "Fuzz the virtio-blk virtual queues. "
+                "Also fuzzes the virtio flags",
                 .pre_fuzz = &virtio_blk_pre_fuzz,
                 .fuzz = virtio_blk_with_flag_fuzz,},
                 "virtio-blk",
index c2c15f07f06203896fdb742cab733c9f3cd3c6ce..e239875e3b42917c6d8ee664812f7e1d6f07b4f9 100644 (file)
@@ -16,7 +16,6 @@
 #include "tests/qtest/libqtest.h"
 #include "tests/qtest/libqos/virtio-net.h"
 #include "fuzz.h"
-#include "fork_fuzz.h"
 #include "qos_fuzz.h"
 
 
@@ -115,36 +114,18 @@ static void virtio_net_fuzz_multi(QTestState *s,
     }
 }
 
-static void virtio_net_fork_fuzz(QTestState *s,
-        const unsigned char *Data, size_t Size)
-{
-    if (fork() == 0) {
-        virtio_net_fuzz_multi(s, Data, Size, false);
-        flush_events(s);
-        _Exit(0);
-    } else {
-        flush_events(s);
-        wait(NULL);
-    }
-}
 
-static void virtio_net_fork_fuzz_check_used(QTestState *s,
+static void virtio_net_fuzz_check_used(QTestState *s,
         const unsigned char *Data, size_t Size)
 {
-    if (fork() == 0) {
-        virtio_net_fuzz_multi(s, Data, Size, true);
-        flush_events(s);
-        _Exit(0);
-    } else {
-        flush_events(s);
-        wait(NULL);
-    }
+    virtio_net_fuzz_multi(s, Data, Size, true);
+    flush_events(s);
+    fuzz_reset(s);
 }
 
 static void virtio_net_pre_fuzz(QTestState *s)
 {
     qos_init_path(s);
-    counter_shm_init();
 }
 
 static void *virtio_net_test_setup_socket(GString *cmd_line, void *arg)
@@ -158,23 +139,8 @@ static void *virtio_net_test_setup_socket(GString *cmd_line, void *arg)
     return arg;
 }
 
-static void *virtio_net_test_setup_user(GString *cmd_line, void *arg)
-{
-    g_string_append_printf(cmd_line, " -netdev user,id=hs0 ");
-    return arg;
-}
-
 static void register_virtio_net_fuzz_targets(void)
 {
-    fuzz_add_qos_target(&(FuzzTarget){
-            .name = "virtio-net-socket",
-            .description = "Fuzz the virtio-net virtual queues. Fuzz incoming "
-            "traffic using the socket backend",
-            .pre_fuzz = &virtio_net_pre_fuzz,
-            .fuzz = virtio_net_fork_fuzz,},
-            "virtio-net",
-            &(QOSGraphTestOptions){.before = virtio_net_test_setup_socket}
-            );
 
     fuzz_add_qos_target(&(FuzzTarget){
             .name = "virtio-net-socket-check-used",
@@ -182,20 +148,10 @@ static void register_virtio_net_fuzz_targets(void)
             "descriptors to be used. Timeout may indicate improperly handled "
             "input",
             .pre_fuzz = &virtio_net_pre_fuzz,
-            .fuzz = virtio_net_fork_fuzz_check_used,},
+            .fuzz = virtio_net_fuzz_check_used,},
             "virtio-net",
             &(QOSGraphTestOptions){.before = virtio_net_test_setup_socket}
             );
-    fuzz_add_qos_target(&(FuzzTarget){
-            .name = "virtio-net-slirp",
-            .description = "Fuzz the virtio-net virtual queues with the slirp "
-            " backend. Warning: May result in network traffic emitted from the "
-            " process. Run in an isolated network environment.",
-            .pre_fuzz = &virtio_net_pre_fuzz,
-            .fuzz = virtio_net_fork_fuzz,},
-            "virtio-net",
-            &(QOSGraphTestOptions){.before = virtio_net_test_setup_user}
-            );
 }
 
 fuzz_target_init(register_virtio_net_fuzz_targets);
index b3220ef6cb201cdda558ac6c41557f160eceba21..b6268efd59191c4e1ea28c6aaae906bdbaa28a8a 100644 (file)
@@ -20,7 +20,6 @@
 #include "standard-headers/linux/virtio_pci.h"
 #include "standard-headers/linux/virtio_scsi.h"
 #include "fuzz.h"
-#include "fork_fuzz.h"
 #include "qos_fuzz.h"
 
 #define PCI_SLOT                0x02
@@ -132,48 +131,24 @@ static void virtio_scsi_fuzz(QTestState *s, QVirtioSCSIQueues* queues,
     }
 }
 
-static void virtio_scsi_fork_fuzz(QTestState *s,
-        const unsigned char *Data, size_t Size)
-{
-    QVirtioSCSI *scsi = fuzz_qos_obj;
-    static QVirtioSCSIQueues *queues;
-    if (!queues) {
-        queues = qvirtio_scsi_init(scsi->vdev, 0);
-    }
-    if (fork() == 0) {
-        virtio_scsi_fuzz(s, queues, Data, Size);
-        flush_events(s);
-        _Exit(0);
-    } else {
-        flush_events(s);
-        wait(NULL);
-    }
-}
-
 static void virtio_scsi_with_flag_fuzz(QTestState *s,
         const unsigned char *Data, size_t Size)
 {
     QVirtioSCSI *scsi = fuzz_qos_obj;
     static QVirtioSCSIQueues *queues;
 
-    if (fork() == 0) {
-        if (Size >= sizeof(uint64_t)) {
-            queues = qvirtio_scsi_init(scsi->vdev, *(uint64_t *)Data);
-            virtio_scsi_fuzz(s, queues,
-                             Data + sizeof(uint64_t), Size - sizeof(uint64_t));
-            flush_events(s);
-        }
-        _Exit(0);
-    } else {
+    if (Size >= sizeof(uint64_t)) {
+        queues = qvirtio_scsi_init(scsi->vdev, *(uint64_t *)Data);
+        virtio_scsi_fuzz(s, queues,
+                Data + sizeof(uint64_t), Size - sizeof(uint64_t));
         flush_events(s);
-        wait(NULL);
     }
+    fuzz_reset(s);
 }
 
 static void virtio_scsi_pre_fuzz(QTestState *s)
 {
     qos_init_path(s);
-    counter_shm_init();
 }
 
 static void *virtio_scsi_test_setup(GString *cmd_line, void *arg)
@@ -189,22 +164,10 @@ static void *virtio_scsi_test_setup(GString *cmd_line, void *arg)
 
 static void register_virtio_scsi_fuzz_targets(void)
 {
-    fuzz_add_qos_target(&(FuzzTarget){
-                .name = "virtio-scsi-fuzz",
-                .description = "Fuzz the virtio-scsi virtual queues, forking "
-                                "for each fuzz run",
-                .pre_vm_init = &counter_shm_init,
-                .pre_fuzz = &virtio_scsi_pre_fuzz,
-                .fuzz = virtio_scsi_fork_fuzz,},
-                "virtio-scsi",
-                &(QOSGraphTestOptions){.before = virtio_scsi_test_setup}
-                );
-
     fuzz_add_qos_target(&(FuzzTarget){
                 .name = "virtio-scsi-flags-fuzz",
-                .description = "Fuzz the virtio-scsi virtual queues, forking "
-                "for each fuzz run (also fuzzes the virtio flags)",
-                .pre_vm_init = &counter_shm_init,
+                .description = "Fuzz the virtio-scsi virtual queues. "
+                "Also fuzzes the virtio flags",
                 .pre_fuzz = &virtio_scsi_pre_fuzz,
                 .fuzz = virtio_scsi_with_flag_fuzz,},
                 "virtio-scsi",
index 4a7628077b23d2815b49ecf915ba00cd58e10bb0..5aa258a2b36a9bb5aae852ca8736cd9aff881c29 100644 (file)
@@ -1090,30 +1090,42 @@ int main(int argc, char **argv)
         qtest_add_func("hd-geo/override/ide", test_override_ide);
         if (qtest_has_device("lsi53c895a")) {
             qtest_add_func("hd-geo/override/scsi", test_override_scsi);
-            qtest_add_func("hd-geo/override/scsi_2_controllers",
-                           test_override_scsi_2_controllers);
+            if (qtest_has_device("virtio-scsi-pci")) {
+                qtest_add_func("hd-geo/override/scsi_2_controllers",
+                               test_override_scsi_2_controllers);
+            }
         }
-        qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk);
         qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs);
-        qtest_add_func("hd-geo/override/scsi_hot_unplug",
-                       test_override_scsi_hot_unplug);
-        qtest_add_func("hd-geo/override/virtio_hot_unplug",
-                       test_override_virtio_hot_unplug);
+        if (qtest_has_device("virtio-scsi-pci")) {
+            qtest_add_func("hd-geo/override/scsi_hot_unplug",
+                           test_override_scsi_hot_unplug);
+        }
+        if (qtest_has_device("virtio-blk-pci")) {
+            qtest_add_func("hd-geo/override/virtio_hot_unplug",
+                           test_override_virtio_hot_unplug);
+            qtest_add_func("hd-geo/override/virtio_blk",
+                           test_override_virtio_blk);
+        }
 
         if (qtest_has_machine("q35")) {
             qtest_add_func("hd-geo/override/sata", test_override_sata);
-            qtest_add_func("hd-geo/override/virtio_blk_q35",
-                           test_override_virtio_blk_q35);
             qtest_add_func("hd-geo/override/zero_chs_q35",
                            test_override_zero_chs_q35);
             if (qtest_has_device("lsi53c895a")) {
                 qtest_add_func("hd-geo/override/scsi_q35",
                                test_override_scsi_q35);
             }
-            qtest_add_func("hd-geo/override/scsi_hot_unplug_q35",
-                           test_override_scsi_hot_unplug_q35);
-            qtest_add_func("hd-geo/override/virtio_hot_unplug_q35",
-                           test_override_virtio_hot_unplug_q35);
+            if (qtest_has_device("virtio-scsi-pci")) {
+                qtest_add_func("hd-geo/override/scsi_hot_unplug_q35",
+                               test_override_scsi_hot_unplug_q35);
+            }
+            if (qtest_has_device("virtio-blk-pci")) {
+                qtest_add_func("hd-geo/override/virtio_hot_unplug_q35",
+                               test_override_virtio_hot_unplug_q35);
+                qtest_add_func("hd-geo/override/virtio_blk_q35",
+                               test_override_virtio_blk_q35);
+            }
+
         }
     } else {
         g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; "
index d658222a191d358aef7a2fa8cb59c7418bd8d13f..2bfd46053156e28e952f477879a9d9238abcadd9 100644 (file)
@@ -158,6 +158,7 @@ bool qtest_probe_child(QTestState *s)
         CloseHandle((HANDLE)pid);
 #endif
         s->qemu_pid = -1;
+        qtest_remove_abrt_handler(s);
     }
     return false;
 }
@@ -169,6 +170,8 @@ void qtest_set_expected_status(QTestState *s, int status)
 
 static void qtest_check_status(QTestState *s)
 {
+    assert(s->qemu_pid == -1);
+
     /*
      * Check whether qemu exited with expected exit status; anything else is
      * fishy and should be logged with as much detail as possible.
@@ -202,36 +205,40 @@ static void qtest_check_status(QTestState *s)
 
 void qtest_wait_qemu(QTestState *s)
 {
+    if (s->qemu_pid != -1) {
 #ifndef _WIN32
-    pid_t pid;
-    uint64_t end;
+        pid_t pid;
+        uint64_t end;
 
-    /* poll for a while until sending SIGKILL */
-    end = g_get_monotonic_time() + WAITPID_TIMEOUT * G_TIME_SPAN_SECOND;
+        /* poll for a while until sending SIGKILL */
+        end = g_get_monotonic_time() + WAITPID_TIMEOUT * G_TIME_SPAN_SECOND;
 
-    do {
-        pid = waitpid(s->qemu_pid, &s->wstatus, WNOHANG);
-        if (pid != 0) {
-            break;
-        }
-        g_usleep(100 * 1000);
-    } while (g_get_monotonic_time() < end);
+        do {
+            pid = waitpid(s->qemu_pid, &s->wstatus, WNOHANG);
+            if (pid != 0) {
+                break;
+            }
+            g_usleep(100 * 1000);
+        } while (g_get_monotonic_time() < end);
 
-    if (pid == 0) {
-        kill(s->qemu_pid, SIGKILL);
-        pid = RETRY_ON_EINTR(waitpid(s->qemu_pid, &s->wstatus, 0));
-    }
+        if (pid == 0) {
+            kill(s->qemu_pid, SIGKILL);
+            pid = RETRY_ON_EINTR(waitpid(s->qemu_pid, &s->wstatus, 0));
+        }
 
-    assert(pid == s->qemu_pid);
+        assert(pid == s->qemu_pid);
 #else
-    DWORD ret;
+        DWORD ret;
 
-    ret = WaitForSingleObject((HANDLE)s->qemu_pid, INFINITE);
-    assert(ret == WAIT_OBJECT_0);
-    GetExitCodeProcess((HANDLE)s->qemu_pid, &s->exit_code);
-    CloseHandle((HANDLE)s->qemu_pid);
+        ret = WaitForSingleObject((HANDLE)s->qemu_pid, INFINITE);
+        assert(ret == WAIT_OBJECT_0);
+        GetExitCodeProcess((HANDLE)s->qemu_pid, &s->exit_code);
+        CloseHandle((HANDLE)s->qemu_pid);
 #endif
 
+        s->qemu_pid = -1;
+        qtest_remove_abrt_handler(s);
+    }
     qtest_check_status(s);
 }
 
@@ -245,7 +252,6 @@ void qtest_kill_qemu(QTestState *s)
         TerminateProcess((HANDLE)s->qemu_pid, s->expected_status);
 #endif
         qtest_wait_qemu(s);
-        s->qemu_pid = -1;
         return;
     }
 
@@ -307,6 +313,11 @@ void qtest_add_abrt_handler(GHookFunc fn, const void *data)
 void qtest_remove_abrt_handler(void *data)
 {
     GHook *hook = g_hook_find_data(&abrt_hooks, TRUE, data);
+
+    if (!hook) {
+        return;
+    }
+
     g_hook_destroy_link(&abrt_hooks, hook);
 
     /* Uninstall SIGABRT handler on last instance */
@@ -360,60 +371,25 @@ static pid_t qtest_create_process(char *cmd)
 }
 #endif /* _WIN32 */
 
-QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
+static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...)
 {
-    QTestState *s;
-    int sock, qmpsock, i;
-    gchar *socket_path;
-    gchar *qmp_socket_path;
-    gchar *command;
-    const char *qemu_binary = qtest_qemu_binary();
+    va_list ap;
+    QTestState *s = g_new0(QTestState, 1);
     const char *trace = g_getenv("QTEST_TRACE");
     g_autofree char *tracearg = trace ?
         g_strdup_printf("-trace %s ", trace) : g_strdup("");
+    g_autoptr(GString) command = g_string_new("");
 
-    s = g_new(QTestState, 1);
-
-    socket_path = g_strdup_printf("%s/qtest-%d.sock",
-                                  g_get_tmp_dir(), getpid());
-    qmp_socket_path = g_strdup_printf("%s/qtest-%d.qmp",
-                                      g_get_tmp_dir(), getpid());
-
-    /* It's possible that if an earlier test run crashed it might
-     * have left a stale unix socket lying around. Delete any
-     * stale old socket to avoid spurious test failures with
-     * tests/libqtest.c:70:init_socket: assertion failed (ret != -1): (-1 != -1)
-     */
-    unlink(socket_path);
-    unlink(qmp_socket_path);
-
-    socket_init();
-    sock = init_socket(socket_path);
-    qmpsock = init_socket(qmp_socket_path);
-
-    qtest_client_set_rx_handler(s, qtest_client_socket_recv_line);
-    qtest_client_set_tx_handler(s, qtest_client_socket_send);
+    va_start(ap, fmt);
+    g_string_append_printf(command, CMD_EXEC "%s %s",
+                           qtest_qemu_binary(), tracearg);
+    g_string_append_vprintf(command, fmt, ap);
+    va_end(ap);
 
     qtest_add_abrt_handler(kill_qemu_hook_func, s);
 
-    command = g_strdup_printf(CMD_EXEC "%s %s"
-                              "-qtest unix:%s "
-                              "-qtest-log %s "
-                              "-chardev socket,path=%s,id=char0 "
-                              "-mon chardev=char0,mode=control "
-                              "-display none "
-                              "%s"
-                              " -accel qtest",
-                              qemu_binary, tracearg, socket_path,
-                              getenv("QTEST_LOG") ? DEV_STDERR : DEV_NULL,
-                              qmp_socket_path,
-                              extra_args ?: "");
-
-    g_test_message("starting QEMU: %s", command);
-
-    s->pending_events = NULL;
-    s->wstatus = 0;
-    s->expected_status = 0;
+    g_test_message("starting QEMU: %s", command->str);
+
 #ifndef _WIN32
     s->qemu_pid = fork();
     if (s->qemu_pid == 0) {
@@ -434,14 +410,56 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
         if (!g_setenv("QEMU_AUDIO_DRV", "none", true)) {
             exit(1);
         }
-        execlp("/bin/sh", "sh", "-c", command, NULL);
+        execlp("/bin/sh", "sh", "-c", command->str, NULL);
         exit(1);
     }
 #else
-    s->qemu_pid = qtest_create_process(command);
+    s->qemu_pid = qtest_create_process(command->str);
 #endif /* _WIN32 */
 
-    g_free(command);
+    return s;
+}
+
+QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
+{
+    QTestState *s;
+    int sock, qmpsock, i;
+    gchar *socket_path;
+    gchar *qmp_socket_path;
+
+    socket_path = g_strdup_printf("%s/qtest-%d.sock",
+                                  g_get_tmp_dir(), getpid());
+    qmp_socket_path = g_strdup_printf("%s/qtest-%d.qmp",
+                                      g_get_tmp_dir(), getpid());
+
+    /*
+     * It's possible that if an earlier test run crashed it might
+     * have left a stale unix socket lying around. Delete any
+     * stale old socket to avoid spurious test failures with
+     * tests/libqtest.c:70:init_socket: assertion failed (ret != -1): (-1 != -1)
+     */
+    unlink(socket_path);
+    unlink(qmp_socket_path);
+
+    socket_init();
+    sock = init_socket(socket_path);
+    qmpsock = init_socket(qmp_socket_path);
+
+    s = qtest_spawn_qemu("-qtest unix:%s "
+                         "-qtest-log %s "
+                         "-chardev socket,path=%s,id=char0 "
+                         "-mon chardev=char0,mode=control "
+                         "-display none "
+                         "%s"
+                         " -accel qtest",
+                         socket_path,
+                         getenv("QTEST_LOG") ? DEV_STDERR : DEV_NULL,
+                         qmp_socket_path,
+                         extra_args ?: "");
+
+    qtest_client_set_rx_handler(s, qtest_client_socket_recv_line);
+    qtest_client_set_tx_handler(s, qtest_client_socket_send);
+
     s->fd = socket_accept(sock);
     if (s->fd >= 0) {
         s->qmp_fd = socket_accept(qmpsock);
index e97616d327c0337c2f39a6b73e41414c58b1284a..29a4efb4c24f282a0fa0ae78f767cd04732ec380 100644 (file)
@@ -73,11 +73,14 @@ qtests_i386 = \
   (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) +                 \
   (config_host.has_key('CONFIG_POSIX') and                                                  \
    config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) +                   \
-  (config_all_devices.has_key('CONFIG_VIRTIO_NET') and                                      \
+  (config_all_devices.has_key('CONFIG_PCIE_PORT') and                                       \
+   config_all_devices.has_key('CONFIG_VIRTIO_NET') and                                      \
    config_all_devices.has_key('CONFIG_Q35') and                                             \
    config_all_devices.has_key('CONFIG_VIRTIO_PCI') and                                      \
    slirp.found() ? ['virtio-net-failover'] : []) +                                          \
-  (unpack_edk2_blobs ? ['bios-tables-test'] : []) +                                         \
+  (unpack_edk2_blobs and                                                                    \
+   config_all_devices.has_key('CONFIG_HPET') and                                            \
+   config_all_devices.has_key('CONFIG_PARALLEL') ? ['bios-tables-test'] : []) +             \
   qtests_pci +                                                                              \
   qtests_cxl +                                                                              \
   ['fdc-test',
@@ -196,17 +199,17 @@ qtests_arm = \
   (config_all_devices.has_key('CONFIG_PFLASH_CFI02') ? ['pflash-cfi02-test'] : []) +         \
   (config_all_devices.has_key('CONFIG_ASPEED_SOC') ? qtests_aspeed : []) + \
   (config_all_devices.has_key('CONFIG_NPCM7XX') ? qtests_npcm7xx : []) + \
+  (config_all_devices.has_key('CONFIG_GENERIC_LOADER') ? ['hexloader-test'] : []) + \
   ['arm-cpu-features',
    'microbit-test',
    'test-arm-mptimer',
-   'boot-serial-test',
-   'hexloader-test']
+   'boot-serial-test']
 
 # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional
 qtests_aarch64 = \
   (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) +                            \
-  (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) +        \
-  (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) +  \
+  (config_all.has_key('CONFIG_TCG') and config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ?            \
+    ['tpm-tis-device-test', 'tpm-tis-device-swtpm-test'] : []) +                                         \
   (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \
   (config_all_devices.has_key('CONFIG_RASPI') ? ['bcm2835-dma-test'] : []) +  \
   ['arm-cpu-features',
@@ -254,10 +257,14 @@ qos_test_ss.add(
   'virtio-net-test.c',
   'virtio-rng-test.c',
   'virtio-scsi-test.c',
-  'virtio-serial-test.c',
   'virtio-iommu-test.c',
   'vmxnet3-test.c',
 )
+
+if config_all_devices.has_key('CONFIG_VIRTIO_SERIAL')
+  qos_test_ss.add(files('virtio-serial-test.c'))
+endif
+
 if config_host.has_key('CONFIG_POSIX')
   qos_test_ss.add(files('e1000e-test.c'))
 endif
index 1d98dca821f1f603de252156c8db19b13c3454e9..270e424beeb7355e3962ee065733b0398bedfbc9 100644 (file)
 #include <glib/gstdio.h>
 #include "../unit/socket-helpers.h"
 #include "libqtest.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/sockets.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qapi-visit-sockets.h"
 
 #define CONNECTION_TIMEOUT    60
 
@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void)
     qtest_quit(qts0);
 }
 
+static void wait_stream_connected(QTestState *qts, const char *id,
+                                  SocketAddress **addr)
+{
+    QDict *resp, *data;
+    QString *qstr;
+    QObject *obj;
+    Visitor *v = NULL;
+
+    resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED");
+    g_assert_nonnull(resp);
+    data = qdict_get_qdict(resp, "data");
+    g_assert_nonnull(data);
+
+    qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+    g_assert_nonnull(data);
+
+    g_assert(!strcmp(qstring_get_str(qstr), id));
+
+    obj = qdict_get(data, "addr");
+
+    v = qobject_input_visitor_new(obj);
+    visit_type_SocketAddress(v, NULL, addr, NULL);
+    visit_free(v);
+    qobject_unref(resp);
+}
+
+static void wait_stream_disconnected(QTestState *qts, const char *id)
+{
+    QDict *resp, *data;
+    QString *qstr;
+
+    resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED");
+    g_assert_nonnull(resp);
+    data = qdict_get_qdict(resp, "data");
+    g_assert_nonnull(data);
+
+    qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+    g_assert_nonnull(data);
+
+    g_assert(!strcmp(qstring_get_str(qstr), id));
+    qobject_unref(resp);
+}
+
+static void test_stream_inet_reconnect(void)
+{
+    QTestState *qts0, *qts1;
+    int port;
+    SocketAddress *addr;
+
+    port = inet_get_free_port(false);
+    qts0 = qtest_initf("-nodefaults -M none "
+                       "-netdev stream,id=st0,server=true,addr.type=inet,"
+                       "addr.ipv4=on,addr.ipv6=off,"
+                       "addr.host=127.0.0.1,addr.port=%d", port);
+
+    EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+    qts1 = qtest_initf("-nodefaults -M none "
+                       "-netdev stream,server=false,id=st0,addr.type=inet,"
+                       "addr.ipv4=on,addr.ipv6=off,reconnect=1,"
+                       "addr.host=127.0.0.1,addr.port=%d", port);
+
+    wait_stream_connected(qts0, "st0", &addr);
+    g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+    g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+    qapi_free_SocketAddress(addr);
+
+    /* kill server */
+    qtest_quit(qts0);
+
+    /* check client has been disconnected */
+    wait_stream_disconnected(qts1, "st0");
+
+    /* restart server */
+    qts0 = qtest_initf("-nodefaults -M none "
+                       "-netdev stream,id=st0,server=true,addr.type=inet,"
+                       "addr.ipv4=on,addr.ipv6=off,"
+                       "addr.host=127.0.0.1,addr.port=%d", port);
+
+    /* wait for connection events */
+    wait_stream_connected(qts0, "st0", &addr);
+    g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+    g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+    qapi_free_SocketAddress(addr);
+
+    wait_stream_connected(qts1, "st0", &addr);
+    g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+    g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+    g_assert_cmpint(atoi(addr->u.inet.port), ==, port);
+    qapi_free_SocketAddress(addr);
+
+    qtest_quit(qts1);
+    qtest_quit(qts0);
+}
+
 static void test_stream_inet_ipv6(void)
 {
     QTestState *qts0, *qts1;
@@ -418,6 +517,8 @@ int main(int argc, char **argv)
 #ifndef _WIN32
         qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast);
 #endif
+        qtest_add_func("/netdev/stream/inet/reconnect",
+                       test_stream_inet_reconnect);
     }
     if (has_ipv6) {
         qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6);
index e320a625c4b6127426c1d78f07a267e667a8dbb2..ea4ca1d106e4dc55a1fab27bcd92cb247aa74095 100644 (file)
@@ -20,6 +20,8 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qnum.h"
 
+static int verbosity_level;
+
 #define REF_HZ          25000000
 
 /* Register field definitions. */
@@ -221,7 +223,9 @@ static uint64_t pwm_qom_get(QTestState *qts, const char *path, const char *name)
     QDict *response;
     uint64_t val;
 
-    g_test_message("Getting properties %s from %s", name, path);
+    if (verbosity_level >= 2) {
+        g_test_message("Getting properties %s from %s", name, path);
+    }
     response = qtest_qmp(qts, "{ 'execute': 'qom-get',"
             " 'arguments': { 'path': %s, 'property': %s}}",
             path, name);
@@ -260,8 +264,10 @@ static void mft_qom_set(QTestState *qts, int index, const char *name,
     QDict *response;
     char *path = g_strdup_printf("/machine/soc/mft[%d]", index);
 
-    g_test_message("Setting properties %s of mft[%d] with value %u",
-                   name, index, value);
+    if (verbosity_level >= 2) {
+        g_test_message("Setting properties %s of mft[%d] with value %u",
+                       name, index, value);
+    }
     response = qtest_qmp(qts, "{ 'execute': 'qom-set',"
             " 'arguments': { 'path': %s, "
             " 'property': %s, 'value': %u}}",
@@ -506,9 +512,12 @@ static void mft_verify_rpm(QTestState *qts, const TestData *td, uint64_t duty)
     int32_t expected_cnt = mft_compute_cnt(rpm, clk);
 
     qtest_irq_intercept_in(qts, "/machine/soc/a9mpcore/gic");
-    g_test_message(
-        "verifying rpm for mft[%d]: clk: %" PRIu64 ", duty: %" PRIu64 ", rpm: %u, cnt: %d",
-        index, clk, duty, rpm, expected_cnt);
+    if (verbosity_level >= 2) {
+        g_test_message(
+            "verifying rpm for mft[%d]: clk: %" PRIu64 ", duty: %" PRIu64
+            ", rpm: %u, cnt: %d",
+            index, clk, duty, rpm, expected_cnt);
+    }
 
     /* Verify rpm for fan A */
     /* Stop capture */
@@ -670,6 +679,12 @@ int main(int argc, char **argv)
 {
     TestData test_data_list[ARRAY_SIZE(pwm_module_list) * ARRAY_SIZE(pwm_list)];
 
+    char *v_env = getenv("V");
+
+    if (v_env) {
+        verbosity_level = atoi(v_env);
+    }
+
     g_test_init(&argc, &argv, NULL);
 
     for (int i = 0; i < ARRAY_SIZE(pwm_module_list); ++i) {
index 52f0b5c67c033b8ed859a88569eb1d0e1bec8303..62b6eef4649328a3d2c24d66104ee2cb6fb8d922 100644 (file)
@@ -108,6 +108,10 @@ static void test_batch(const testdef_t *tests, bool ipv6)
         const testdef_t *test = &tests[i];
         char *testname;
 
+        if (!qtest_has_device(test->model)) {
+            continue;
+        }
+
         testname = g_strdup_printf("pxe/ipv4/%s/%s",
                                    test->machine, test->model);
         qtest_add_data_func(testname, test, test_pxe_ipv4);
index 81831cafbce423e09533c638744027d3083edc65..bafd8c2180fc00e262b7791cb47314ada553ddba 100644 (file)
@@ -14,7 +14,7 @@ config-cc.mak: Makefile
 I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
 ALL_X86_TESTS=$(I386_SRCS:.c=)
 SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
 
 test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
 run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2
 run-test-i386-bmi2: QEMU_OPTS += -cpu max
 run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
 
+test-i386-adcox: CFLAGS=-O2
+run-test-i386-adcox: QEMU_OPTS += -cpu max
+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
+
 #
 # hello-i386 is a barebones app
 #
diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
new file mode 100644 (file)
index 0000000..16169ef
--- /dev/null
@@ -0,0 +1,75 @@
+/* See if the ADCX and ADOX instructions give expected results */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define CC_C 1
+#define CC_O (1 << 11)
+
+#ifdef __x86_64__
+#define REG uint64_t
+#else
+#define REG uint32_t
+#endif
+
+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
+{   /* Run ADOX then ADCX, each adding all-ones; check the sums and CF/OF. */
+    REG flags;
+    REG out_adcx, out_adox;
+
+    asm("pushf; pop %0" : "=r"(flags));     /* copy the flags register */
+    flags &= ~(CC_C | CC_O);                /* clear CF and OF ... */
+    flags |= (in_c ? CC_C : 0);             /* ... then set CF from in_c */
+    flags |= (in_o ? CC_O : 0);             /* ... and OF from in_o */
+
+    out_adcx = adcx_operand;
+    out_adox = adox_operand;
+    asm("push %0; popf;"                    /* load the prepared flags */
+        "adox %3, %2;"                      /* out_adox += -1 + OF */
+        "adcx %3, %1;"                      /* out_adcx += -1 + CF */
+        "pushf; pop %0"                     /* read the flags back */
+        : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+        : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+
+    assert(out_adcx == in_c + adcx_operand - 1);
+    assert(out_adox == in_o + adox_operand - 1);
+    assert(!!(flags & CC_C) == (in_c || adcx_operand)); /* carry iff sum != 0 */
+    assert(!!(flags & CC_O) == (in_o || adox_operand)); /* same, via OF */
+}
+
+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
+{   /* Run ADCX then ADOX, each adding all-ones; check the sums and CF/OF. */
+    REG flags;
+    REG out_adcx, out_adox;
+
+    asm("pushf; pop %0" : "=r"(flags));     /* copy the flags register */
+    flags &= ~(CC_C | CC_O);                /* clear CF and OF ... */
+    flags |= (in_c ? CC_C : 0);             /* ... then set CF from in_c */
+    flags |= (in_o ? CC_O : 0);             /* ... and OF from in_o */
+
+    out_adcx = adcx_operand;
+    out_adox = adox_operand;
+    asm("push %0; popf;"                    /* load the prepared flags */
+        "adcx %3, %1;"                      /* out_adcx += -1 + CF */
+        "adox %3, %2;"                      /* out_adox += -1 + OF */
+        "pushf; pop %0"                     /* read the flags back */
+        : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+        : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+
+    assert(out_adcx == in_c + adcx_operand - 1);
+    assert(out_adox == in_o + adox_operand - 1);
+    assert(!!(flags & CC_C) == (in_c || adcx_operand)); /* carry iff sum != 0 */
+    assert(!!(flags & CC_O) == (in_o || adox_operand)); /* same, via OF */
+}
+
+int main(int argc, char *argv[]) {
+    /* i bit 0: CF in, bit 1: OF in, bit 2: ADCX operand, bit 3: ADOX operand */
+    int i;
+    for (i = 0; i <= 15; i++) {
+        printf("%d\n", i);                  /* log the case number */
+        test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
+        test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
+    }
+    return 0;
+}
+
index 5fadf47510f21fb00d1373ceb6022d2fff61cdce..982d4abda455d5f5627cc9f5b1e420452ae3e0e6 100644 (file)
@@ -3,34 +3,40 @@
 #include <stdint.h>
 #include <stdio.h>
 
+#ifdef __x86_64
+typedef uint64_t reg_t;
+#else
+typedef uint32_t reg_t;
+#endif
+
 #define insn1q(name, arg0)                                                           \
-static inline uint64_t name##q(uint64_t arg0)                                        \
+static inline reg_t name##q(reg_t arg0)                                              \
 {                                                                                    \
-    uint64_t result64;                                                               \
+    reg_t result64;                                                                  \
     asm volatile (#name "q   %1, %0" : "=r"(result64) : "rm"(arg0));                 \
     return result64;                                                                 \
 }
 
 #define insn1l(name, arg0)                                                           \
-static inline uint32_t name##l(uint32_t arg0)                                        \
+static inline reg_t name##l(reg_t arg0)                                              \
 {                                                                                    \
-    uint32_t result32;                                                               \
+    reg_t result32;                                                                  \
     asm volatile (#name "l   %k1, %k0" : "=r"(result32) : "rm"(arg0));               \
     return result32;                                                                 \
 }
 
 #define insn2q(name, arg0, c0, arg1, c1)                                             \
-static inline uint64_t name##q(uint64_t arg0, uint64_t arg1)                         \
+static inline reg_t name##q(reg_t arg0, reg_t arg1)                                  \
 {                                                                                    \
-    uint64_t result64;                                                               \
+    reg_t result64;                                                                  \
     asm volatile (#name "q   %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1));     \
     return result64;                                                                 \
 }
 
 #define insn2l(name, arg0, c0, arg1, c1)                                             \
-static inline uint32_t name##l(uint32_t arg0, uint32_t arg1)                         \
+static inline reg_t name##l(reg_t arg0, reg_t arg1)                                  \
 {                                                                                    \
-    uint32_t result32;                                                               \
+    reg_t result32;                                                                  \
     asm volatile (#name "l   %k2, %k1, %k0" : "=r"(result32) : c0(arg0), c1(arg1));  \
     return result32;                                                                 \
 }
@@ -65,130 +71,140 @@ insn1l(blsr, src)
 int main(int argc, char *argv[]) {
     uint64_t ehlo = 0x202020204f4c4845ull;
     uint64_t mask = 0xa080800302020001ull;
-    uint32_t result32;
+    reg_t result;
 
 #ifdef __x86_64
-    uint64_t result64;
-
     /* 64 bits */
-    result64 = andnq(mask, ehlo);
-    assert(result64 == 0x002020204d4c4844);
+    result = andnq(mask, ehlo);
+    assert(result == 0x002020204d4c4844);
 
-    result64 = pextq(ehlo, mask);
-    assert(result64 == 133);
+    result = pextq(ehlo, mask);
+    assert(result == 133);
 
-    result64 = pdepq(result64, mask);
-    assert(result64 == (ehlo & mask));
+    result = pdepq(result, mask);
+    assert(result == (ehlo & mask));
 
-    result64 = pextq(-1ull, mask);
-    assert(result64 == 511); /* mask has 9 bits set */
+    result = pextq(-1ull, mask);
+    assert(result == 511); /* mask has 9 bits set */
 
-    result64 = pdepq(-1ull, mask);
-    assert(result64 == mask);
+    result = pdepq(-1ull, mask);
+    assert(result == mask);
 
-    result64 = bextrq(mask, 0x3f00);
-    assert(result64 == (mask & ~INT64_MIN));
+    result = bextrq(mask, 0x3f00);
+    assert(result == (mask & ~INT64_MIN));
 
-    result64 = bextrq(mask, 0x1038);
-    assert(result64 == 0xa0);
+    result = bextrq(mask, 0x1038);
+    assert(result == 0xa0);
 
-    result64 = bextrq(mask, 0x10f8);
-    assert(result64 == 0);
+    result = bextrq(mask, 0x10f8);
+    assert(result == 0);
 
-    result64 = blsiq(0x30);
-    assert(result64 == 0x10);
+    result = bextrq(0xfedcba9876543210ull, 0x7f00);
+    assert(result == 0xfedcba9876543210ull);
 
-    result64 = blsiq(0x30ull << 32);
-    assert(result64 == 0x10ull << 32);
+    result = blsiq(0x30);
+    assert(result == 0x10);
 
-    result64 = blsmskq(0x30);
-    assert(result64 == 0x1f);
+    result = blsiq(0x30ull << 32);
+    assert(result == 0x10ull << 32);
 
-    result64 = blsrq(0x30);
-    assert(result64 == 0x20);
+    result = blsmskq(0x30);
+    assert(result == 0x1f);
 
-    result64 = blsrq(0x30ull << 32);
-    assert(result64 == 0x20ull << 32);
+    result = blsrq(0x30);
+    assert(result == 0x20);
 
-    result64 = bzhiq(mask, 0x3f);
-    assert(result64 == (mask & ~INT64_MIN));
+    result = blsrq(0x30ull << 32);
+    assert(result == 0x20ull << 32);
 
-    result64 = bzhiq(mask, 0x1f);
-    assert(result64 == (mask & ~(-1 << 30)));
+    result = bzhiq(mask, 0x3f);
+    assert(result == (mask & ~INT64_MIN));
 
-    result64 = rorxq(0x2132435465768798, 8);
-    assert(result64 == 0x9821324354657687);
+    result = bzhiq(mask, 0x1f);
+    assert(result == (mask & ~(-1 << 30)));
 
-    result64 = sarxq(0xffeeddccbbaa9988, 8);
-    assert(result64 == 0xffffeeddccbbaa99);
+    result = rorxq(0x2132435465768798, 8);
+    assert(result == 0x9821324354657687);
 
-    result64 = sarxq(0x77eeddccbbaa9988, 8 | 64);
-    assert(result64 == 0x0077eeddccbbaa99);
+    result = sarxq(0xffeeddccbbaa9988, 8);
+    assert(result == 0xffffeeddccbbaa99);
 
-    result64 = shrxq(0xffeeddccbbaa9988, 8);
-    assert(result64 == 0x00ffeeddccbbaa99);
+    result = sarxq(0x77eeddccbbaa9988, 8 | 64);
+    assert(result == 0x0077eeddccbbaa99);
 
-    result64 = shrxq(0x77eeddccbbaa9988, 8 | 192);
-    assert(result64 == 0x0077eeddccbbaa99);
+    result = shrxq(0xffeeddccbbaa9988, 8);
+    assert(result == 0x00ffeeddccbbaa99);
 
-    result64 = shlxq(0xffeeddccbbaa9988, 8);
-    assert(result64 == 0xeeddccbbaa998800);
+    result = shrxq(0x77eeddccbbaa9988, 8 | 192);
+    assert(result == 0x0077eeddccbbaa99);
+
+    result = shlxq(0xffeeddccbbaa9988, 8);
+    assert(result == 0xeeddccbbaa998800);
 #endif
 
     /* 32 bits */
-    result32 = andnl(mask, ehlo);
-    assert(result32 == 0x04d4c4844);
+    result = andnl(mask, ehlo);
+    assert(result == 0x04d4c4844);
+
+    result = pextl((uint32_t) ehlo, mask);
+    assert(result == 5);
+
+    result = pdepl(result, mask);
+    assert(result == (uint32_t)(ehlo & mask));
+
+    result = pextl(-1u, mask);
+    assert(result == 7); /* mask has 3 bits set */
 
-    result32 = pextl((uint32_t) ehlo, mask);
-    assert(result32 == 5);
+    result = pdepl(-1u, mask);
+    assert(result == (uint32_t)mask);
 
-    result32 = pdepl(result32, mask);
-    assert(result32 == (uint32_t)(ehlo & mask));
+    result = bextrl(mask, 0x1f00);
+    assert(result == (mask & ~INT32_MIN));
 
-    result32 = pextl(-1u, mask);
-    assert(result32 == 7); /* mask has 3 bits set */
+    result = bextrl(ehlo, 0x1018);
+    assert(result == 0x4f);
 
-    result32 = pdepl(-1u, mask);
-    assert(result32 == (uint32_t)mask);
+    result = bextrl(mask, 0x1038);
+    assert(result == 0);
 
-    result32 = bextrl(mask, 0x1f00);
-    assert(result32 == (mask & ~INT32_MIN));
+    result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
+    assert(result == 0x5a);
 
-    result32 = bextrl(ehlo, 0x1018);
-    assert(result32 == 0x4f);
+    result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
+    assert(result == 0x76543210u);
 
-    result32 = bextrl(mask, 0x1038);
-    assert(result32 == 0);
+    result = bextrl(-1, 0);
+    assert(result == 0);
 
-    result32 = blsil(0xffff);
-    assert(result32 == 1);
+    result = blsil(0xffff);
+    assert(result == 1);
 
-    result32 = blsmskl(0x300);
-    assert(result32 == 0x1ff);
+    result = blsmskl(0x300);
+    assert(result == 0x1ff);
 
-    result32 = blsrl(0xffc);
-    assert(result32 == 0xff8);
+    result = blsrl(0xffc);
+    assert(result == 0xff8);
 
-    result32 = bzhil(mask, 0xf);
-    assert(result32 == 1);
+    result = bzhil(mask, 0xf);
+    assert(result == 1);
 
-    result32 = rorxl(0x65768798, 8);
-    assert(result32 == 0x98657687);
+    result = rorxl(0x65768798, 8);
+    assert(result == 0x98657687);
 
-    result32 = sarxl(0xffeeddcc, 8);
-    assert(result32 == 0xffffeedd);
+    result = sarxl(0xffeeddcc, 8);
+    assert(result == 0xffffeedd);
 
-    result32 = sarxl(0x77eeddcc, 8 | 32);
-    assert(result32 == 0x0077eedd);
+    result = sarxl(0x77eeddcc, 8 | 32);
+    assert(result == 0x0077eedd);
 
-    result32 = shrxl(0xffeeddcc, 8);
-    assert(result32 == 0x00ffeedd);
+    result = shrxl(0xffeeddcc, 8);
+    assert(result == 0x00ffeedd);
 
-    result32 = shrxl(0x77eeddcc, 8 | 128);
-    assert(result32 == 0x0077eedd);
+    result = shrxl(0x77eeddcc, 8 | 128);
+    assert(result == 0x0077eedd);
 
-    result32 = shlxl(0xffeeddcc, 8);
-    assert(result32 == 0xeeddcc00);
+    result = shlxl(0xffeeddcc, 8);
+    assert(result == 0xeeddcc00);
 
     return 0;
 }
index a34fa68473ea8042fb7096322ce50757b6b8a06d..50c1b88065d1857e5db15f7c9c9b55dc274fe223 100644 (file)
@@ -3,7 +3,7 @@ VPATH+=$(S390X_SRC)
 QEMU_OPTS=-action panic=exit-failure -kernel
 
 %: %.S
-       $(CC) -march=z13 -m64 -nostartfiles -static -Wl,-Ttext=0 \
+       $(CC) -march=z13 -m64 -nostdlib -static -Wl,-Ttext=0 \
                -Wl,--build-id=none $< -o $@
 
 TESTS += unaligned-lowcore
index 4eac78293f2e0171ac330258c5b17824e0206cd2..e64aab1b81c6dbc4d70f812140fff9cf6d30dc80 100644 (file)
@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET))
 X86_64_TESTS += vsyscall
 X86_64_TESTS += noexec
 X86_64_TESTS += cmpxchg
+X86_64_TESTS += adox
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)
 endif
 
+adox: CFLAGS=-O2
+
 run-test-i386-ssse3: QEMU_OPTS += -cpu max
 run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
 
diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c
new file mode 100644 (file)
index 0000000..36be644
--- /dev/null
@@ -0,0 +1,69 @@
+/* See if ADOX gives expected results */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c)
+{   /* Return a + b + c via 64-bit ADOX; *c_out receives the resulting OF. */
+    asm ("addl $0x7fffffff, %k1\n\t"        /* OF = c (1 overflows int32) */
+         "adoxq %2, %0\n\t"                 /* a += b + OF; OF = carry out */
+         "seto %b1"                         /* low byte of c = OF */
+         : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c));
+    *c_out = c;
+    return a;
+}
+
+static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c)
+{   /* 32-bit variant: adds the low halves of a and b plus c via ADOXL. */
+    asm ("addl $0x7fffffff, %k1\n\t"        /* OF = c (1 overflows int32) */
+         "adoxl %k2, %k0\n\t"               /* 32-bit a += b + OF; OF = carry */
+         "seto %b1"                         /* low byte of c = OF */
+         : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c));
+    *c_out = c;
+    return a;
+}
+
+int main()
+{   /* Check 64- and 32-bit ADOX with OF clear/set, with/without carry out. */
+    uint64_t r;
+    bool c;
+
+    r = adoxq(&c, 0, 0, 0);                 /* 0 + 0 with OF clear */
+    assert(r == 0);
+    assert(c == 0);
+
+    r = adoxl(&c, 0, 0, 0);
+    assert(r == 0);
+    assert(c == 0);
+
+    r = adoxl(&c, 0x100000000, 0, 0);       /* bit 32 of a must be dropped */
+    assert(r == 0);
+    assert(c == 0);
+
+    r = adoxq(&c, 0, 0, 1);                 /* incoming OF is added in */
+    assert(r == 1);
+    assert(c == 0);
+
+    r = adoxl(&c, 0, 0, 1);
+    assert(r == 1);
+    assert(c == 0);
+
+    r = adoxq(&c, -1, -1, 0);               /* wraps: carry out sets OF */
+    assert(r == -2);
+    assert(c == 1);
+
+    r = adoxl(&c, -1, -1, 0);               /* 32-bit result, upper half zero */
+    assert(r == 0xfffffffe);
+    assert(c == 1);
+
+    r = adoxq(&c, -1, -1, 1);               /* carry out with OF set on entry */
+    assert(r == -1);
+    assert(c == 1);
+
+    r = adoxl(&c, -1, -1, 1);
+    assert(r == 0xffffffff);
+    assert(c == 1);
+
+    return 0;
+}
index 4fed8b751f3896d4286d53d6266c633c2ee91680..d9d38070621af9a7734890ba35761ebf4d615eff 100644 (file)
@@ -933,10 +933,9 @@ static void bdrv_test_top_close(BlockDriverState *bs)
     }
 }
 
-static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs,
-                                                int64_t offset, int64_t bytes,
-                                                QEMUIOVector *qiov,
-                                                BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_test_top_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVTestTopState *tts = bs->opaque;
     return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags);
@@ -967,6 +966,8 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
     void *buffer = g_malloc(65536);
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
 
+    GRAPH_RDLOCK_GUARD();
+
     /* Pretend some internal write operation from parent to child.
      * Important: We have to read from the child, not from the parent!
      * Draining works by first propagating it all up the tree to the
@@ -1698,11 +1699,9 @@ static void bdrv_replace_test_close(BlockDriverState *bs)
  * Otherwise:
  *   Set .has_read to true and return success.
  */
-static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
-                                                    int64_t offset,
-                                                    int64_t bytes,
-                                                    QEMUIOVector *qiov,
-                                                    BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_replace_test_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                            QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVReplaceTestState *s = bs->opaque;
 
@@ -1778,7 +1777,10 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
     int ret;
 
     /* Queue a read request post-drain */
+    bdrv_graph_co_rdlock();
     ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+    bdrv_graph_co_rdunlock();
+
     g_assert(ret >= 0);
     bdrv_dec_in_flight(bs);
 }
index 6dfac6468ad1526da97b0eb4a775f29d305edd12..3a5e1eb2c4131e0627b177579f35f5305fd0f619 100644 (file)
@@ -312,7 +312,8 @@ static void test_sync_op_blk_truncate(BlockBackend *blk)
     g_assert_cmpint(ret, ==, -EINVAL);
 }
 
-static void test_sync_op_block_status(BdrvChild *c)
+/* Disable TSA to make bdrv_test.bdrv_co_block_status writable */
+static void TSA_NO_TSA test_sync_op_block_status(BdrvChild *c)
 {
     int ret;
     int64_t n;
index ef951b6e5405caeff72e9c5a4ac4385e556a3e05..547046d093ac8c09c238369809d60a6789cd7df8 100644 (file)
 
 #define XBZRLE_PAGE_SIZE 4096
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{   /* Constructor: use the AVX512BW encoder if the CPU and OS allow it. */
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);             /* leaf 1: c holds OSXSAVE/AVX */
+        /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));  /* bv = XCR0[31:0] */
+            __cpuid_count(7, 0, a, b, c, d);    /* leaf 7.0: b holds AVX512BW */
+            /* 0xe6:
+             *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+             *                    and ZMM16-ZMM31 state are enabled by OS)
+             *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+             */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+    return ;
+}
+#endif
+
 static void test_uleb(void)
 {
     uint32_t i, val;
@@ -54,7 +83,7 @@ static void test_encode_decode_zero(void)
     buffer[1000 + diff_len + 5] = 105;
 
     /* encode zero page */
-    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
                        XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -78,7 +107,7 @@ static void test_encode_decode_unchanged(void)
     test[1000 + diff_len + 5] = 109;
 
     /* test unchanged buffer */
-    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -96,7 +125,7 @@ static void test_encode_decode_1_byte(void)
 
     test[XBZRLE_PAGE_SIZE - 1] = 1;
 
-    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                        XBZRLE_PAGE_SIZE);
     g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
 
@@ -121,7 +150,7 @@ static void test_encode_decode_overflow(void)
     }
 
     /* encode overflow */
-    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                               XBZRLE_PAGE_SIZE);
     g_assert(rc == -1);
 
@@ -152,7 +181,7 @@ static void encode_decode_range(void)
     test[1000 + diff_len + 5] = 109;
 
     /* test encode/decode */
-    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
 
     rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
index 10eb3a043f7773eacede26b9bb23e9f47da0c9b3..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 (file)
@@ -1,13 +0,0 @@
-have_virtiofsd = get_option('virtiofsd') \
-    .require(targetos == 'linux',
-             error_message: 'virtiofsd requires Linux') \
-    .require(seccomp.found() and libcap_ng.found(),
-             error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \
-    .require(have_vhost_user,
-             error_message: 'virtiofsd needs vhost-user-support') \
-    .disable_auto_if(not have_tools and not have_system) \
-    .allowed()
-
-if have_virtiofsd
-  subdir('virtiofsd')
-endif
diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in
deleted file mode 100644 (file)
index 9bcd86f..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-  "description": "QEMU virtiofsd vhost-user-fs",
-  "type": "fs",
-  "binary": "@libexecdir@/virtiofsd"
-}
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
deleted file mode 100644 (file)
index b5f04be..0000000
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2010  Miklos Szeredi <miklos@szeredi.hu>
- *
- * Functions for dealing with `struct fuse_buf` and `struct
- * fuse_bufvec`.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "fuse_lowlevel.h"
-
-size_t fuse_buf_size(const struct fuse_bufvec *bufv)
-{
-    size_t i;
-    size_t size = 0;
-
-    for (i = 0; i < bufv->count; i++) {
-        if (bufv->buf[i].size == SIZE_MAX) {
-            size = SIZE_MAX;
-        } else {
-            size += bufv->buf[i].size;
-        }
-    }
-
-    return size;
-}
-
-static ssize_t fuse_buf_writev(struct fuse_buf *out_buf,
-                               struct fuse_bufvec *in_buf)
-{
-    ssize_t res, i, j;
-    size_t iovcnt = in_buf->count;
-    struct iovec *iov;
-    int fd = out_buf->fd;
-
-    iov = g_try_new0(struct iovec, iovcnt);
-    if (!iov) {
-        return -ENOMEM;
-    }
-
-    for (i = 0, j = 0; i < iovcnt; i++) {
-        /* Skip the buf with 0 size */
-        if (in_buf->buf[i].size) {
-            iov[j].iov_base = in_buf->buf[i].mem;
-            iov[j].iov_len = in_buf->buf[i].size;
-            j++;
-        }
-    }
-
-    if (out_buf->flags & FUSE_BUF_FD_SEEK) {
-        res = pwritev(fd, iov, iovcnt, out_buf->pos);
-    } else {
-        res = writev(fd, iov, iovcnt);
-    }
-
-    if (res == -1) {
-        res = -errno;
-    }
-
-    g_free(iov);
-    return res;
-}
-
-static size_t min_size(size_t s1, size_t s2)
-{
-    return s1 < s2 ? s1 : s2;
-}
-
-static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off,
-                              const struct fuse_buf *src, size_t src_off,
-                              size_t len)
-{
-    ssize_t res = 0;
-    size_t copied = 0;
-
-    while (len) {
-        if (dst->flags & FUSE_BUF_FD_SEEK) {
-            res = pwrite(dst->fd, (char *)src->mem + src_off, len,
-                         dst->pos + dst_off);
-        } else {
-            res = write(dst->fd, (char *)src->mem + src_off, len);
-        }
-        if (res == -1) {
-            if (!copied) {
-                return -errno;
-            }
-            break;
-        }
-        if (res == 0) {
-            break;
-        }
-
-        copied += res;
-        if (!(dst->flags & FUSE_BUF_FD_RETRY)) {
-            break;
-        }
-
-        src_off += res;
-        dst_off += res;
-        len -= res;
-    }
-
-    return copied;
-}
-
-static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off,
-                             const struct fuse_buf *src, size_t src_off,
-                             size_t len)
-{
-    ssize_t res = 0;
-    size_t copied = 0;
-
-    while (len) {
-        if (src->flags & FUSE_BUF_FD_SEEK) {
-            res = pread(src->fd, (char *)dst->mem + dst_off, len,
-                        src->pos + src_off);
-        } else {
-            res = read(src->fd, (char *)dst->mem + dst_off, len);
-        }
-        if (res == -1) {
-            if (!copied) {
-                return -errno;
-            }
-            break;
-        }
-        if (res == 0) {
-            break;
-        }
-
-        copied += res;
-        if (!(src->flags & FUSE_BUF_FD_RETRY)) {
-            break;
-        }
-
-        dst_off += res;
-        src_off += res;
-        len -= res;
-    }
-
-    return copied;
-}
-
-static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off,
-                                 const struct fuse_buf *src, size_t src_off,
-                                 size_t len)
-{
-    char buf[4096];
-    struct fuse_buf tmp = {
-        .size = sizeof(buf),
-        .flags = 0,
-    };
-    ssize_t res;
-    size_t copied = 0;
-
-    tmp.mem = buf;
-
-    while (len) {
-        size_t this_len = min_size(tmp.size, len);
-        size_t read_len;
-
-        res = fuse_buf_read(&tmp, 0, src, src_off, this_len);
-        if (res < 0) {
-            if (!copied) {
-                return res;
-            }
-            break;
-        }
-        if (res == 0) {
-            break;
-        }
-
-        read_len = res;
-        res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len);
-        if (res < 0) {
-            if (!copied) {
-                return res;
-            }
-            break;
-        }
-        if (res == 0) {
-            break;
-        }
-
-        copied += res;
-
-        if (res < this_len) {
-            break;
-        }
-
-        dst_off += res;
-        src_off += res;
-        len -= res;
-    }
-
-    return copied;
-}
-
-static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off,
-                                 const struct fuse_buf *src, size_t src_off,
-                                 size_t len)
-{
-    int src_is_fd = src->flags & FUSE_BUF_IS_FD;
-    int dst_is_fd = dst->flags & FUSE_BUF_IS_FD;
-
-    if (!src_is_fd && !dst_is_fd) {
-        char *dstmem = (char *)dst->mem + dst_off;
-        char *srcmem = (char *)src->mem + src_off;
-
-        if (dstmem != srcmem) {
-            if (dstmem + len <= srcmem || srcmem + len <= dstmem) {
-                memcpy(dstmem, srcmem, len);
-            } else {
-                memmove(dstmem, srcmem, len);
-            }
-        }
-
-        return len;
-    } else if (!src_is_fd) {
-        return fuse_buf_write(dst, dst_off, src, src_off, len);
-    } else if (!dst_is_fd) {
-        return fuse_buf_read(dst, dst_off, src, src_off, len);
-    } else {
-        return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len);
-    }
-}
-
-static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv)
-{
-    if (bufv->idx < bufv->count) {
-        return &bufv->buf[bufv->idx];
-    } else {
-        return NULL;
-    }
-}
-
-static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len)
-{
-    const struct fuse_buf *buf = fuse_bufvec_current(bufv);
-
-    if (!buf) {
-        return 0;
-    }
-
-    bufv->off += len;
-    assert(bufv->off <= buf->size);
-    if (bufv->off == buf->size) {
-        assert(bufv->idx < bufv->count);
-        bufv->idx++;
-        if (bufv->idx == bufv->count) {
-            return 0;
-        }
-        bufv->off = 0;
-    }
-    return 1;
-}
-
-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
-{
-    size_t copied = 0, i;
-
-    if (dstv == srcv) {
-        return fuse_buf_size(dstv);
-    }
-
-    /*
-     * use writev to improve bandwidth when all the
-     * src buffers already mapped by the daemon
-     * process
-     */
-    for (i = 0; i < srcv->count; i++) {
-        if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
-            break;
-        }
-    }
-    if ((i == srcv->count) && (dstv->count == 1) &&
-        (dstv->idx == 0) &&
-        (dstv->buf[0].flags & FUSE_BUF_IS_FD)) {
-        dstv->buf[0].pos += dstv->off;
-        return fuse_buf_writev(&dstv->buf[0], srcv);
-    }
-
-    for (;;) {
-        const struct fuse_buf *src = fuse_bufvec_current(srcv);
-        const struct fuse_buf *dst = fuse_bufvec_current(dstv);
-        size_t src_len;
-        size_t dst_len;
-        size_t len;
-        ssize_t res;
-
-        if (src == NULL || dst == NULL) {
-            break;
-        }
-
-        src_len = src->size - srcv->off;
-        dst_len = dst->size - dstv->off;
-        len = min_size(src_len, dst_len);
-
-        res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len);
-        if (res < 0) {
-            if (!copied) {
-                return res;
-            }
-            break;
-        }
-        copied += res;
-
-        if (!fuse_bufvec_advance(srcv, res) ||
-            !fuse_bufvec_advance(dstv, res)) {
-            break;
-        }
-
-        if (res < len) {
-            break;
-        }
-    }
-
-    return copied;
-}
-
-void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len)
-{
-    void *ptr;
-
-    if (len > iter->size - iter->pos) {
-        return NULL;
-    }
-
-    ptr = iter->mem + iter->pos;
-    iter->pos += len;
-    return ptr;
-}
-
-const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter)
-{
-    const char *str = iter->mem + iter->pos;
-    size_t remaining = iter->size - iter->pos;
-    size_t i;
-
-    for (i = 0; i < remaining; i++) {
-        if (str[i] == '\0') {
-            iter->pos += i + 1;
-            return str;
-        }
-    }
-    return NULL;
-}
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
deleted file mode 100644 (file)
index bf46954..0000000
+++ /dev/null
@@ -1,837 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-/** @file */
-
-#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_)
-#error \
-    "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead."
-#endif
-
-#ifndef FUSE_COMMON_H_
-#define FUSE_COMMON_H_
-
-#include "fuse_log.h"
-#include "fuse_opt.h"
-
-/** Major version of FUSE library interface */
-#define FUSE_MAJOR_VERSION 3
-
-/** Minor version of FUSE library interface */
-#define FUSE_MINOR_VERSION 2
-
-#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min))
-#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION)
-
-/**
- * Information about an open file.
- *
- * File Handles are created by the open, opendir, and create methods and closed
- * by the release and releasedir methods.  Multiple file handles may be
- * concurrently open for the same file.  Generally, a client will create one
- * file handle per file descriptor, though in some cases multiple file
- * descriptors can share a single file handle.
- */
-struct fuse_file_info {
-    /** Open flags. Available in open() and release() */
-    int flags;
-
-    /*
-     * In case of a write operation indicates if this was caused
-     * by a delayed write from the page cache. If so, then the
-     * context's pid, uid, and gid fields will not be valid, and
-     * the *fh* value may not match the *fh* value that would
-     * have been sent with the corresponding individual write
-     * requests if write caching had been disabled.
-     */
-    unsigned int writepage:1;
-
-    /** Can be filled in by open, to use direct I/O on this file. */
-    unsigned int direct_io:1;
-
-    /*
-     *  Can be filled in by open. It signals the kernel that any
-     *  currently cached file data (ie., data that the filesystem
-     *  provided the last time the file was open) need not be
-     *  invalidated. Has no effect when set in other contexts (in
-     *  particular it does nothing when set by opendir()).
-     */
-    unsigned int keep_cache:1;
-
-    /*
-     *  Indicates a flush operation.  Set in flush operation, also
-     *  maybe set in highlevel lock operation and lowlevel release
-     *  operation.
-     */
-    unsigned int flush:1;
-
-    /*
-     *  Can be filled in by open, to indicate that the file is not
-     *  seekable.
-     */
-    unsigned int nonseekable:1;
-
-    /*
-     * Indicates that flock locks for this file should be
-     * released.  If set, lock_owner shall contain a valid value.
-     * May only be set in ->release().
-     */
-    unsigned int flock_release:1;
-
-    /*
-     *  Can be filled in by opendir. It signals the kernel to
-     *  enable caching of entries returned by readdir().  Has no
-     *  effect when set in other contexts (in particular it does
-     *  nothing when set by open()).
-     */
-    unsigned int cache_readdir:1;
-
-    /* Indicates that suid/sgid bits should be removed upon write */
-    unsigned int kill_priv:1;
-
-
-    /** Padding.  Reserved for future use*/
-    unsigned int padding:24;
-    unsigned int padding2:32;
-
-    /*
-     *  File handle id.  May be filled in by filesystem in create,
-     * open, and opendir().  Available in most other file operations on the
-     * same file handle.
-     */
-    uint64_t fh;
-
-    /** Lock owner id.  Available in locking operations and flush */
-    uint64_t lock_owner;
-
-    /*
-     * Requested poll events.  Available in ->poll.  Only set on kernels
-     * which support it.  If unsupported, this field is set to zero.
-     */
-    uint32_t poll_events;
-};
-
-/*
- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want'
- */
-
-/**
- * Indicates that the filesystem supports asynchronous read requests.
- *
- * If this capability is not requested/available, the kernel will
- * ensure that there is at most one pending read request per
- * file-handle at any time, and will attempt to order read requests by
- * increasing offset.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_ASYNC_READ (1 << 0)
-
-/**
- * Indicates that the filesystem supports "remote" locking.
- *
- * This feature is enabled by default when supported by the kernel,
- * and if getlk() and setlk() handlers are implemented.
- */
-#define FUSE_CAP_POSIX_LOCKS (1 << 1)
-
-/**
- * Indicates that the filesystem supports the O_TRUNC open flag.  If
- * disabled, and an application specifies O_TRUNC, fuse first calls
- * truncate() and then open() with O_TRUNC filtered out.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3)
-
-/**
- * Indicates that the filesystem supports lookups of "." and "..".
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_EXPORT_SUPPORT (1 << 4)
-
-/**
- * Indicates that the kernel should not apply the umask to the
- * file mode on create operations.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_DONT_MASK (1 << 6)
-
-/**
- * Indicates that libfuse should try to use splice() when writing to
- * the fuse device. This may improve performance.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_SPLICE_WRITE (1 << 7)
-
-/**
- * Indicates that libfuse should try to move pages instead of copying when
- * writing to / reading from the fuse device. This may improve performance.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_SPLICE_MOVE (1 << 8)
-
-/**
- * Indicates that libfuse should try to use splice() when reading from
- * the fuse device. This may improve performance.
- *
- * This feature is enabled by default when supported by the kernel and
- * if the filesystem implements a write_buf() handler.
- */
-#define FUSE_CAP_SPLICE_READ (1 << 9)
-
-/**
- * If set, the calls to flock(2) will be emulated using POSIX locks and must
- * then be handled by the filesystem's setlock() handler.
- *
- * If not set, flock(2) calls will be handled by the FUSE kernel module
- * internally (so any access that does not go through the kernel cannot be taken
- * into account).
- *
- * This feature is enabled by default when supported by the kernel and
- * if the filesystem implements a flock() handler.
- */
-#define FUSE_CAP_FLOCK_LOCKS (1 << 10)
-
-/**
- * Indicates that the filesystem supports ioctl's on directories.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_IOCTL_DIR (1 << 11)
-
-/**
- * Traditionally, while a file is open the FUSE kernel module only
- * asks the filesystem for an update of the file's attributes when a
- * client attempts to read beyond EOF. This is unsuitable for
- * e.g. network filesystems, where the file contents may change
- * without the kernel knowing about it.
- *
- * If this flag is set, FUSE will check the validity of the attributes
- * on every read. If the attributes are no longer valid (i.e., if the
- * *attr_timeout* passed to fuse_reply_attr() or set in `struct
- * fuse_entry_param` has passed), it will first issue a `getattr`
- * request. If the new mtime differs from the previous value, any
- * cached file *contents* will be invalidated as well.
- *
- * This flag should always be set when available. If all file changes
- * go through the kernel, *attr_timeout* should be set to a very large
- * number to avoid unnecessary getattr() calls.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12)
-
-/**
- * Indicates that the filesystem supports readdirplus.
- *
- * This feature is enabled by default when supported by the kernel and if the
- * filesystem implements a readdirplus() handler.
- */
-#define FUSE_CAP_READDIRPLUS (1 << 13)
-
-/**
- * Indicates that the filesystem supports adaptive readdirplus.
- *
- * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect.
- *
- * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel
- * will always issue readdirplus() requests to retrieve directory
- * contents.
- *
- * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel
- * will issue both readdir() and readdirplus() requests, depending on
- * how much information is expected to be required.
- *
- * As of Linux 4.20, the algorithm is as follows: when userspace
- * starts to read directory entries, issue a READDIRPLUS request to
- * the filesystem. If any entry attributes have been looked up by the
- * time userspace requests the next batch of entries continue with
- * READDIRPLUS, otherwise switch to plain READDIR.  This will reasult
- * in eg plain "ls" triggering READDIRPLUS first then READDIR after
- * that because it doesn't do lookups.  "ls -l" should result in all
- * READDIRPLUS, except if dentries are already cached.
- *
- * This feature is enabled by default when supported by the kernel and
- * if the filesystem implements both a readdirplus() and a readdir()
- * handler.
- */
-#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14)
-
-/**
- * Indicates that the filesystem supports asynchronous direct I/O submission.
- *
- * If this capability is not requested/available, the kernel will ensure that
- * there is at most one pending read and one pending write request per direct
- * I/O file-handle at any time.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_ASYNC_DIO (1 << 15)
-
-/**
- * Indicates that writeback caching should be enabled. This means that
- * individual write request may be buffered and merged in the kernel
- * before they are send to the filesystem.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_WRITEBACK_CACHE (1 << 16)
-
-/**
- * Indicates support for zero-message opens. If this flag is set in
- * the `capable` field of the `fuse_conn_info` structure, then the
- * filesystem may return `ENOSYS` from the open() handler to indicate
- * success. Further attempts to open files will be handled in the
- * kernel. (If this flag is not set, returning ENOSYS will be treated
- * as an error and signaled to the caller).
- *
- * Setting (or unsetting) this flag in the `want` field has *no
- * effect*.
- */
-#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17)
-
-/**
- * Indicates support for parallel directory operations. If this flag
- * is unset, the FUSE kernel module will ensure that lookup() and
- * readdir() requests are never issued concurrently for the same
- * directory.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_PARALLEL_DIROPS (1 << 18)
-
-/**
- * Indicates support for POSIX ACLs.
- *
- * If this feature is enabled, the kernel will cache and have
- * responsibility for enforcing ACLs. ACL will be stored as xattrs and
- * passed to userspace, which is responsible for updating the ACLs in
- * the filesystem, keeping the file mode in sync with the ACL, and
- * ensuring inheritance of default ACLs when new filesystem nodes are
- * created. Note that this requires that the file system is able to
- * parse and interpret the xattr representation of ACLs.
- *
- * Enabling this feature implicitly turns on the
- * ``default_permissions`` mount option (even if it was not passed to
- * mount(2)).
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_POSIX_ACL (1 << 19)
-
-/**
- * Indicates that the filesystem is responsible for unsetting
- * setuid and setgid bits when a file is written, truncated, or
- * its owner is changed.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20)
-
-/**
- * Indicates support for zero-message opendirs. If this flag is set in
- * the `capable` field of the `fuse_conn_info` structure, then the filesystem
- * may return `ENOSYS` from the opendir() handler to indicate success. Further
- * opendir and releasedir messages will be handled in the kernel. (If this
- * flag is not set, returning ENOSYS will be treated as an error and signalled
- * to the caller.)
- *
- * Setting (or unsetting) this flag in the `want` field has *no effect*.
- */
-#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24)
-
-/**
- * Indicates that the kernel supports the FUSE_ATTR_SUBMOUNT flag.
- *
- * Setting (or unsetting) this flag in the `want` field has *no effect*.
- */
-#define FUSE_CAP_SUBMOUNTS (1 << 27)
-
-/**
- * Indicates that the filesystem is responsible for clearing
- * security.capability xattr and clearing setuid and setgid bits. Following
- * are the rules.
- * - clear "security.capability" on write, truncate and chown unconditionally
- * - clear suid/sgid if following is true. Note, sgid is cleared only if
- *   group executable bit is set.
- *    o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set.
- *    o setattr has FATTR_UID or FATTR_GID
- *    o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID
- *    o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID flag set.
- *    o write has FUSE_WRITE_KILL_SUIDGID
- */
-#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
-
-/**
- * Indicates that file server supports extended struct fuse_setxattr_in
- */
-#define FUSE_CAP_SETXATTR_EXT (1 << 29)
-
-/**
- * Indicates that file server supports creating file security context
- */
-#define FUSE_CAP_SECURITY_CTX (1ULL << 32)
-
-/**
- * Ioctl flags
- *
- * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
- * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
- * FUSE_IOCTL_RETRY: retry with new iovecs
- * FUSE_IOCTL_DIR: is a directory
- *
- * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
- */
-#define FUSE_IOCTL_COMPAT (1 << 0)
-#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
-#define FUSE_IOCTL_RETRY (1 << 2)
-#define FUSE_IOCTL_DIR (1 << 4)
-
-#define FUSE_IOCTL_MAX_IOV 256
-
-/**
- * Connection information, passed to the ->init() method
- *
- * Some of the elements are read-write, these can be changed to
- * indicate the value requested by the filesystem.  The requested
- * value must usually be smaller than the indicated value.
- */
-struct fuse_conn_info {
-    /**
-     * Major version of the protocol (read-only)
-     */
-    unsigned proto_major;
-
-    /**
-     * Minor version of the protocol (read-only)
-     */
-    unsigned proto_minor;
-
-    /**
-     * Maximum size of the write buffer
-     */
-    unsigned max_write;
-
-    /**
-     * Maximum size of read requests. A value of zero indicates no
-     * limit. However, even if the filesystem does not specify a
-     * limit, the maximum size of read requests will still be
-     * limited by the kernel.
-     *
-     * NOTE: For the time being, the maximum size of read requests
-     * must be set both here *and* passed to fuse_session_new()
-     * using the ``-o max_read=<n>`` mount option. At some point
-     * in the future, specifying the mount option will no longer
-     * be necessary.
-     */
-    unsigned max_read;
-
-    /**
-     * Maximum readahead
-     */
-    unsigned max_readahead;
-
-    /**
-     * Capability flags that the kernel supports (read-only)
-     */
-    uint64_t capable;
-
-    /**
-     * Capability flags that the filesystem wants to enable.
-     *
-     * libfuse attempts to initialize this field with
-     * reasonable default values before calling the init() handler.
-     */
-    uint64_t want;
-
-    /**
-     * Maximum number of pending "background" requests. A
-     * background request is any type of request for which the
-     * total number is not limited by other means. As of kernel
-     * 4.8, only two types of requests fall into this category:
-     *
-     *   1. Read-ahead requests
-     *   2. Asynchronous direct I/O requests
-     *
-     * Read-ahead requests are generated (if max_readahead is
-     * non-zero) by the kernel to preemptively fill its caches
-     * when it anticipates that userspace will soon read more
-     * data.
-     *
-     * Asynchronous direct I/O requests are generated if
-     * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large
-     * direct I/O request. In this case the kernel will internally
-     * split it up into multiple smaller requests and submit them
-     * to the filesystem concurrently.
-     *
-     * Note that the following requests are *not* background
-     * requests: writeback requests (limited by the kernel's
-     * flusher algorithm), regular (i.e., synchronous and
-     * buffered) userspace read/write requests (limited to one per
-     * thread), asynchronous read requests (Linux's io_submit(2)
-     * call actually blocks, so these are also limited to one per
-     * thread).
-     */
-    unsigned max_background;
-
-    /**
-     * Kernel congestion threshold parameter. If the number of pending
-     * background requests exceeds this number, the FUSE kernel module will
-     * mark the filesystem as "congested". This instructs the kernel to
-     * expect that queued requests will take some time to complete, and to
-     * adjust its algorithms accordingly (e.g. by putting a waiting thread
-     * to sleep instead of using a busy-loop).
-     */
-    unsigned congestion_threshold;
-
-    /**
-     * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible
-     * for updating mtime and ctime when write requests are received. The
-     * updated values are passed to the filesystem with setattr() requests.
-     * However, if the filesystem does not support the full resolution of
-     * the kernel timestamps (nanoseconds), the mtime and ctime values used
-     * by kernel and filesystem will differ (and result in an apparent
-     * change of times after a cache flush).
-     *
-     * To prevent this problem, this variable can be used to inform the
-     * kernel about the timestamp granularity supported by the file-system.
-     * The value should be power of 10.  The default is 1, i.e. full
-     * nano-second resolution. Filesystems supporting only second resolution
-     * should set this to 1000000000.
-     */
-    unsigned time_gran;
-
-    /**
-     * For future use.
-     */
-    unsigned reserved[22];
-};
-
-struct fuse_session;
-struct fuse_pollhandle;
-struct fuse_conn_info_opts;
-
-/**
- * This function parses several command-line options that can be used
- * to override elements of struct fuse_conn_info. The pointer returned
- * by this function should be passed to the
- * fuse_apply_conn_info_opts() method by the file system's init()
- * handler.
- *
- * Before using this function, think twice if you really want these
- * parameters to be adjustable from the command line. In most cases,
- * they should be determined by the file system internally.
- *
- * The following options are recognized:
- *
- *   -o max_write=N         sets conn->max_write
- *   -o max_readahead=N     sets conn->max_readahead
- *   -o max_background=N    sets conn->max_background
- *   -o congestion_threshold=N  sets conn->congestion_threshold
- *   -o async_read          sets FUSE_CAP_ASYNC_READ in conn->want
- *   -o sync_read           unsets FUSE_CAP_ASYNC_READ in conn->want
- *   -o atomic_o_trunc      sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want
- *   -o no_remote_lock      Equivalent to -o
- *no_remote_flock,no_remote_posix_lock -o no_remote_flock     Unsets
- *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock  Unsets
- *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write     (un-)sets
- *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move      (un-)sets
- *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read      (un-)sets
- *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data  (un-)sets
- *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no        unsets
- *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes       sets
- *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o
- *readdirplus=auto      sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO
- *in conn->want -o [no_]async_dio        (un-)sets FUSE_CAP_ASYNC_DIO in
- *conn->want -o [no_]writeback_cache  (un-)sets FUSE_CAP_WRITEBACK_CACHE in
- *conn->want -o time_gran=N           sets conn->time_gran
- *
- * Known options will be removed from *args*, unknown options will be
- * passed through unchanged.
- *
- * @param args argument vector (input+output)
- * @return parsed options
- **/
-struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args);
-
-/**
- * This function applies the (parsed) parameters in *opts* to the
- * *conn* pointer. It may modify the following fields: wants,
- * max_write, max_readahead, congestion_threshold, max_background,
- * time_gran. A field is only set (or unset) if the corresponding
- * option has been explicitly set.
- */
-void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
-                               struct fuse_conn_info *conn);
-
-/**
- * Go into the background
- *
- * @param foreground if true, stay in the foreground
- * @return 0 on success, -1 on failure
- */
-int fuse_daemonize(int foreground);
-
-/**
- * Get the version of the library
- *
- * @return the version
- */
-int fuse_version(void);
-
-/**
- * Get the full package version string of the library
- *
- * @return the package version
- */
-const char *fuse_pkgversion(void);
-
-/**
- * Destroy poll handle
- *
- * @param ph the poll handle
- */
-void fuse_pollhandle_destroy(struct fuse_pollhandle *ph);
-
-/*
- * Data buffer
- */
-
-/**
- * Buffer flags
- */
-enum fuse_buf_flags {
-    /**
-     * Buffer contains a file descriptor
-     *
-     * If this flag is set, the .fd field is valid, otherwise the
-     * .mem fields is valid.
-     */
-    FUSE_BUF_IS_FD = (1 << 1),
-
-    /**
-     * Seek on the file descriptor
-     *
-     * If this flag is set then the .pos field is valid and is
-     * used to seek to the given offset before performing
-     * operation on file descriptor.
-     */
-    FUSE_BUF_FD_SEEK = (1 << 2),
-
-    /**
-     * Retry operation on file descriptor
-     *
-     * If this flag is set then retry operation on file descriptor
-     * until .size bytes have been copied or an error or EOF is
-     * detected.
-     */
-    FUSE_BUF_FD_RETRY = (1 << 3),
-};
-
-/**
- * Single data buffer
- *
- * Generic data buffer for I/O, extended attributes, etc...  Data may
- * be supplied as a memory pointer or as a file descriptor
- */
-struct fuse_buf {
-    /**
-     * Size of data in bytes
-     */
-    size_t size;
-
-    /**
-     * Buffer flags
-     */
-    enum fuse_buf_flags flags;
-
-    /**
-     * Memory pointer
-     *
-     * Used unless FUSE_BUF_IS_FD flag is set.
-     */
-    void *mem;
-
-    /**
-     * File descriptor
-     *
-     * Used if FUSE_BUF_IS_FD flag is set.
-     */
-    int fd;
-
-    /**
-     * File position
-     *
-     * Used if FUSE_BUF_FD_SEEK flag is set.
-     */
-    off_t pos;
-};
-
-/**
- * Data buffer vector
- *
- * An array of data buffers, each containing a memory pointer or a
- * file descriptor.
- *
- * Allocate dynamically to add more than one buffer.
- */
-struct fuse_bufvec {
-    /**
-     * Number of buffers in the array
-     */
-    size_t count;
-
-    /**
-     * Index of current buffer within the array
-     */
-    size_t idx;
-
-    /**
-     * Current offset within the current buffer
-     */
-    size_t off;
-
-    /**
-     * Array of buffers
-     */
-    struct fuse_buf buf[1];
-};
-
-/* Initialize bufvec with a single buffer of given size */
-#define FUSE_BUFVEC_INIT(size__)                                      \
-    ((struct fuse_bufvec){ /* .count= */ 1,                           \
-                           /* .idx =  */ 0,                           \
-                           /* .off =  */ 0, /* .buf =  */             \
-                           { /* [0] = */ {                            \
-                               /* .size =  */ (size__),               \
-                               /* .flags = */ (enum fuse_buf_flags)0, \
-                               /* .mem =   */ NULL,                   \
-                               /* .fd =    */ -1,                     \
-                               /* .pos =   */ 0,                      \
-                           } } })
-
-/**
- * Get total size of data in a fuse buffer vector
- *
- * @param bufv buffer vector
- * @return size of data
- */
-size_t fuse_buf_size(const struct fuse_bufvec *bufv);
-
-/**
- * Copy data from one buffer vector to another
- *
- * @param dst destination buffer vector
- * @param src source buffer vector
- * @return actual number of bytes copied or -errno on error
- */
-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
-
-/**
- * Memory buffer iterator
- *
- */
-struct fuse_mbuf_iter {
-    /**
-     * Data pointer
-     */
-    void *mem;
-
-    /**
-     * Total length, in bytes
-     */
-    size_t size;
-
-    /**
-     * Offset from start of buffer
-     */
-    size_t pos;
-};
-
-/* Initialize memory buffer iterator from a fuse_buf */
-#define FUSE_MBUF_ITER_INIT(fbuf) \
-    ((struct fuse_mbuf_iter){     \
-        .mem = fbuf->mem,         \
-        .size = fbuf->size,       \
-        .pos = 0,                 \
-    })
-
-/**
- * Consume bytes from a memory buffer iterator
- *
- * @param iter memory buffer iterator
- * @param len number of bytes to consume
- * @return pointer to start of consumed bytes or
- *         NULL if advancing beyond end of buffer
- */
-void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len);
-
-/**
- * Consume a NUL-terminated string from a memory buffer iterator
- *
- * @param iter memory buffer iterator
- * @return pointer to the string or
- *         NULL if advancing beyond end of buffer or there is no NUL-terminator
- */
-const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter);
-
-/*
- * Signal handling
- */
-/**
- * Exit session on HUP, TERM and INT signals and ignore PIPE signal
- *
- * Stores session in a global variable. May only be called once per
- * process until fuse_remove_signal_handlers() is called.
- *
- * Once either of the POSIX signals arrives, the signal handler calls
- * fuse_session_exit().
- *
- * @param se the session to exit
- * @return 0 on success, -1 on failure
- *
- * See also:
- * fuse_remove_signal_handlers()
- */
-int fuse_set_signal_handlers(struct fuse_session *se);
-
-/**
- * Restore default signal handlers
- *
- * Resets global session.  After this fuse_set_signal_handlers() may
- * be called again.
- *
- * @param se the same session as given in fuse_set_signal_handlers()
- *
- * See also:
- * fuse_set_signal_handlers()
- */
-void fuse_remove_signal_handlers(struct fuse_session *se);
-
-/*
- * Compatibility stuff
- */
-
-#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30
-#error only API version 30 or greater is supported
-#endif
-
-
-/*
- * This interface uses 64 bit off_t.
- *
- * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags!
- */
-QEMU_BUILD_BUG_ON(sizeof(off_t) != 8);
-
-#endif /* FUSE_COMMON_H_ */
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
deleted file mode 100644 (file)
index a5572fa..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#ifndef FUSE_I_H
-#define FUSE_I_H
-
-#define FUSE_USE_VERSION 31
-#include "fuse_lowlevel.h"
-
-struct fv_VuDev;
-struct fv_QueueInfo;
-
-struct fuse_security_context {
-        const char *name;
-        uint32_t ctxlen;
-        const void *ctx;
-};
-
-struct fuse_req {
-    struct fuse_session *se;
-    uint64_t unique;
-    int ctr;
-    pthread_mutex_t lock;
-    struct fuse_ctx ctx;
-    struct fuse_chan *ch;
-    int interrupted;
-    unsigned int ioctl_64bit:1;
-    union {
-        struct {
-            uint64_t unique;
-        } i;
-        struct {
-            fuse_interrupt_func_t func;
-            void *data;
-        } ni;
-    } u;
-    struct fuse_req *next;
-    struct fuse_req *prev;
-    struct fuse_security_context secctx;
-};
-
-struct fuse_notify_req {
-    uint64_t unique;
-    void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t,
-                  const void *, const struct fuse_buf *);
-    struct fuse_notify_req *next;
-    struct fuse_notify_req *prev;
-};
-
-struct fuse_session {
-    char *mountpoint;
-    volatile int exited;
-    int fd;
-    int debug;
-    int deny_others;
-    struct fuse_lowlevel_ops op;
-    int got_init;
-    struct cuse_data *cuse_data;
-    void *userdata;
-    uid_t owner;
-    struct fuse_conn_info conn;
-    struct fuse_req list;
-    struct fuse_req interrupts;
-    pthread_mutex_t lock;
-    pthread_rwlock_t init_rwlock;
-    int got_destroy;
-    int broken_splice_nonblock;
-    uint64_t notify_ctr;
-    struct fuse_notify_req notify_list;
-    size_t bufsize;
-    int error;
-    char *vu_socket_path;
-    char *vu_socket_group;
-    int   vu_listen_fd;
-    int   vu_socketfd;
-    struct fv_VuDev *virtio_dev;
-    int thread_pool_size;
-};
-
-struct fuse_chan {
-    pthread_mutex_t lock;
-    int ctr;
-    int fd;
-    struct fv_QueueInfo *qi;
-};
-
-int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
-                               int count);
-void fuse_free_req(fuse_req_t req);
-
-void fuse_session_process_buf_int(struct fuse_session *se,
-                                  struct fuse_bufvec *bufv,
-                                  struct fuse_chan *ch);
-
-
-#define FUSE_MAX_MAX_PAGES 256
-#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
-
-/* room needed in buffer to accommodate header */
-#define FUSE_BUFFER_HEADER_SIZE 0x1000
-
-#endif
diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c
deleted file mode 100644 (file)
index 2de3f48..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2019  Red Hat, Inc.
- *
- * Logging API.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_log.h"
-
-
-G_GNUC_PRINTF(2, 0)
-static void default_log_func(__attribute__((unused)) enum fuse_log_level level,
-                             const char *fmt, va_list ap)
-{
-    vfprintf(stderr, fmt, ap);
-}
-
-static fuse_log_func_t log_func = default_log_func;
-
-void fuse_set_log_func(fuse_log_func_t func)
-{
-    if (!func) {
-        func = default_log_func;
-    }
-
-    log_func = func;
-}
-
-void fuse_log(enum fuse_log_level level, const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    log_func(level, fmt, ap);
-    va_end(ap);
-}
diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h
deleted file mode 100644 (file)
index e5c2967..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2019  Red Hat, Inc.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_LOG_H_
-#define FUSE_LOG_H_
-
-/** @file
- *
- * This file defines the logging interface of FUSE
- */
-
-
-/**
- * Log severity level
- *
- * These levels correspond to syslog(2) log levels since they are widely used.
- */
-enum fuse_log_level {
-    FUSE_LOG_EMERG,
-    FUSE_LOG_ALERT,
-    FUSE_LOG_CRIT,
-    FUSE_LOG_ERR,
-    FUSE_LOG_WARNING,
-    FUSE_LOG_NOTICE,
-    FUSE_LOG_INFO,
-    FUSE_LOG_DEBUG
-};
-
-/**
- * Log message handler function.
- *
- * This function must be thread-safe.  It may be called from any libfuse
- * function, including fuse_parse_cmdline() and other functions invoked before
- * a FUSE filesystem is created.
- *
- * Install a custom log message handler function using fuse_set_log_func().
- *
- * @param level log severity level
- * @param fmt sprintf-style format string including newline
- * @param ap format string arguments
- */
-typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt,
-                                va_list ap)
-    G_GNUC_PRINTF(2, 0);
-
-/**
- * Install a custom log handler function.
- *
- * Log messages are emitted by libfuse functions to report errors and debug
- * information.  Messages are printed to stderr by default but this can be
- * overridden by installing a custom log message handler function.
- *
- * The log message handler function is global and affects all FUSE filesystems
- * created within this process.
- *
- * @param func a custom log message handler function or NULL to revert to
- *             the default
- */
-void fuse_set_log_func(fuse_log_func_t func);
-
-/**
- * Emit a log message
- *
- * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc)
- * @param fmt sprintf-style format string including newline
- */
-void fuse_log(enum fuse_log_level level, const char *fmt, ...)
-    G_GNUC_PRINTF(2, 3);
-
-#endif /* FUSE_LOG_H_ */
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
deleted file mode 100644 (file)
index 194a1b8..0000000
+++ /dev/null
@@ -1,2732 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * Implementation of (most of) the low-level FUSE API. The session loop
- * functions are implemented in separate files.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "standard-headers/linux/fuse.h"
-#include "fuse_misc.h"
-#include "fuse_opt.h"
-#include "fuse_virtio.h"
-
-#include <sys/file.h>
-
-#define THREAD_POOL_SIZE 0
-
-#define OFFSET_MAX 0x7fffffffffffffffLL
-
-struct fuse_pollhandle {
-    uint64_t kh;
-    struct fuse_session *se;
-};
-
-static size_t pagesize;
-
-static __attribute__((constructor)) void fuse_ll_init_pagesize(void)
-{
-    pagesize = getpagesize();
-}
-
-static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr)
-{
-    *attr = (struct fuse_attr){
-        .ino = stbuf->st_ino,
-        .mode = stbuf->st_mode,
-        .nlink = stbuf->st_nlink,
-        .uid = stbuf->st_uid,
-        .gid = stbuf->st_gid,
-        .rdev = stbuf->st_rdev,
-        .size = stbuf->st_size,
-        .blksize = stbuf->st_blksize,
-        .blocks = stbuf->st_blocks,
-        .atime = stbuf->st_atime,
-        .mtime = stbuf->st_mtime,
-        .ctime = stbuf->st_ctime,
-        .atimensec = ST_ATIM_NSEC(stbuf),
-        .mtimensec = ST_MTIM_NSEC(stbuf),
-        .ctimensec = ST_CTIM_NSEC(stbuf),
-    };
-}
-
-static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf)
-{
-    stbuf->st_mode = attr->mode;
-    stbuf->st_uid = attr->uid;
-    stbuf->st_gid = attr->gid;
-    stbuf->st_size = attr->size;
-    stbuf->st_atime = attr->atime;
-    stbuf->st_mtime = attr->mtime;
-    stbuf->st_ctime = attr->ctime;
-    ST_ATIM_NSEC_SET(stbuf, attr->atimensec);
-    ST_MTIM_NSEC_SET(stbuf, attr->mtimensec);
-    ST_CTIM_NSEC_SET(stbuf, attr->ctimensec);
-}
-
-static size_t iov_length(const struct iovec *iov, size_t count)
-{
-    size_t seg;
-    size_t ret = 0;
-
-    for (seg = 0; seg < count; seg++) {
-        ret += iov[seg].iov_len;
-    }
-    return ret;
-}
-
-static void list_init_req(struct fuse_req *req)
-{
-    req->next = req;
-    req->prev = req;
-}
-
-static void list_del_req(struct fuse_req *req)
-{
-    struct fuse_req *prev = req->prev;
-    struct fuse_req *next = req->next;
-    prev->next = next;
-    next->prev = prev;
-}
-
-static void list_add_req(struct fuse_req *req, struct fuse_req *next)
-{
-    struct fuse_req *prev = next->prev;
-    req->next = next;
-    req->prev = prev;
-    prev->next = req;
-    next->prev = req;
-}
-
-static void destroy_req(fuse_req_t req)
-{
-    pthread_mutex_destroy(&req->lock);
-    g_free(req);
-}
-
-void fuse_free_req(fuse_req_t req)
-{
-    int ctr;
-    struct fuse_session *se = req->se;
-
-    pthread_mutex_lock(&se->lock);
-    req->u.ni.func = NULL;
-    req->u.ni.data = NULL;
-    list_del_req(req);
-    ctr = --req->ctr;
-    req->ch = NULL;
-    pthread_mutex_unlock(&se->lock);
-    if (!ctr) {
-        destroy_req(req);
-    }
-}
-
-static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se)
-{
-    struct fuse_req *req;
-
-    req = g_try_new0(struct fuse_req, 1);
-    if (req == NULL) {
-        fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n");
-    } else {
-        req->se = se;
-        req->ctr = 1;
-        list_init_req(req);
-        fuse_mutex_init(&req->lock);
-    }
-
-    return req;
-}
-
-/* Send data. If *ch* is NULL, send via session master fd */
-static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
-                         struct iovec *iov, int count)
-{
-    struct fuse_out_header *out = iov[0].iov_base;
-
-    out->len = iov_length(iov, count);
-    if (out->unique == 0) {
-        fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error,
-                 out->len);
-    } else if (out->error) {
-        fuse_log(FUSE_LOG_DEBUG,
-                 "   unique: %llu, error: %i (%s), outsize: %i\n",
-                 (unsigned long long)out->unique, out->error,
-                 strerror(-out->error), out->len);
-    } else {
-        fuse_log(FUSE_LOG_DEBUG, "   unique: %llu, success, outsize: %i\n",
-                 (unsigned long long)out->unique, out->len);
-    }
-
-    if (fuse_lowlevel_is_virtio(se)) {
-        return virtio_send_msg(se, ch, iov, count);
-    }
-
-    abort(); /* virtio should have taken it before here */
-    return 0;
-}
-
-
-int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
-                               int count)
-{
-    struct fuse_out_header out = {
-        .unique = req->unique,
-        .error = error,
-    };
-
-    if (error <= -1000 || error > 0) {
-        fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error);
-        out.error = -ERANGE;
-    }
-
-    iov[0].iov_base = &out;
-    iov[0].iov_len = sizeof(struct fuse_out_header);
-
-    return fuse_send_msg(req->se, req->ch, iov, count);
-}
-
-static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov,
-                          int count)
-{
-    int res;
-
-    res = fuse_send_reply_iov_nofree(req, error, iov, count);
-    fuse_free_req(req);
-    return res;
-}
-
-static int send_reply(fuse_req_t req, int error, const void *arg,
-                      size_t argsize)
-{
-    struct iovec iov[2];
-    int count = 1;
-    if (argsize) {
-        iov[1].iov_base = (void *)arg;
-        iov[1].iov_len = argsize;
-        count++;
-    }
-    return send_reply_iov(req, error, iov, count);
-}
-
-int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count)
-{
-    g_autofree struct iovec *padded_iov = NULL;
-
-    padded_iov = g_try_new(struct iovec, count + 1);
-    if (padded_iov == NULL) {
-        return fuse_reply_err(req, ENOMEM);
-    }
-
-    memcpy(padded_iov + 1, iov, count * sizeof(struct iovec));
-    count++;
-
-    return send_reply_iov(req, 0, padded_iov, count);
-}
-
-
-/*
- * 'buf` is allowed to be empty so that the proper size may be
- * allocated by the caller
- */
-size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
-                         const char *name, const struct stat *stbuf, off_t off)
-{
-    (void)req;
-    size_t namelen;
-    size_t entlen;
-    size_t entlen_padded;
-    struct fuse_dirent *dirent;
-
-    namelen = strlen(name);
-    entlen = FUSE_NAME_OFFSET + namelen;
-    entlen_padded = FUSE_DIRENT_ALIGN(entlen);
-
-    if ((buf == NULL) || (entlen_padded > bufsize)) {
-        return entlen_padded;
-    }
-
-    dirent = (struct fuse_dirent *)buf;
-    dirent->ino = stbuf->st_ino;
-    dirent->off = off;
-    dirent->namelen = namelen;
-    dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
-    memcpy(dirent->name, name, namelen);
-    memset(dirent->name + namelen, 0, entlen_padded - entlen);
-
-    return entlen_padded;
-}
-
-static void convert_statfs(const struct statvfs *stbuf,
-                           struct fuse_kstatfs *kstatfs)
-{
-    *kstatfs = (struct fuse_kstatfs){
-        .bsize = stbuf->f_bsize,
-        .frsize = stbuf->f_frsize,
-        .blocks = stbuf->f_blocks,
-        .bfree = stbuf->f_bfree,
-        .bavail = stbuf->f_bavail,
-        .files = stbuf->f_files,
-        .ffree = stbuf->f_ffree,
-        .namelen = stbuf->f_namemax,
-    };
-}
-
-static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize)
-{
-    return send_reply(req, 0, arg, argsize);
-}
-
-int fuse_reply_err(fuse_req_t req, int err)
-{
-    return send_reply(req, -err, NULL, 0);
-}
-
-void fuse_reply_none(fuse_req_t req)
-{
-    fuse_free_req(req);
-}
-
-static unsigned long calc_timeout_sec(double t)
-{
-    if (t > (double)ULONG_MAX) {
-        return ULONG_MAX;
-    } else if (t < 0.0) {
-        return 0;
-    } else {
-        return (unsigned long)t;
-    }
-}
-
-static unsigned int calc_timeout_nsec(double t)
-{
-    double f = t - (double)calc_timeout_sec(t);
-    if (f < 0.0) {
-        return 0;
-    } else if (f >= 0.999999999) {
-        return 999999999;
-    } else {
-        return (unsigned int)(f * 1.0e9);
-    }
-}
-
-static void fill_entry(struct fuse_entry_out *arg,
-                       const struct fuse_entry_param *e)
-{
-    *arg = (struct fuse_entry_out){
-        .nodeid = e->ino,
-        .generation = e->generation,
-        .entry_valid = calc_timeout_sec(e->entry_timeout),
-        .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout),
-        .attr_valid = calc_timeout_sec(e->attr_timeout),
-        .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout),
-    };
-    convert_stat(&e->attr, &arg->attr);
-
-    arg->attr.flags = e->attr_flags;
-}
-
-/*
- * `buf` is allowed to be empty so that the proper size may be
- * allocated by the caller
- */
-size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
-                              const char *name,
-                              const struct fuse_entry_param *e, off_t off)
-{
-    (void)req;
-    size_t namelen;
-    size_t entlen;
-    size_t entlen_padded;
-
-    namelen = strlen(name);
-    entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen;
-    entlen_padded = FUSE_DIRENT_ALIGN(entlen);
-    if ((buf == NULL) || (entlen_padded > bufsize)) {
-        return entlen_padded;
-    }
-
-    struct fuse_direntplus *dp = (struct fuse_direntplus *)buf;
-    memset(&dp->entry_out, 0, sizeof(dp->entry_out));
-    fill_entry(&dp->entry_out, e);
-
-    struct fuse_dirent *dirent = &dp->dirent;
-    *dirent = (struct fuse_dirent){
-        .ino = e->attr.st_ino,
-        .off = off,
-        .namelen = namelen,
-        .type = (e->attr.st_mode & S_IFMT) >> 12,
-    };
-    memcpy(dirent->name, name, namelen);
-    memset(dirent->name + namelen, 0, entlen_padded - entlen);
-
-    return entlen_padded;
-}
-
-static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f)
-{
-    arg->fh = f->fh;
-    if (f->direct_io) {
-        arg->open_flags |= FOPEN_DIRECT_IO;
-    }
-    if (f->keep_cache) {
-        arg->open_flags |= FOPEN_KEEP_CACHE;
-    }
-    if (f->cache_readdir) {
-        arg->open_flags |= FOPEN_CACHE_DIR;
-    }
-    if (f->nonseekable) {
-        arg->open_flags |= FOPEN_NONSEEKABLE;
-    }
-}
-
-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
-{
-    struct fuse_entry_out arg;
-    size_t size = sizeof(arg);
-
-    memset(&arg, 0, sizeof(arg));
-    fill_entry(&arg, e);
-    return send_reply_ok(req, &arg, size);
-}
-
-int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
-                      const struct fuse_file_info *f)
-{
-    char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)];
-    size_t entrysize = sizeof(struct fuse_entry_out);
-    struct fuse_entry_out *earg = (struct fuse_entry_out *)buf;
-    struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize);
-
-    memset(buf, 0, sizeof(buf));
-    fill_entry(earg, e);
-    fill_open(oarg, f);
-    return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out));
-}
-
-int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
-                    double attr_timeout)
-{
-    struct fuse_attr_out arg;
-    size_t size = sizeof(arg);
-
-    memset(&arg, 0, sizeof(arg));
-    arg.attr_valid = calc_timeout_sec(attr_timeout);
-    arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout);
-    convert_stat(attr, &arg.attr);
-
-    return send_reply_ok(req, &arg, size);
-}
-
-int fuse_reply_readlink(fuse_req_t req, const char *linkname)
-{
-    return send_reply_ok(req, linkname, strlen(linkname));
-}
-
-int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f)
-{
-    struct fuse_open_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    fill_open(&arg, f);
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_write(fuse_req_t req, size_t count)
-{
-    struct fuse_write_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.size = count;
-
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size)
-{
-    return send_reply_ok(req, buf, size);
-}
-
-static int fuse_send_data_iov_fallback(struct fuse_session *se,
-                                       struct fuse_chan *ch, struct iovec *iov,
-                                       int iov_count, struct fuse_bufvec *buf,
-                                       size_t len)
-{
-    /* Optimize common case */
-    if (buf->count == 1 && buf->idx == 0 && buf->off == 0 &&
-        !(buf->buf[0].flags & FUSE_BUF_IS_FD)) {
-        /*
-         * FIXME: also avoid memory copy if there are multiple buffers
-         * but none of them contain an fd
-         */
-
-        iov[iov_count].iov_base = buf->buf[0].mem;
-        iov[iov_count].iov_len = len;
-        iov_count++;
-        return fuse_send_msg(se, ch, iov, iov_count);
-    }
-
-    if (fuse_lowlevel_is_virtio(se) && buf->count == 1 &&
-        buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
-        return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
-    }
-
-    abort(); /* Will have taken vhost path */
-    return 0;
-}
-
-static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
-                              struct iovec *iov, int iov_count,
-                              struct fuse_bufvec *buf)
-{
-    size_t len = fuse_buf_size(buf);
-
-    return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len);
-}
-
-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
-{
-    struct iovec iov[2];
-    struct fuse_out_header out = {
-        .unique = req->unique,
-    };
-    int res;
-
-    iov[0].iov_base = &out;
-    iov[0].iov_len = sizeof(struct fuse_out_header);
-
-    res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv);
-    if (res <= 0) {
-        fuse_free_req(req);
-        return res;
-    } else {
-        return fuse_reply_err(req, res);
-    }
-}
-
-int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf)
-{
-    struct fuse_statfs_out arg;
-    size_t size = sizeof(arg);
-
-    memset(&arg, 0, sizeof(arg));
-    convert_statfs(stbuf, &arg.st);
-
-    return send_reply_ok(req, &arg, size);
-}
-
-int fuse_reply_xattr(fuse_req_t req, size_t count)
-{
-    struct fuse_getxattr_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.size = count;
-
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_lock(fuse_req_t req, const struct flock *lock)
-{
-    struct fuse_lk_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.lk.type = lock->l_type;
-    if (lock->l_type != F_UNLCK) {
-        arg.lk.start = lock->l_start;
-        if (lock->l_len == 0) {
-            arg.lk.end = OFFSET_MAX;
-        } else {
-            arg.lk.end = lock->l_start + lock->l_len - 1;
-        }
-    }
-    arg.lk.pid = lock->l_pid;
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_bmap(fuse_req_t req, uint64_t idx)
-{
-    struct fuse_bmap_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.block = idx;
-
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov,
-                                                      size_t count)
-{
-    struct fuse_ioctl_iovec *fiov;
-    size_t i;
-
-    fiov = g_try_new(struct fuse_ioctl_iovec, count);
-    if (!fiov) {
-        return NULL;
-    }
-
-    for (i = 0; i < count; i++) {
-        fiov[i].base = (uintptr_t)iov[i].iov_base;
-        fiov[i].len = iov[i].iov_len;
-    }
-
-    return fiov;
-}
-
-int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
-                           size_t in_count, const struct iovec *out_iov,
-                           size_t out_count)
-{
-    struct fuse_ioctl_out arg;
-    g_autofree struct fuse_ioctl_iovec *in_fiov = NULL;
-    g_autofree struct fuse_ioctl_iovec *out_fiov = NULL;
-    struct iovec iov[4];
-    size_t count = 1;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.flags |= FUSE_IOCTL_RETRY;
-    arg.in_iovs = in_count;
-    arg.out_iovs = out_count;
-    iov[count].iov_base = &arg;
-    iov[count].iov_len = sizeof(arg);
-    count++;
-
-    /* Can't handle non-compat 64bit ioctls on 32bit */
-    if (sizeof(void *) == 4 && req->ioctl_64bit) {
-        return fuse_reply_err(req, EINVAL);
-    }
-
-    if (in_count) {
-        in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count);
-        if (!in_fiov) {
-            return fuse_reply_err(req, ENOMEM);
-        }
-
-        iov[count].iov_base = (void *)in_fiov;
-        iov[count].iov_len = sizeof(in_fiov[0]) * in_count;
-        count++;
-    }
-    if (out_count) {
-        out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count);
-        if (!out_fiov) {
-            return fuse_reply_err(req, ENOMEM);
-        }
-
-        iov[count].iov_base = (void *)out_fiov;
-        iov[count].iov_len = sizeof(out_fiov[0]) * out_count;
-        count++;
-    }
-
-    return send_reply_iov(req, 0, iov, count);
-}
-
-int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size)
-{
-    struct fuse_ioctl_out arg;
-    struct iovec iov[3];
-    size_t count = 1;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.result = result;
-    iov[count].iov_base = &arg;
-    iov[count].iov_len = sizeof(arg);
-    count++;
-
-    if (size) {
-        iov[count].iov_base = (char *)buf;
-        iov[count].iov_len = size;
-        count++;
-    }
-
-    return send_reply_iov(req, 0, iov, count);
-}
-
-int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
-                         int count)
-{
-    g_autofree struct iovec *padded_iov = NULL;
-    struct fuse_ioctl_out arg;
-
-    padded_iov = g_try_new(struct iovec, count + 2);
-    if (padded_iov == NULL) {
-        return fuse_reply_err(req, ENOMEM);
-    }
-
-    memset(&arg, 0, sizeof(arg));
-    arg.result = result;
-    padded_iov[1].iov_base = &arg;
-    padded_iov[1].iov_len = sizeof(arg);
-
-    memcpy(&padded_iov[2], iov, count * sizeof(struct iovec));
-
-    return send_reply_iov(req, 0, padded_iov, count + 2);
-}
-
-int fuse_reply_poll(fuse_req_t req, unsigned revents)
-{
-    struct fuse_poll_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.revents = revents;
-
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_lseek(fuse_req_t req, off_t off)
-{
-    struct fuse_lseek_out arg;
-
-    memset(&arg, 0, sizeof(arg));
-    arg.offset = off;
-
-    return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-static void do_lookup(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    const char *name = fuse_mbuf_iter_advance_str(iter);
-    if (!name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.lookup) {
-        req->se->op.lookup(req, nodeid, name);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_forget(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    struct fuse_forget_in *arg;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.forget) {
-        req->se->op.forget(req, nodeid, arg->nlookup);
-    } else {
-        fuse_reply_none(req);
-    }
-}
-
-static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid,
-                            struct fuse_mbuf_iter *iter)
-{
-    struct fuse_batch_forget_in *arg;
-    struct fuse_forget_data *forgets;
-    size_t scount;
-
-    (void)nodeid;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_none(req);
-        return;
-    }
-
-    /*
-     * Prevent integer overflow.  The compiler emits the following warning
-     * unless we use the scount local variable:
-     *
-     * error: comparison is always false due to limited range of data type
-     * [-Werror=type-limits]
-     *
-     * This may be true on 64-bit hosts but we need this check for 32-bit
-     * hosts.
-     */
-    scount = arg->count;
-    if (scount > SIZE_MAX / sizeof(forgets[0])) {
-        fuse_reply_none(req);
-        return;
-    }
-
-    forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0]));
-    if (!forgets) {
-        fuse_reply_none(req);
-        return;
-    }
-
-    if (req->se->op.forget_multi) {
-        req->se->op.forget_multi(req, arg->count, forgets);
-    } else if (req->se->op.forget) {
-        unsigned int i;
-
-        for (i = 0; i < arg->count; i++) {
-            struct fuse_req *dummy_req;
-
-            dummy_req = fuse_ll_alloc_req(req->se);
-            if (dummy_req == NULL) {
-                break;
-            }
-
-            dummy_req->unique = req->unique;
-            dummy_req->ctx = req->ctx;
-            dummy_req->ch = NULL;
-
-            req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup);
-        }
-        fuse_reply_none(req);
-    } else {
-        fuse_reply_none(req);
-    }
-}
-
-static void do_getattr(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    struct fuse_file_info *fip = NULL;
-    struct fuse_file_info fi;
-
-    struct fuse_getattr_in *arg;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (arg->getattr_flags & FUSE_GETATTR_FH) {
-        memset(&fi, 0, sizeof(fi));
-        fi.fh = arg->fh;
-        fip = &fi;
-    }
-
-    if (req->se->op.getattr) {
-        req->se->op.getattr(req, nodeid, fip);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_setattr(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    if (req->se->op.setattr) {
-        struct fuse_setattr_in *arg;
-        struct fuse_file_info *fi = NULL;
-        struct fuse_file_info fi_store;
-        struct stat stbuf;
-
-        arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-        if (!arg) {
-            fuse_reply_err(req, EINVAL);
-            return;
-        }
-
-        memset(&stbuf, 0, sizeof(stbuf));
-        convert_attr(arg, &stbuf);
-        if (arg->valid & FATTR_FH) {
-            arg->valid &= ~FATTR_FH;
-            memset(&fi_store, 0, sizeof(fi_store));
-            fi = &fi_store;
-            fi->fh = arg->fh;
-        }
-        arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID |
-                      FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE |
-                      FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME |
-                      FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW |
-                      FUSE_SET_ATTR_CTIME | FUSE_SET_ATTR_KILL_SUIDGID;
-
-        req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_access(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    struct fuse_access_in *arg;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.access) {
-        req->se->op.access(req, nodeid, arg->mask);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_readlink(fuse_req_t req, fuse_ino_t nodeid,
-                        struct fuse_mbuf_iter *iter)
-{
-    (void)iter;
-
-    if (req->se->op.readlink) {
-        req->se->op.readlink(req, nodeid);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static int parse_secctx_fill_req(fuse_req_t req, struct fuse_mbuf_iter *iter)
-{
-    struct fuse_secctx_header *fsecctx_header;
-    struct fuse_secctx *fsecctx;
-    const void *secctx;
-    const char *name;
-
-    fsecctx_header = fuse_mbuf_iter_advance(iter, sizeof(*fsecctx_header));
-    if (!fsecctx_header) {
-        return -EINVAL;
-    }
-
-    /*
-     * As of now maximum of one security context is supported. It can
-     * change in future though.
-     */
-    if (fsecctx_header->nr_secctx > 1) {
-        return -EINVAL;
-    }
-
-    /* No security context sent. Maybe no LSM supports it */
-    if (!fsecctx_header->nr_secctx) {
-        return 0;
-    }
-
-    fsecctx = fuse_mbuf_iter_advance(iter, sizeof(*fsecctx));
-    if (!fsecctx) {
-        return -EINVAL;
-    }
-
-    /* struct fsecctx with zero sized context is not expected */
-    if (!fsecctx->size) {
-        return -EINVAL;
-    }
-    name = fuse_mbuf_iter_advance_str(iter);
-    if (!name) {
-        return -EINVAL;
-    }
-
-    secctx = fuse_mbuf_iter_advance(iter, fsecctx->size);
-    if (!secctx) {
-        return -EINVAL;
-    }
-
-    req->secctx.name = name;
-    req->secctx.ctx = secctx;
-    req->secctx.ctxlen = fsecctx->size;
-    return 0;
-}
-
-static void do_mknod(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_mknod_in *arg;
-    const char *name;
-    bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
-    int err;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    name = fuse_mbuf_iter_advance_str(iter);
-    if (!arg || !name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    req->ctx.umask = arg->umask;
-
-    if (secctx_enabled) {
-        err = parse_secctx_fill_req(req, iter);
-        if (err) {
-            fuse_reply_err(req, -err);
-            return;
-        }
-    }
-
-    if (req->se->op.mknod) {
-        req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_mkdir_in *arg;
-    const char *name;
-    bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
-    int err;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    name = fuse_mbuf_iter_advance_str(iter);
-    if (!arg || !name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    req->ctx.umask = arg->umask;
-
-    if (secctx_enabled) {
-        err = parse_secctx_fill_req(req, iter);
-        if (err) {
-            fuse_reply_err(req, err);
-            return;
-        }
-    }
-
-    if (req->se->op.mkdir) {
-        req->se->op.mkdir(req, nodeid, name, arg->mode);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_unlink(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    const char *name = fuse_mbuf_iter_advance_str(iter);
-
-    if (!name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.unlink) {
-        req->se->op.unlink(req, nodeid, name);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    const char *name = fuse_mbuf_iter_advance_str(iter);
-
-    if (!name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.rmdir) {
-        req->se->op.rmdir(req, nodeid, name);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_symlink(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    const char *name = fuse_mbuf_iter_advance_str(iter);
-    const char *linkname = fuse_mbuf_iter_advance_str(iter);
-    bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
-    int err;
-
-    if (!name || !linkname) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (secctx_enabled) {
-        err = parse_secctx_fill_req(req, iter);
-        if (err) {
-            fuse_reply_err(req, err);
-            return;
-        }
-    }
-
-    if (req->se->op.symlink) {
-        req->se->op.symlink(req, linkname, nodeid, name);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_rename(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    struct fuse_rename_in *arg;
-    const char *oldname;
-    const char *newname;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    oldname = fuse_mbuf_iter_advance_str(iter);
-    newname = fuse_mbuf_iter_advance_str(iter);
-    if (!arg || !oldname || !newname) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.rename) {
-        req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_rename2(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    struct fuse_rename2_in *arg;
-    const char *oldname;
-    const char *newname;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    oldname = fuse_mbuf_iter_advance_str(iter);
-    newname = fuse_mbuf_iter_advance_str(iter);
-    if (!arg || !oldname || !newname) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.rename) {
-        req->se->op.rename(req, nodeid, oldname, arg->newdir, newname,
-                           arg->flags);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_link(fuse_req_t req, fuse_ino_t nodeid,
-                    struct fuse_mbuf_iter *iter)
-{
-    struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    const char *name = fuse_mbuf_iter_advance_str(iter);
-
-    if (!arg || !name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.link) {
-        req->se->op.link(req, arg->oldnodeid, nodeid, name);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_create(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
-
-    if (req->se->op.create) {
-        struct fuse_create_in *arg;
-        struct fuse_file_info fi;
-        const char *name;
-
-        arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-        name = fuse_mbuf_iter_advance_str(iter);
-        if (!arg || !name) {
-            fuse_reply_err(req, EINVAL);
-            return;
-        }
-
-        if (secctx_enabled) {
-            int err;
-            err = parse_secctx_fill_req(req, iter);
-            if (err) {
-                fuse_reply_err(req, err);
-                return;
-            }
-        }
-
-        memset(&fi, 0, sizeof(fi));
-        fi.flags = arg->flags;
-        fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
-
-        req->ctx.umask = arg->umask;
-
-        req->se->op.create(req, nodeid, name, arg->mode, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_open(fuse_req_t req, fuse_ino_t nodeid,
-                    struct fuse_mbuf_iter *iter)
-{
-    struct fuse_open_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    /* File creation is handled by do_create() or do_mknod() */
-    if (arg->flags & (O_CREAT | O_TMPFILE)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.flags = arg->flags;
-    fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
-
-    if (req->se->op.open) {
-        req->se->op.open(req, nodeid, &fi);
-    } else {
-        fuse_reply_open(req, &fi);
-    }
-}
-
-static void do_read(fuse_req_t req, fuse_ino_t nodeid,
-                    struct fuse_mbuf_iter *iter)
-{
-    if (req->se->op.read) {
-        struct fuse_read_in *arg;
-        struct fuse_file_info fi;
-
-        arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-        if (!arg) {
-            fuse_reply_err(req, EINVAL);
-            return;
-        }
-
-        memset(&fi, 0, sizeof(fi));
-        fi.fh = arg->fh;
-        fi.lock_owner = arg->lock_owner;
-        fi.flags = arg->flags;
-        req->se->op.read(req, nodeid, arg->size, arg->offset, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_write(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_write_in *arg;
-    struct fuse_file_info fi;
-    const char *param;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    param = fuse_mbuf_iter_advance(iter, arg->size);
-    if (!param) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-    fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
-    fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
-
-    fi.lock_owner = arg->lock_owner;
-    fi.flags = arg->flags;
-
-    if (req->se->op.write) {
-        req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid,
-                         struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv)
-{
-    struct fuse_session *se = req->se;
-    struct fuse_bufvec *pbufv = ibufv;
-    struct fuse_bufvec tmpbufv = {
-        .buf[0] = ibufv->buf[0],
-        .count = 1,
-    };
-    struct fuse_write_in *arg;
-    size_t arg_size = sizeof(*arg);
-    struct fuse_file_info fi;
-
-    memset(&fi, 0, sizeof(fi));
-
-    arg = fuse_mbuf_iter_advance(iter, arg_size);
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    fi.lock_owner = arg->lock_owner;
-    fi.flags = arg->flags;
-    fi.fh = arg->fh;
-    fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE);
-    fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
-
-    if (ibufv->count == 1) {
-        assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD));
-        tmpbufv.buf[0].mem = ((char *)arg) + arg_size;
-        tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size;
-        pbufv = &tmpbufv;
-    } else {
-        /*
-         *  Input bufv contains the headers in the first element
-         * and the data in the rest, we need to skip that first element
-         */
-        ibufv->buf[0].size = 0;
-    }
-
-    if (fuse_buf_size(pbufv) != arg->size) {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: do_write_buf: buffer size doesn't match arg->size\n");
-        fuse_reply_err(req, EIO);
-        return;
-    }
-
-    se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi);
-}
-
-static void do_flush(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_flush_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-    fi.flush = 1;
-    fi.lock_owner = arg->lock_owner;
-
-    if (req->se->op.flush) {
-        req->se->op.flush(req, nodeid, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_release(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    struct fuse_release_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.flags = arg->flags;
-    fi.fh = arg->fh;
-    fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
-    fi.lock_owner = arg->lock_owner;
-
-    if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) {
-        fi.flock_release = 1;
-    }
-
-    if (req->se->op.release) {
-        req->se->op.release(req, nodeid, &fi);
-    } else {
-        fuse_reply_err(req, 0);
-    }
-}
-
-static void do_fsync(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_fsync_in *arg;
-    struct fuse_file_info fi;
-    int datasync;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-    datasync = arg->fsync_flags & 1;
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (req->se->op.fsync) {
-        if (fi.fh == (uint64_t)-1) {
-            req->se->op.fsync(req, nodeid, datasync, NULL);
-        } else {
-            req->se->op.fsync(req, nodeid, datasync, &fi);
-        }
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_opendir(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    struct fuse_open_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.flags = arg->flags;
-
-    if (req->se->op.opendir) {
-        req->se->op.opendir(req, nodeid, &fi);
-    } else {
-        fuse_reply_open(req, &fi);
-    }
-}
-
-static void do_readdir(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    struct fuse_read_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (req->se->op.readdir) {
-        req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid,
-                           struct fuse_mbuf_iter *iter)
-{
-    struct fuse_read_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (req->se->op.readdirplus) {
-        req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid,
-                          struct fuse_mbuf_iter *iter)
-{
-    struct fuse_release_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.flags = arg->flags;
-    fi.fh = arg->fh;
-
-    if (req->se->op.releasedir) {
-        req->se->op.releasedir(req, nodeid, &fi);
-    } else {
-        fuse_reply_err(req, 0);
-    }
-}
-
-static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid,
-                        struct fuse_mbuf_iter *iter)
-{
-    struct fuse_fsync_in *arg;
-    struct fuse_file_info fi;
-    int datasync;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-    datasync = arg->fsync_flags & 1;
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (req->se->op.fsyncdir) {
-        req->se->op.fsyncdir(req, nodeid, datasync, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_statfs(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    (void)nodeid;
-    (void)iter;
-
-    if (req->se->op.statfs) {
-        req->se->op.statfs(req, nodeid);
-    } else {
-        struct statvfs buf = {
-            .f_namemax = 255,
-            .f_bsize = 512,
-        };
-        fuse_reply_statfs(req, &buf);
-    }
-}
-
-static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid,
-                        struct fuse_mbuf_iter *iter)
-{
-    struct fuse_setxattr_in *arg;
-    const char *name;
-    const char *value;
-    bool setxattr_ext = req->se->conn.want & FUSE_CAP_SETXATTR_EXT;
-
-    if (setxattr_ext) {
-        arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    } else {
-        arg = fuse_mbuf_iter_advance(iter, FUSE_COMPAT_SETXATTR_IN_SIZE);
-    }
-    name = fuse_mbuf_iter_advance_str(iter);
-    if (!arg || !name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    value = fuse_mbuf_iter_advance(iter, arg->size);
-    if (!value) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.setxattr) {
-        uint32_t setxattr_flags = setxattr_ext ? arg->setxattr_flags : 0;
-        req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags,
-                             setxattr_flags);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid,
-                        struct fuse_mbuf_iter *iter)
-{
-    struct fuse_getxattr_in *arg;
-    const char *name;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    name = fuse_mbuf_iter_advance_str(iter);
-    if (!arg || !name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.getxattr) {
-        req->se->op.getxattr(req, nodeid, name, arg->size);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid,
-                         struct fuse_mbuf_iter *iter)
-{
-    struct fuse_getxattr_in *arg;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.listxattr) {
-        req->se->op.listxattr(req, nodeid, arg->size);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid,
-                           struct fuse_mbuf_iter *iter)
-{
-    const char *name = fuse_mbuf_iter_advance_str(iter);
-
-    if (!name) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.removexattr) {
-        req->se->op.removexattr(req, nodeid, name);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void convert_fuse_file_lock(struct fuse_file_lock *fl,
-                                   struct flock *flock)
-{
-    memset(flock, 0, sizeof(struct flock));
-    flock->l_type = fl->type;
-    flock->l_whence = SEEK_SET;
-    flock->l_start = fl->start;
-    if (fl->end == OFFSET_MAX) {
-        flock->l_len = 0;
-    } else {
-        flock->l_len = fl->end - fl->start + 1;
-    }
-    flock->l_pid = fl->pid;
-}
-
-static void do_getlk(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_lk_in *arg;
-    struct fuse_file_info fi;
-    struct flock flock;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-    fi.lock_owner = arg->owner;
-
-    convert_fuse_file_lock(&arg->lk, &flock);
-    if (req->se->op.getlk) {
-        req->se->op.getlk(req, nodeid, &fi, &flock);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid,
-                            struct fuse_mbuf_iter *iter, int sleep)
-{
-    struct fuse_lk_in *arg;
-    struct fuse_file_info fi;
-    struct flock flock;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-    fi.lock_owner = arg->owner;
-
-    if (arg->lk_flags & FUSE_LK_FLOCK) {
-        int op = 0;
-
-        switch (arg->lk.type) {
-        case F_RDLCK:
-            op = LOCK_SH;
-            break;
-        case F_WRLCK:
-            op = LOCK_EX;
-            break;
-        case F_UNLCK:
-            op = LOCK_UN;
-            break;
-        }
-        if (!sleep) {
-            op |= LOCK_NB;
-        }
-
-        if (req->se->op.flock) {
-            req->se->op.flock(req, nodeid, &fi, op);
-        } else {
-            fuse_reply_err(req, ENOSYS);
-        }
-    } else {
-        convert_fuse_file_lock(&arg->lk, &flock);
-        if (req->se->op.setlk) {
-            req->se->op.setlk(req, nodeid, &fi, &flock, sleep);
-        } else {
-            fuse_reply_err(req, ENOSYS);
-        }
-    }
-}
-
-static void do_setlk(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    do_setlk_common(req, nodeid, iter, 0);
-}
-
-static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    do_setlk_common(req, nodeid, iter, 1);
-}
-
-static int find_interrupted(struct fuse_session *se, struct fuse_req *req)
-{
-    struct fuse_req *curr;
-
-    for (curr = se->list.next; curr != &se->list; curr = curr->next) {
-        if (curr->unique == req->u.i.unique) {
-            fuse_interrupt_func_t func;
-            void *data;
-
-            curr->ctr++;
-            pthread_mutex_unlock(&se->lock);
-
-            /* Ugh, ugly locking */
-            pthread_mutex_lock(&curr->lock);
-            pthread_mutex_lock(&se->lock);
-            curr->interrupted = 1;
-            func = curr->u.ni.func;
-            data = curr->u.ni.data;
-            pthread_mutex_unlock(&se->lock);
-            if (func) {
-                func(curr, data);
-            }
-            pthread_mutex_unlock(&curr->lock);
-
-            pthread_mutex_lock(&se->lock);
-            curr->ctr--;
-            if (!curr->ctr) {
-                destroy_req(curr);
-            }
-
-            return 1;
-        }
-    }
-    for (curr = se->interrupts.next; curr != &se->interrupts;
-         curr = curr->next) {
-        if (curr->u.i.unique == req->u.i.unique) {
-            return 1;
-        }
-    }
-    return 0;
-}
-
-static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid,
-                         struct fuse_mbuf_iter *iter)
-{
-    struct fuse_interrupt_in *arg;
-    struct fuse_session *se = req->se;
-
-    (void)nodeid;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n",
-             (unsigned long long)arg->unique);
-
-    req->u.i.unique = arg->unique;
-
-    pthread_mutex_lock(&se->lock);
-    if (find_interrupted(se, req)) {
-        destroy_req(req);
-    } else {
-        list_add_req(req, &se->interrupts);
-    }
-    pthread_mutex_unlock(&se->lock);
-}
-
-static struct fuse_req *check_interrupt(struct fuse_session *se,
-                                        struct fuse_req *req)
-{
-    struct fuse_req *curr;
-
-    for (curr = se->interrupts.next; curr != &se->interrupts;
-         curr = curr->next) {
-        if (curr->u.i.unique == req->unique) {
-            req->interrupted = 1;
-            list_del_req(curr);
-            g_free(curr);
-            return NULL;
-        }
-    }
-    curr = se->interrupts.next;
-    if (curr != &se->interrupts) {
-        list_del_req(curr);
-        list_init_req(curr);
-        return curr;
-    } else {
-        return NULL;
-    }
-}
-
-static void do_bmap(fuse_req_t req, fuse_ino_t nodeid,
-                    struct fuse_mbuf_iter *iter)
-{
-    struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    if (req->se->op.bmap) {
-        req->se->op.bmap(req, nodeid, arg->blocksize, arg->block);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_ioctl_in *arg;
-    unsigned int flags;
-    void *in_buf = NULL;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    flags = arg->flags;
-    if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) {
-        fuse_reply_err(req, ENOTTY);
-        return;
-    }
-
-    if (arg->in_size) {
-        in_buf = fuse_mbuf_iter_advance(iter, arg->in_size);
-        if (!in_buf) {
-            fuse_reply_err(req, EINVAL);
-            return;
-        }
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) {
-        req->ioctl_64bit = 1;
-    }
-
-    if (req->se->op.ioctl) {
-        req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg,
-                          &fi, flags, in_buf, arg->in_size, arg->out_size);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-void fuse_pollhandle_destroy(struct fuse_pollhandle *ph)
-{
-    free(ph);
-}
-
-static void do_poll(fuse_req_t req, fuse_ino_t nodeid,
-                    struct fuse_mbuf_iter *iter)
-{
-    struct fuse_poll_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-    fi.poll_events = arg->events;
-
-    if (req->se->op.poll) {
-        struct fuse_pollhandle *ph = NULL;
-
-        if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) {
-            ph = malloc(sizeof(struct fuse_pollhandle));
-            if (ph == NULL) {
-                fuse_reply_err(req, ENOMEM);
-                return;
-            }
-            ph->kh = arg->kh;
-            ph->se = req->se;
-        }
-
-        req->se->op.poll(req, nodeid, &fi, ph);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid,
-                         struct fuse_mbuf_iter *iter)
-{
-    struct fuse_fallocate_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (req->se->op.fallocate) {
-        req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length,
-                              &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in,
-                               struct fuse_mbuf_iter *iter)
-{
-    struct fuse_copy_file_range_in *arg;
-    struct fuse_file_info fi_in, fi_out;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    memset(&fi_in, 0, sizeof(fi_in));
-    fi_in.fh = arg->fh_in;
-
-    memset(&fi_out, 0, sizeof(fi_out));
-    fi_out.fh = arg->fh_out;
-
-
-    if (req->se->op.copy_file_range) {
-        req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in,
-                                    arg->nodeid_out, arg->off_out, &fi_out,
-                                    arg->len, arg->flags);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
-                     struct fuse_mbuf_iter *iter)
-{
-    struct fuse_lseek_in *arg;
-    struct fuse_file_info fi;
-
-    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-    memset(&fi, 0, sizeof(fi));
-    fi.fh = arg->fh;
-
-    if (req->se->op.lseek) {
-        req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
-                      struct fuse_mbuf_iter *iter)
-{
-    if (req->se->op.syncfs) {
-        req->se->op.syncfs(req, nodeid);
-    } else {
-        fuse_reply_err(req, ENOSYS);
-    }
-}
-
-static void do_init(fuse_req_t req, fuse_ino_t nodeid,
-                    struct fuse_mbuf_iter *iter)
-{
-    size_t compat_size = offsetof(struct fuse_init_in, max_readahead);
-    size_t compat2_size = offsetof(struct fuse_init_in, flags) +
-                              sizeof(uint32_t);
-    /* Fuse structure extended with minor version 36 */
-    size_t compat3_size = endof(struct fuse_init_in, unused);
-    struct fuse_init_in *arg;
-    struct fuse_init_out outarg;
-    struct fuse_session *se = req->se;
-    size_t bufsize = se->bufsize;
-    size_t outargsize = sizeof(outarg);
-    uint64_t flags = 0;
-
-    (void)nodeid;
-
-    /* First consume the old fields... */
-    arg = fuse_mbuf_iter_advance(iter, compat_size);
-    if (!arg) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    /* ...and now consume the new fields. */
-    if (arg->major == 7 && arg->minor >= 6) {
-        if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) {
-            fuse_reply_err(req, EINVAL);
-            return;
-        }
-        flags |= arg->flags;
-    }
-
-    /*
-     * fuse_init_in was extended again with minor version 36. Just read
-     * current known size of fuse_init so that future extension and
-     * header rebase does not cause breakage.
-     */
-    if (sizeof(*arg) > compat2_size && (arg->flags & FUSE_INIT_EXT)) {
-        if (!fuse_mbuf_iter_advance(iter, compat3_size - compat2_size)) {
-            fuse_reply_err(req, EINVAL);
-            return;
-        }
-        flags |= (uint64_t) arg->flags2 << 32;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
-    if (arg->major == 7 && arg->minor >= 6) {
-        fuse_log(FUSE_LOG_DEBUG, "flags=0x%016" PRIx64 "\n", flags);
-        fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead);
-    }
-    se->conn.proto_major = arg->major;
-    se->conn.proto_minor = arg->minor;
-    se->conn.capable = 0;
-    se->conn.want = 0;
-
-    memset(&outarg, 0, sizeof(outarg));
-    outarg.major = FUSE_KERNEL_VERSION;
-    outarg.minor = FUSE_KERNEL_MINOR_VERSION;
-
-    if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) {
-        fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n",
-                 arg->major, arg->minor);
-        fuse_reply_err(req, EPROTO);
-        return;
-    }
-
-    if (arg->major > 7) {
-        /* Wait for a second INIT request with a 7.X version */
-        send_reply_ok(req, &outarg, sizeof(outarg));
-        return;
-    }
-
-    if (arg->max_readahead < se->conn.max_readahead) {
-        se->conn.max_readahead = arg->max_readahead;
-    }
-    if (flags & FUSE_ASYNC_READ) {
-        se->conn.capable |= FUSE_CAP_ASYNC_READ;
-    }
-    if (flags & FUSE_POSIX_LOCKS) {
-        se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
-    }
-    if (flags & FUSE_ATOMIC_O_TRUNC) {
-        se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
-    }
-    if (flags & FUSE_EXPORT_SUPPORT) {
-        se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
-    }
-    if (flags & FUSE_DONT_MASK) {
-        se->conn.capable |= FUSE_CAP_DONT_MASK;
-    }
-    if (flags & FUSE_FLOCK_LOCKS) {
-        se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
-    }
-    if (flags & FUSE_AUTO_INVAL_DATA) {
-        se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
-    }
-    if (flags & FUSE_DO_READDIRPLUS) {
-        se->conn.capable |= FUSE_CAP_READDIRPLUS;
-    }
-    if (flags & FUSE_READDIRPLUS_AUTO) {
-        se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
-    }
-    if (flags & FUSE_ASYNC_DIO) {
-        se->conn.capable |= FUSE_CAP_ASYNC_DIO;
-    }
-    if (flags & FUSE_WRITEBACK_CACHE) {
-        se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
-    }
-    if (flags & FUSE_NO_OPEN_SUPPORT) {
-        se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
-    }
-    if (flags & FUSE_PARALLEL_DIROPS) {
-        se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
-    }
-    if (flags & FUSE_POSIX_ACL) {
-        se->conn.capable |= FUSE_CAP_POSIX_ACL;
-    }
-    if (flags & FUSE_HANDLE_KILLPRIV) {
-        se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
-    }
-    if (flags & FUSE_NO_OPENDIR_SUPPORT) {
-        se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
-    }
-    if (!(flags & FUSE_MAX_PAGES)) {
-        size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
-                             FUSE_BUFFER_HEADER_SIZE;
-        if (bufsize > max_bufsize) {
-            bufsize = max_bufsize;
-        }
-    }
-    if (flags & FUSE_SUBMOUNTS) {
-        se->conn.capable |= FUSE_CAP_SUBMOUNTS;
-    }
-    if (flags & FUSE_HANDLE_KILLPRIV_V2) {
-        se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
-    }
-    if (flags & FUSE_SETXATTR_EXT) {
-        se->conn.capable |= FUSE_CAP_SETXATTR_EXT;
-    }
-    if (flags & FUSE_SECURITY_CTX) {
-        se->conn.capable |= FUSE_CAP_SECURITY_CTX;
-    }
-#ifdef HAVE_SPLICE
-#ifdef HAVE_VMSPLICE
-    se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
-#endif
-    se->conn.capable |= FUSE_CAP_SPLICE_READ;
-#endif
-    se->conn.capable |= FUSE_CAP_IOCTL_DIR;
-
-    /*
-     * Default settings for modern filesystems.
-     *
-     * Most of these capabilities were disabled by default in
-     * libfuse2 for backwards compatibility reasons. In libfuse3,
-     * we can finally enable them by default (as long as they're
-     * supported by the kernel).
-     */
-#define LL_SET_DEFAULT(cond, cap)             \
-    if ((cond) && (se->conn.capable & (cap))) \
-        se->conn.want |= (cap)
-    LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ);
-    LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS);
-    LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA);
-    LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV);
-    LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO);
-    LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR);
-    LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC);
-    LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ);
-    LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS);
-    LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS);
-    LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS);
-    LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir,
-                   FUSE_CAP_READDIRPLUS_AUTO);
-    se->conn.time_gran = 1;
-
-    if (bufsize < FUSE_MIN_READ_BUFFER) {
-        fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n",
-                 bufsize);
-        bufsize = FUSE_MIN_READ_BUFFER;
-    }
-    se->bufsize = bufsize;
-
-    if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) {
-        se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE;
-    }
-
-    se->got_init = 1;
-    se->got_destroy = 0;
-    if (se->op.init) {
-        se->op.init(se->userdata, &se->conn);
-    }
-
-    if (se->conn.want & (~se->conn.capable)) {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: error: filesystem requested capabilities "
-                 "0x%" PRIx64 " that are not supported by kernel, aborting.\n",
-                 se->conn.want & (~se->conn.capable));
-        fuse_reply_err(req, EPROTO);
-        se->error = -EPROTO;
-        fuse_session_exit(se);
-        return;
-    }
-
-    if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) {
-        se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
-    }
-    if (flags & FUSE_MAX_PAGES) {
-        outarg.flags |= FUSE_MAX_PAGES;
-        outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1;
-    }
-
-    /*
-     * Always enable big writes, this is superseded
-     * by the max_write option
-     */
-    outarg.flags |= FUSE_BIG_WRITES;
-
-    if (se->conn.want & FUSE_CAP_ASYNC_READ) {
-        outarg.flags |= FUSE_ASYNC_READ;
-    }
-    if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) {
-        outarg.flags |= FUSE_PARALLEL_DIROPS;
-    }
-    if (se->conn.want & FUSE_CAP_POSIX_LOCKS) {
-        outarg.flags |= FUSE_POSIX_LOCKS;
-    }
-    if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) {
-        outarg.flags |= FUSE_ATOMIC_O_TRUNC;
-    }
-    if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) {
-        outarg.flags |= FUSE_EXPORT_SUPPORT;
-    }
-    if (se->conn.want & FUSE_CAP_DONT_MASK) {
-        outarg.flags |= FUSE_DONT_MASK;
-    }
-    if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) {
-        outarg.flags |= FUSE_FLOCK_LOCKS;
-    }
-    if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) {
-        outarg.flags |= FUSE_AUTO_INVAL_DATA;
-    }
-    if (se->conn.want & FUSE_CAP_READDIRPLUS) {
-        outarg.flags |= FUSE_DO_READDIRPLUS;
-    }
-    if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) {
-        outarg.flags |= FUSE_READDIRPLUS_AUTO;
-    }
-    if (se->conn.want & FUSE_CAP_ASYNC_DIO) {
-        outarg.flags |= FUSE_ASYNC_DIO;
-    }
-    if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) {
-        outarg.flags |= FUSE_WRITEBACK_CACHE;
-    }
-    if (se->conn.want & FUSE_CAP_POSIX_ACL) {
-        outarg.flags |= FUSE_POSIX_ACL;
-    }
-    outarg.max_readahead = se->conn.max_readahead;
-    outarg.max_write = se->conn.max_write;
-    if (se->conn.max_background >= (1 << 16)) {
-        se->conn.max_background = (1 << 16) - 1;
-    }
-    if (se->conn.congestion_threshold > se->conn.max_background) {
-        se->conn.congestion_threshold = se->conn.max_background;
-    }
-    if (!se->conn.congestion_threshold) {
-        se->conn.congestion_threshold = se->conn.max_background * 3 / 4;
-    }
-
-    outarg.max_background = se->conn.max_background;
-    outarg.congestion_threshold = se->conn.congestion_threshold;
-    outarg.time_gran = se->conn.time_gran;
-
-    if (se->conn.want & FUSE_CAP_HANDLE_KILLPRIV_V2) {
-        outarg.flags |= FUSE_HANDLE_KILLPRIV_V2;
-    }
-
-    if (se->conn.want & FUSE_CAP_SETXATTR_EXT) {
-        outarg.flags |= FUSE_SETXATTR_EXT;
-    }
-
-    if (se->conn.want & FUSE_CAP_SECURITY_CTX) {
-        /* bits 32..63 get shifted down 32 bits into the flags2 field */
-        outarg.flags2 |= FUSE_SECURITY_CTX >> 32;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "   INIT: %u.%u\n", outarg.major, outarg.minor);
-    fuse_log(FUSE_LOG_DEBUG, "   flags2=0x%08x flags=0x%08x\n", outarg.flags2,
-             outarg.flags);
-    fuse_log(FUSE_LOG_DEBUG, "   max_readahead=0x%08x\n", outarg.max_readahead);
-    fuse_log(FUSE_LOG_DEBUG, "   max_write=0x%08x\n", outarg.max_write);
-    fuse_log(FUSE_LOG_DEBUG, "   max_background=%i\n", outarg.max_background);
-    fuse_log(FUSE_LOG_DEBUG, "   congestion_threshold=%i\n",
-             outarg.congestion_threshold);
-    fuse_log(FUSE_LOG_DEBUG, "   time_gran=%u\n", outarg.time_gran);
-
-    send_reply_ok(req, &outarg, outargsize);
-}
-
-static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
-                       struct fuse_mbuf_iter *iter)
-{
-    struct fuse_session *se = req->se;
-
-    (void)nodeid;
-    (void)iter;
-
-    se->got_destroy = 1;
-    se->got_init = 0;
-    if (se->op.destroy) {
-        se->op.destroy(se->userdata);
-    }
-
-    send_reply_ok(req, NULL, 0);
-}
-
-int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
-                               off_t offset, struct fuse_bufvec *bufv)
-{
-    struct fuse_out_header out = {
-        .error = FUSE_NOTIFY_STORE,
-    };
-    struct fuse_notify_store_out outarg = {
-        .nodeid = ino,
-        .offset = offset,
-        .size = fuse_buf_size(bufv),
-    };
-    struct iovec iov[3];
-    int res;
-
-    if (!se) {
-        return -EINVAL;
-    }
-
-    iov[0].iov_base = &out;
-    iov[0].iov_len = sizeof(out);
-    iov[1].iov_base = &outarg;
-    iov[1].iov_len = sizeof(outarg);
-
-    res = fuse_send_data_iov(se, NULL, iov, 2, bufv);
-    if (res > 0) {
-        res = -res;
-    }
-
-    return res;
-}
-
-void *fuse_req_userdata(fuse_req_t req)
-{
-    return req->se->userdata;
-}
-
-const struct fuse_ctx *fuse_req_ctx(fuse_req_t req)
-{
-    return &req->ctx;
-}
-
-void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
-                             void *data)
-{
-    pthread_mutex_lock(&req->lock);
-    pthread_mutex_lock(&req->se->lock);
-    req->u.ni.func = func;
-    req->u.ni.data = data;
-    pthread_mutex_unlock(&req->se->lock);
-    if (req->interrupted && func) {
-        func(req, data);
-    }
-    pthread_mutex_unlock(&req->lock);
-}
-
-int fuse_req_interrupted(fuse_req_t req)
-{
-    int interrupted;
-
-    pthread_mutex_lock(&req->se->lock);
-    interrupted = req->interrupted;
-    pthread_mutex_unlock(&req->se->lock);
-
-    return interrupted;
-}
-
-static struct {
-    void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *);
-    const char *name;
-} fuse_ll_ops[] = {
-    [FUSE_LOOKUP] = { do_lookup, "LOOKUP" },
-    [FUSE_FORGET] = { do_forget, "FORGET" },
-    [FUSE_GETATTR] = { do_getattr, "GETATTR" },
-    [FUSE_SETATTR] = { do_setattr, "SETATTR" },
-    [FUSE_READLINK] = { do_readlink, "READLINK" },
-    [FUSE_SYMLINK] = { do_symlink, "SYMLINK" },
-    [FUSE_MKNOD] = { do_mknod, "MKNOD" },
-    [FUSE_MKDIR] = { do_mkdir, "MKDIR" },
-    [FUSE_UNLINK] = { do_unlink, "UNLINK" },
-    [FUSE_RMDIR] = { do_rmdir, "RMDIR" },
-    [FUSE_RENAME] = { do_rename, "RENAME" },
-    [FUSE_LINK] = { do_link, "LINK" },
-    [FUSE_OPEN] = { do_open, "OPEN" },
-    [FUSE_READ] = { do_read, "READ" },
-    [FUSE_WRITE] = { do_write, "WRITE" },
-    [FUSE_STATFS] = { do_statfs, "STATFS" },
-    [FUSE_RELEASE] = { do_release, "RELEASE" },
-    [FUSE_FSYNC] = { do_fsync, "FSYNC" },
-    [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" },
-    [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" },
-    [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" },
-    [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" },
-    [FUSE_FLUSH] = { do_flush, "FLUSH" },
-    [FUSE_INIT] = { do_init, "INIT" },
-    [FUSE_OPENDIR] = { do_opendir, "OPENDIR" },
-    [FUSE_READDIR] = { do_readdir, "READDIR" },
-    [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" },
-    [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" },
-    [FUSE_GETLK] = { do_getlk, "GETLK" },
-    [FUSE_SETLK] = { do_setlk, "SETLK" },
-    [FUSE_SETLKW] = { do_setlkw, "SETLKW" },
-    [FUSE_ACCESS] = { do_access, "ACCESS" },
-    [FUSE_CREATE] = { do_create, "CREATE" },
-    [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" },
-    [FUSE_BMAP] = { do_bmap, "BMAP" },
-    [FUSE_IOCTL] = { do_ioctl, "IOCTL" },
-    [FUSE_POLL] = { do_poll, "POLL" },
-    [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" },
-    [FUSE_DESTROY] = { do_destroy, "DESTROY" },
-    [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" },
-    [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" },
-    [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" },
-    [FUSE_RENAME2] = { do_rename2, "RENAME2" },
-    [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
-    [FUSE_LSEEK] = { do_lseek, "LSEEK" },
-    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
-};
-
-#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
-
-static const char *opname(enum fuse_opcode opcode)
-{
-    if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) {
-        return "???";
-    } else {
-        return fuse_ll_ops[opcode].name;
-    }
-}
-
-void fuse_session_process_buf(struct fuse_session *se,
-                              const struct fuse_buf *buf)
-{
-    struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 };
-    fuse_session_process_buf_int(se, &bufv, NULL);
-}
-
-/*
- * Restriction:
- *   bufv is normally a single entry buffer, except for a write
- *   where (if it's in memory) then the bufv may be multiple entries,
- *   where the first entry contains all headers and subsequent entries
- *   contain data
- *   bufv shall not use any offsets etc to make the data anything
- *   other than contiguous starting from 0.
- */
-void fuse_session_process_buf_int(struct fuse_session *se,
-                                  struct fuse_bufvec *bufv,
-                                  struct fuse_chan *ch)
-{
-    const struct fuse_buf *buf = bufv->buf;
-    struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf);
-    struct fuse_in_header *in;
-    struct fuse_req *req;
-    int err;
-
-    /* The first buffer must be a memory buffer */
-    assert(!(buf->flags & FUSE_BUF_IS_FD));
-
-    in = fuse_mbuf_iter_advance(&iter, sizeof(*in));
-    assert(in); /* caller guarantees the input buffer is large enough */
-
-    fuse_log(
-        FUSE_LOG_DEBUG,
-        "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n",
-        (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode),
-        in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid);
-
-    req = fuse_ll_alloc_req(se);
-    if (req == NULL) {
-        struct fuse_out_header out = {
-            .unique = in->unique,
-            .error = -ENOMEM,
-        };
-        struct iovec iov = {
-            .iov_base = &out,
-            .iov_len = sizeof(struct fuse_out_header),
-        };
-
-        fuse_send_msg(se, ch, &iov, 1);
-        return;
-    }
-
-    req->unique = in->unique;
-    req->ctx.uid = in->uid;
-    req->ctx.gid = in->gid;
-    req->ctx.pid = in->pid;
-    req->ch = ch;
-
-    /*
-     * INIT and DESTROY requests are serialized, all other request types
-     * run in parallel.  This prevents races between FUSE_INIT and ordinary
-     * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and
-     * FUSE_DESTROY and FUSE_DESTROY.
-     */
-    if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT ||
-        in->opcode == FUSE_DESTROY) {
-        pthread_rwlock_wrlock(&se->init_rwlock);
-    } else {
-        pthread_rwlock_rdlock(&se->init_rwlock);
-    }
-
-    err = EIO;
-    if (!se->got_init) {
-        enum fuse_opcode expected;
-
-        expected = se->cuse_data ? CUSE_INIT : FUSE_INIT;
-        if (in->opcode != expected) {
-            goto reply_err;
-        }
-    } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) {
-        if (fuse_lowlevel_is_virtio(se)) {
-            /*
-             * TODO: This is after a hard reboot typically, we need to do
-             * a destroy, but we can't reply to this request yet so
-             * we can't use do_destroy
-             */
-            fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__);
-            se->got_destroy = 1;
-            se->got_init = 0;
-            if (se->op.destroy) {
-                se->op.destroy(se->userdata);
-            }
-        } else {
-            goto reply_err;
-        }
-    }
-
-    err = EACCES;
-    /* Implement -o allow_root */
-    if (se->deny_others && in->uid != se->owner && in->uid != 0 &&
-        in->opcode != FUSE_INIT && in->opcode != FUSE_READ &&
-        in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC &&
-        in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR &&
-        in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR &&
-        in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) {
-        goto reply_err;
-    }
-
-    err = ENOSYS;
-    if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) {
-        goto reply_err;
-    }
-    if (in->opcode != FUSE_INTERRUPT) {
-        struct fuse_req *intr;
-        pthread_mutex_lock(&se->lock);
-        intr = check_interrupt(se, req);
-        list_add_req(req, &se->list);
-        pthread_mutex_unlock(&se->lock);
-        if (intr) {
-            fuse_reply_err(intr, EAGAIN);
-        }
-    }
-
-    if (in->opcode == FUSE_WRITE && se->op.write_buf) {
-        do_write_buf(req, in->nodeid, &iter, bufv);
-    } else {
-        fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter);
-    }
-
-    pthread_rwlock_unlock(&se->init_rwlock);
-    return;
-
-reply_err:
-    fuse_reply_err(req, err);
-    pthread_rwlock_unlock(&se->init_rwlock);
-}
-
-#define LL_OPTION(n, o, v)                     \
-    {                                          \
-        n, offsetof(struct fuse_session, o), v \
-    }
-
-static const struct fuse_opt fuse_ll_opts[] = {
-    LL_OPTION("debug", debug, 1),
-    LL_OPTION("-d", debug, 1),
-    LL_OPTION("--debug", debug, 1),
-    LL_OPTION("allow_root", deny_others, 1),
-    LL_OPTION("--socket-path=%s", vu_socket_path, 0),
-    LL_OPTION("--socket-group=%s", vu_socket_group, 0),
-    LL_OPTION("--fd=%d", vu_listen_fd, 0),
-    LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0),
-    FUSE_OPT_END
-};
-
-void fuse_lowlevel_version(void)
-{
-    printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION,
-           FUSE_KERNEL_MINOR_VERSION);
-}
-
-void fuse_lowlevel_help(void)
-{
-    /*
-     * These are not all options, but the ones that are
-     * potentially of interest to an end-user
-     */
-    printf(
-        "    -o allow_root              allow access by root\n"
-        "    --socket-path=PATH         path for the vhost-user socket\n"
-        "    --socket-group=GRNAME      name of group for the vhost-user socket\n"
-        "    --fd=FDNUM                 fd number of vhost-user socket\n"
-        "    --thread-pool-size=NUM     thread pool size limit (default %d)\n",
-        THREAD_POOL_SIZE);
-}
-
-void fuse_session_destroy(struct fuse_session *se)
-{
-    if (se->got_init && !se->got_destroy) {
-        if (se->op.destroy) {
-            se->op.destroy(se->userdata);
-        }
-    }
-    pthread_rwlock_destroy(&se->init_rwlock);
-    pthread_mutex_destroy(&se->lock);
-    free(se->cuse_data);
-    if (se->fd != -1) {
-        close(se->fd);
-    }
-
-    if (fuse_lowlevel_is_virtio(se)) {
-        virtio_session_close(se);
-    }
-
-    free(se->vu_socket_path);
-    se->vu_socket_path = NULL;
-
-    g_free(se);
-}
-
-
-struct fuse_session *fuse_session_new(struct fuse_args *args,
-                                      const struct fuse_lowlevel_ops *op,
-                                      size_t op_size, void *userdata)
-{
-    struct fuse_session *se;
-
-    if (sizeof(struct fuse_lowlevel_ops) < op_size) {
-        fuse_log(
-            FUSE_LOG_ERR,
-            "fuse: warning: library too old, some operations may not work\n");
-        op_size = sizeof(struct fuse_lowlevel_ops);
-    }
-
-    if (args->argc == 0) {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: empty argv passed to fuse_session_new().\n");
-        return NULL;
-    }
-
-    se = g_try_new0(struct fuse_session, 1);
-    if (se == NULL) {
-        fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n");
-        goto out1;
-    }
-    se->fd = -1;
-    se->vu_listen_fd = -1;
-    se->thread_pool_size = THREAD_POOL_SIZE;
-    se->conn.max_write = UINT_MAX;
-    se->conn.max_readahead = UINT_MAX;
-
-    /* Parse options */
-    if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) {
-        goto out2;
-    }
-    if (args->argc == 1 && args->argv[0][0] == '-') {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: warning: argv[0] looks like an option, but "
-                 "will be ignored\n");
-    } else if (args->argc != 1) {
-        int i;
-        fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `");
-        for (i = 1; i < args->argc - 1; i++) {
-            fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]);
-        }
-        fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]);
-        goto out4;
-    }
-
-    if (!se->vu_socket_path && se->vu_listen_fd < 0) {
-        fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd option\n");
-        goto out4;
-    }
-    if (se->vu_socket_path && se->vu_listen_fd >= 0) {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: --socket-path and --fd cannot be given together\n");
-        goto out4;
-    }
-    if (se->vu_socket_group && !se->vu_socket_path) {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: --socket-group can only be used with --socket-path\n");
-        goto out4;
-    }
-
-    se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE;
-
-    list_init_req(&se->list);
-    list_init_req(&se->interrupts);
-    fuse_mutex_init(&se->lock);
-    pthread_rwlock_init(&se->init_rwlock, NULL);
-
-    memcpy(&se->op, op, op_size);
-    se->owner = getuid();
-    se->userdata = userdata;
-
-    return se;
-
-out4:
-    fuse_opt_free_args(args);
-out2:
-    g_free(se);
-out1:
-    return NULL;
-}
-
-int fuse_session_mount(struct fuse_session *se)
-{
-    return virtio_session_mount(se);
-}
-
-int fuse_session_fd(struct fuse_session *se)
-{
-    return se->fd;
-}
-
-void fuse_session_unmount(struct fuse_session *se)
-{
-}
-
-int fuse_lowlevel_is_virtio(struct fuse_session *se)
-{
-    return !!se->virtio_dev;
-}
-
-void fuse_session_exit(struct fuse_session *se)
-{
-    se->exited = 1;
-}
-
-void fuse_session_reset(struct fuse_session *se)
-{
-    se->exited = 0;
-    se->error = 0;
-}
-
-int fuse_session_exited(struct fuse_session *se)
-{
-    return se->exited;
-}
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
deleted file mode 100644 (file)
index b889dae..0000000
+++ /dev/null
@@ -1,1988 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_LOWLEVEL_H_
-#define FUSE_LOWLEVEL_H_
-
-/**
- * @file
- *
- * Low level API
- *
- * IMPORTANT: you should define FUSE_USE_VERSION before including this
- * header.  To use the newest API define it to 31 (recommended for any
- * new application).
- */
-
-#ifndef FUSE_USE_VERSION
-#error FUSE_USE_VERSION not defined
-#endif
-
-#include "fuse_common.h"
-
-#include <sys/statvfs.h>
-#include <sys/uio.h>
-#include <utime.h>
-
-/*
- * Miscellaneous definitions
- */
-
-/** The node ID of the root inode */
-#define FUSE_ROOT_ID 1
-
-/** Inode number type */
-typedef uint64_t fuse_ino_t;
-
-/** Request pointer type */
-typedef struct fuse_req *fuse_req_t;
-
-/**
- * Session
- *
- * This provides hooks for processing requests, and exiting
- */
-struct fuse_session;
-
-/** Directory entry parameters supplied to fuse_reply_entry() */
-struct fuse_entry_param {
-    /**
-     * Unique inode number
-     *
-     * In lookup, zero means negative entry (from version 2.5)
-     * Returning ENOENT also means negative entry, but by setting zero
-     * ino the kernel may cache negative entries for entry_timeout
-     * seconds.
-     */
-    fuse_ino_t ino;
-
-    /**
-     * Generation number for this entry.
-     *
-     * If the file system will be exported over NFS, the
-     * ino/generation pairs need to be unique over the file
-     * system's lifetime (rather than just the mount time). So if
-     * the file system reuses an inode after it has been deleted,
-     * it must assign a new, previously unused generation number
-     * to the inode at the same time.
-     *
-     */
-    uint64_t generation;
-
-    /**
-     * Inode attributes.
-     *
-     * Even if attr_timeout == 0, attr must be correct. For example,
-     * for open(), FUSE uses attr.st_size from lookup() to determine
-     * how many bytes to request. If this value is not correct,
-     * incorrect data will be returned.
-     */
-    struct stat attr;
-
-    /**
-     * Validity timeout (in seconds) for inode attributes. If
-     *  attributes only change as a result of requests that come
-     *  through the kernel, this should be set to a very large
-     *  value.
-     */
-    double attr_timeout;
-
-    /**
-     * Validity timeout (in seconds) for the name. If directory
-     *  entries are changed/deleted only as a result of requests
-     *  that come through the kernel, this should be set to a very
-     *  large value.
-     */
-    double entry_timeout;
-
-    /**
-     * Flags for fuse_attr.flags that do not fit into attr.
-     */
-    uint32_t attr_flags;
-};
-
-/**
- * Additional context associated with requests.
- *
- * Note that the reported client uid, gid and pid may be zero in some
- * situations. For example, if the FUSE file system is running in a
- * PID or user namespace but then accessed from outside the namespace,
- * there is no valid uid/pid/gid that could be reported.
- */
-struct fuse_ctx {
-    /** User ID of the calling process */
-    uid_t uid;
-
-    /** Group ID of the calling process */
-    gid_t gid;
-
-    /** Thread ID of the calling process */
-    pid_t pid;
-
-    /** Umask of the calling process */
-    mode_t umask;
-};
-
-struct fuse_forget_data {
-    fuse_ino_t ino;
-    uint64_t nlookup;
-};
-
-/* 'to_set' flags in setattr */
-#define FUSE_SET_ATTR_MODE (1 << 0)
-#define FUSE_SET_ATTR_UID (1 << 1)
-#define FUSE_SET_ATTR_GID (1 << 2)
-#define FUSE_SET_ATTR_SIZE (1 << 3)
-#define FUSE_SET_ATTR_ATIME (1 << 4)
-#define FUSE_SET_ATTR_MTIME (1 << 5)
-#define FUSE_SET_ATTR_ATIME_NOW (1 << 7)
-#define FUSE_SET_ATTR_MTIME_NOW (1 << 8)
-#define FUSE_SET_ATTR_CTIME (1 << 10)
-#define FUSE_SET_ATTR_KILL_SUIDGID (1 << 11)
-
-/*
- * Request methods and replies
- */
-
-/**
- * Low level filesystem operations
- *
- * Most of the methods (with the exception of init and destroy)
- * receive a request handle (fuse_req_t) as their first argument.
- * This handle must be passed to one of the specified reply functions.
- *
- * This may be done inside the method invocation, or after the call
- * has returned.  The request handle is valid until one of the reply
- * functions is called.
- *
- * Other pointer arguments (name, fuse_file_info, etc) are not valid
- * after the call has returned, so if they are needed later, their
- * contents have to be copied.
- *
- * In general, all methods are expected to perform any necessary
- * permission checking. However, a filesystem may delegate this task
- * to the kernel by passing the `default_permissions` mount option to
- * `fuse_session_new()`. In this case, methods will only be called if
- * the kernel's permission check has succeeded.
- *
- * The filesystem sometimes needs to handle a return value of -ENOENT
- * from the reply function, which means, that the request was
- * interrupted, and the reply discarded.  For example if
- * fuse_reply_open() return -ENOENT means, that the release method for
- * this file will not be called.
- */
-struct fuse_lowlevel_ops {
-    /**
-     * Initialize filesystem
-     *
-     * This function is called when libfuse establishes
-     * communication with the FUSE kernel module. The file system
-     * should use this module to inspect and/or modify the
-     * connection parameters provided in the `conn` structure.
-     *
-     * Note that some parameters may be overwritten by options
-     * passed to fuse_session_new() which take precedence over the
-     * values set in this handler.
-     *
-     * There's no reply to this function
-     *
-     * @param userdata the user data passed to fuse_session_new()
-     */
-    void (*init)(void *userdata, struct fuse_conn_info *conn);
-
-    /**
-     * Clean up filesystem.
-     *
-     * Called on filesystem exit. When this method is called, the
-     * connection to the kernel may be gone already, so that eg. calls
-     * to fuse_lowlevel_notify_* will fail.
-     *
-     * There's no reply to this function
-     *
-     * @param userdata the user data passed to fuse_session_new()
-     */
-    void (*destroy)(void *userdata);
-
-    /**
-     * Look up a directory entry by name and get its attributes.
-     *
-     * Valid replies:
-     *   fuse_reply_entry
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the parent directory
-     * @param name the name to look up
-     */
-    void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name);
-
-    /**
-     * Forget about an inode
-     *
-     * This function is called when the kernel removes an inode
-     * from its internal caches.
-     *
-     * The inode's lookup count increases by one for every call to
-     * fuse_reply_entry and fuse_reply_create. The nlookup parameter
-     * indicates by how much the lookup count should be decreased.
-     *
-     * Inodes with a non-zero lookup count may receive request from
-     * the kernel even after calls to unlink, rmdir or (when
-     * overwriting an existing file) rename. Filesystems must handle
-     * such requests properly and it is recommended to defer removal
-     * of the inode until the lookup count reaches zero. Calls to
-     * unlink, rmdir or rename will be followed closely by forget
-     * unless the file or directory is open, in which case the
-     * kernel issues forget only after the release or releasedir
-     * calls.
-     *
-     * Note that if a file system will be exported over NFS the
-     * inodes lifetime must extend even beyond forget. See the
-     * generation field in struct fuse_entry_param above.
-     *
-     * On unmount the lookup count for all inodes implicitly drops
-     * to zero. It is not guaranteed that the file system will
-     * receive corresponding forget messages for the affected
-     * inodes.
-     *
-     * Valid replies:
-     *   fuse_reply_none
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param nlookup the number of lookups to forget
-     */
-    void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup);
-
-    /**
-     * Get file attributes.
-     *
-     * If writeback caching is enabled, the kernel may have a
-     * better idea of a file's length than the FUSE file system
-     * (eg if there has been a write that extended the file size,
-     * but that has not yet been passed to the filesystem.n
-     *
-     * In this case, the st_size value provided by the file system
-     * will be ignored.
-     *
-     * Valid replies:
-     *   fuse_reply_attr
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi for future use, currently always NULL
-     */
-    void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
-    /**
-     * Set file attributes
-     *
-     * In the 'attr' argument only members indicated by the 'to_set'
-     * bitmask contain valid values.  Other members contain undefined
-     * values.
-     *
-     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
-     * expected to reset the setuid and setgid bits if the file
-     * size or owner is being changed.
-     *
-     * If the setattr was invoked from the ftruncate() system call
-     * under Linux kernel versions 2.6.15 or later, the fi->fh will
-     * contain the value set by the open method or will be undefined
-     * if the open method didn't set any value.  Otherwise (not
-     * ftruncate call, or kernel version earlier than 2.6.15) the fi
-     * parameter will be NULL.
-     *
-     * Valid replies:
-     *   fuse_reply_attr
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param attr the attributes
-     * @param to_set bit mask of attributes which should be set
-     * @param fi file information, or NULL
-     */
-    void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
-                    int to_set, struct fuse_file_info *fi);
-
-    /**
-     * Read symbolic link
-     *
-     * Valid replies:
-     *   fuse_reply_readlink
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     */
-    void (*readlink)(fuse_req_t req, fuse_ino_t ino);
-
-    /**
-     * Create file node
-     *
-     * Create a regular file, character device, block device, fifo or
-     * socket node.
-     *
-     * Valid replies:
-     *   fuse_reply_entry
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the parent directory
-     * @param name to create
-     * @param mode file type and mode with which to create the new file
-     * @param rdev the device number (only valid if created file is a device)
-     */
-    void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name,
-                  mode_t mode, dev_t rdev);
-
-    /**
-     * Create a directory
-     *
-     * Valid replies:
-     *   fuse_reply_entry
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the parent directory
-     * @param name to create
-     * @param mode with which to create the new file
-     */
-    void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name,
-                  mode_t mode);
-
-    /**
-     * Remove a file
-     *
-     * If the file's inode's lookup count is non-zero, the file
-     * system is expected to postpone any removal of the inode
-     * until the lookup count reaches zero (see description of the
-     * forget function).
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the parent directory
-     * @param name to remove
-     */
-    void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name);
-
-    /**
-     * Remove a directory
-     *
-     * If the directory's inode's lookup count is non-zero, the
-     * file system is expected to postpone any removal of the
-     * inode until the lookup count reaches zero (see description
-     * of the forget function).
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the parent directory
-     * @param name to remove
-     */
-    void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name);
-
-    /**
-     * Create a symbolic link
-     *
-     * Valid replies:
-     *   fuse_reply_entry
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param link the contents of the symbolic link
-     * @param parent inode number of the parent directory
-     * @param name to create
-     */
-    void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent,
-                    const char *name);
-
-    /**
-     * Rename a file
-     *
-     * If the target exists it should be atomically replaced. If
-     * the target's inode's lookup count is non-zero, the file
-     * system is expected to postpone any removal of the inode
-     * until the lookup count reaches zero (see description of the
-     * forget function).
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EINVAL, i.e. all
-     * future bmap requests will fail with EINVAL without being
-     * send to the filesystem process.
-     *
-     * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If
-     * RENAME_NOREPLACE is specified, the filesystem must not
-     * overwrite *newname* if it exists and return an error
-     * instead. If `RENAME_EXCHANGE` is specified, the filesystem
-     * must atomically exchange the two files, i.e. both must
-     * exist and neither may be deleted.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the old parent directory
-     * @param name old name
-     * @param newparent inode number of the new parent directory
-     * @param newname new name
-     */
-    void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name,
-                   fuse_ino_t newparent, const char *newname,
-                   unsigned int flags);
-
-    /**
-     * Create a hard link
-     *
-     * Valid replies:
-     *   fuse_reply_entry
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the old inode number
-     * @param newparent inode number of the new parent directory
-     * @param newname new name to create
-     */
-    void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent,
-                 const char *newname);
-
-    /**
-     * Open a file
-     *
-     * Open flags are available in fi->flags. The following rules
-     * apply.
-     *
-     *  - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be
-     *    filtered out / handled by the kernel.
-     *
-     *  - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used
-     *    by the filesystem to check if the operation is
-     *    permitted.  If the ``-o default_permissions`` mount
-     *    option is given, this check is already done by the
-     *    kernel before calling open() and may thus be omitted by
-     *    the filesystem.
-     *
-     *  - When writeback caching is enabled, the kernel may send
-     *    read requests even for files opened with O_WRONLY. The
-     *    filesystem should be prepared to handle this.
-     *
-     *  - When writeback caching is disabled, the filesystem is
-     *    expected to properly handle the O_APPEND flag and ensure
-     *    that each write is appending to the end of the file.
-     *
-     *  - When writeback caching is enabled, the kernel will
-     *    handle O_APPEND. However, unless all changes to the file
-     *    come through the kernel this will not work reliably. The
-     *    filesystem should thus either ignore the O_APPEND flag
-     *    (and let the kernel handle it), or return an error
-     *    (indicating that reliably O_APPEND is not available).
-     *
-     * Filesystem may store an arbitrary file handle (pointer,
-     * index, etc) in fi->fh, and use this in other all other file
-     * operations (read, write, flush, release, fsync).
-     *
-     * Filesystem may also implement stateless file I/O and not store
-     * anything in fi->fh.
-     *
-     * There are also some flags (direct_io, keep_cache) which the
-     * filesystem may set in fi, to change the way the file is opened.
-     * See fuse_file_info structure in <fuse_common.h> for more details.
-     *
-     * If this request is answered with an error code of ENOSYS
-     * and FUSE_CAP_NO_OPEN_SUPPORT is set in
-     * `fuse_conn_info.capable`, this is treated as success and
-     * future calls to open and release will also succeed without being
-     * sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_open
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     */
-    void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
-    /**
-     * Read data
-     *
-     * Read should send exactly the number of bytes requested except
-     * on EOF or error, otherwise the rest of the data will be
-     * substituted with zeroes.  An exception to this is when the file
-     * has been opened in 'direct_io' mode, in which case the return
-     * value of the read system call will reflect the return value of
-     * this operation.
-     *
-     * fi->fh will contain the value set by the open method, or will
-     * be undefined if the open method didn't set any value.
-     *
-     * Valid replies:
-     *   fuse_reply_buf
-     *   fuse_reply_iov
-     *   fuse_reply_data
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param size number of bytes to read
-     * @param off offset to read from
-     * @param fi file information
-     */
-    void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
-                 struct fuse_file_info *fi);
-
-    /**
-     * Write data
-     *
-     * Write should return exactly the number of bytes requested
-     * except on error.  An exception to this is when the file has
-     * been opened in 'direct_io' mode, in which case the return value
-     * of the write system call will reflect the return value of this
-     * operation.
-     *
-     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
-     * expected to reset the setuid and setgid bits.
-     *
-     * fi->fh will contain the value set by the open method, or will
-     * be undefined if the open method didn't set any value.
-     *
-     * Valid replies:
-     *   fuse_reply_write
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param buf data to write
-     * @param size number of bytes to write
-     * @param off offset to write to
-     * @param fi file information
-     */
-    void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size,
-                  off_t off, struct fuse_file_info *fi);
-
-    /**
-     * Flush method
-     *
-     * This is called on each close() of the opened file.
-     *
-     * Since file descriptors can be duplicated (dup, dup2, fork), for
-     * one open call there may be many flush calls.
-     *
-     * Filesystems shouldn't assume that flush will always be called
-     * after some writes, or that it will be called at all.
-     *
-     * fi->fh will contain the value set by the open method, or will
-     * be undefined if the open method didn't set any value.
-     *
-     * NOTE: the name of the method is misleading, since (unlike
-     * fsync) the filesystem is not forced to flush pending writes.
-     * One reason to flush data is if the filesystem wants to return
-     * write errors during close.  However, such use is non-portable
-     * because POSIX does not require [close] to wait for delayed I/O to
-     * complete.
-     *
-     * If the filesystem supports file locking operations (setlk,
-     * getlk) it should remove all locks belonging to 'fi->owner'.
-     *
-     * If this request is answered with an error code of ENOSYS,
-     * this is treated as success and future calls to flush() will
-     * succeed automatically without being sent to the filesystem
-     * process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     *
-     * [close]:
-     * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
-     */
-    void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
-    /**
-     * Release an open file
-     *
-     * Release is called when there are no more references to an open
-     * file: all file descriptors are closed and all memory mappings
-     * are unmapped.
-     *
-     * For every open call there will be exactly one release call (unless
-     * the filesystem is force-unmounted).
-     *
-     * The filesystem may reply with an error, but error values are
-     * not returned to close() or munmap() which triggered the
-     * release.
-     *
-     * fi->fh will contain the value set by the open method, or will
-     * be undefined if the open method didn't set any value.
-     * fi->flags will contain the same flags as for open.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     */
-    void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
-    /**
-     * Synchronize file contents
-     *
-     * If the datasync parameter is non-zero, then only the user data
-     * should be flushed, not the meta data.
-     *
-     * If this request is answered with an error code of ENOSYS,
-     * this is treated as success and future calls to fsync() will
-     * succeed automatically without being sent to the filesystem
-     * process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param datasync flag indicating if only data should be flushed
-     * @param fi file information
-     */
-    void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync,
-                  struct fuse_file_info *fi);
-
-    /**
-     * Open a directory
-     *
-     * Filesystem may store an arbitrary file handle (pointer, index,
-     * etc) in fi->fh, and use this in all other directory
-     * stream operations (readdir, releasedir, fsyncdir).
-     *
-     * If this request is answered with an error code of ENOSYS and
-     * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`,
-     * this is treated as success and future calls to opendir and
-     * releasedir will also succeed without being sent to the filesystem
-     * process. In addition, the kernel will cache readdir results
-     * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR.
-     *
-     * Valid replies:
-     *   fuse_reply_open
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     */
-    void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
-    /**
-     * Read directory
-     *
-     * Send a buffer filled using fuse_add_direntry(), with size not
-     * exceeding the requested size.  Send an empty buffer on end of
-     * stream.
-     *
-     * fi->fh will contain the value set by the opendir method, or
-     * will be undefined if the opendir method didn't set any value.
-     *
-     * Returning a directory entry from readdir() does not affect
-     * its lookup count.
-     *
-     * If off_t is non-zero, then it will correspond to one of the off_t
-     * values that was previously returned by readdir() for the same
-     * directory handle. In this case, readdir() should skip over entries
-     * coming before the position defined by the off_t value. If entries
-     * are added or removed while the directory handle is open, the filesystem
-     * may still include the entries that have been removed, and may not
-     * report the entries that have been created. However, addition or
-     * removal of entries must never cause readdir() to skip over unrelated
-     * entries or to report them more than once. This means
-     * that off_t can not be a simple index that enumerates the entries
-     * that have been returned but must contain sufficient information to
-     * uniquely determine the next directory entry to return even when the
-     * set of entries is changing.
-     *
-     * The function does not have to report the '.' and '..'
-     * entries, but is allowed to do so. Note that, if readdir does
-     * not return '.' or '..', they will not be implicitly returned,
-     * and this behavior is observable by the caller.
-     *
-     * Valid replies:
-     *   fuse_reply_buf
-     *   fuse_reply_data
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param size maximum number of bytes to send
-     * @param off offset to continue reading the directory stream
-     * @param fi file information
-     */
-    void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
-                    struct fuse_file_info *fi);
-
-    /**
-     * Release an open directory
-     *
-     * For every opendir call there will be exactly one releasedir
-     * call (unless the filesystem is force-unmounted).
-     *
-     * fi->fh will contain the value set by the opendir method, or
-     * will be undefined if the opendir method didn't set any value.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     */
-    void (*releasedir)(fuse_req_t req, fuse_ino_t ino,
-                       struct fuse_file_info *fi);
-
-    /**
-     * Synchronize directory contents
-     *
-     * If the datasync parameter is non-zero, then only the directory
-     * contents should be flushed, not the meta data.
-     *
-     * fi->fh will contain the value set by the opendir method, or
-     * will be undefined if the opendir method didn't set any value.
-     *
-     * If this request is answered with an error code of ENOSYS,
-     * this is treated as success and future calls to fsyncdir() will
-     * succeed automatically without being sent to the filesystem
-     * process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param datasync flag indicating if only data should be flushed
-     * @param fi file information
-     */
-    void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync,
-                     struct fuse_file_info *fi);
-
-    /**
-     * Get file system statistics
-     *
-     * Valid replies:
-     *   fuse_reply_statfs
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number, zero means "undefined"
-     */
-    void (*statfs)(fuse_req_t req, fuse_ino_t ino);
-
-    /**
-     * Set an extended attribute
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
-     * future setxattr() requests will fail with EOPNOTSUPP without being
-     * sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     */
-    void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name,
-                     const char *value, size_t size, int flags,
-                     uint32_t setxattr_flags);
-
-    /**
-     * Get an extended attribute
-     *
-     * If size is zero, the size of the value should be sent with
-     * fuse_reply_xattr.
-     *
-     * If the size is non-zero, and the value fits in the buffer, the
-     * value should be sent with fuse_reply_buf.
-     *
-     * If the size is too small for the value, the ERANGE error should
-     * be sent.
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
-     * future getxattr() requests will fail with EOPNOTSUPP without being
-     * sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_buf
-     *   fuse_reply_data
-     *   fuse_reply_xattr
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param name of the extended attribute
-     * @param size maximum size of the value to send
-     */
-    void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name,
-                     size_t size);
-
-    /**
-     * List extended attribute names
-     *
-     * If size is zero, the total size of the attribute list should be
-     * sent with fuse_reply_xattr.
-     *
-     * If the size is non-zero, and the null character separated
-     * attribute list fits in the buffer, the list should be sent with
-     * fuse_reply_buf.
-     *
-     * If the size is too small for the list, the ERANGE error should
-     * be sent.
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
-     * future listxattr() requests will fail with EOPNOTSUPP without being
-     * sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_buf
-     *   fuse_reply_data
-     *   fuse_reply_xattr
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param size maximum size of the list to send
-     */
-    void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size);
-
-    /**
-     * Remove an extended attribute
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
-     * future removexattr() requests will fail with EOPNOTSUPP without being
-     * sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param name of the extended attribute
-     */
-    void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name);
-
-    /**
-     * Check file access permissions
-     *
-     * This will be called for the access() and chdir() system
-     * calls.  If the 'default_permissions' mount option is given,
-     * this method is not called.
-     *
-     * This method is not called under Linux kernel versions 2.4.x
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent success, i.e. this and all future access()
-     * requests will succeed without being sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param mask requested access mode
-     */
-    void (*access)(fuse_req_t req, fuse_ino_t ino, int mask);
-
-    /**
-     * Create and open a file
-     *
-     * If the file does not exist, first create it with the specified
-     * mode, and then open it.
-     *
-     * See the description of the open handler for more
-     * information.
-     *
-     * If this method is not implemented or under Linux kernel
-     * versions earlier than 2.6.15, the mknod() and open() methods
-     * will be called instead.
-     *
-     * If this request is answered with an error code of ENOSYS, the handler
-     * is treated as not implemented (i.e., for this and future requests the
-     * mknod() and open() handlers will be called instead).
-     *
-     * Valid replies:
-     *   fuse_reply_create
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param parent inode number of the parent directory
-     * @param name to create
-     * @param mode file type and mode with which to create the new file
-     * @param fi file information
-     */
-    void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name,
-                   mode_t mode, struct fuse_file_info *fi);
-
-    /**
-     * Test for a POSIX file lock
-     *
-     * Valid replies:
-     *   fuse_reply_lock
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     * @param lock the region/type to test
-     */
-    void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                  struct flock *lock);
-
-    /**
-     * Acquire, modify or release a POSIX file lock
-     *
-     * For POSIX threads (NPTL) there's a 1-1 relation between pid and
-     * owner, but otherwise this is not always the case.  For checking
-     * lock ownership, 'fi->owner' must be used.  The l_pid field in
-     * 'struct flock' should only be used to fill in this field in
-     * getlk().
-     *
-     * Note: if the locking methods are not implemented, the kernel
-     * will still allow file locking to work locally.  Hence these are
-     * only interesting for network filesystems and similar.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     * @param lock the region/type to set
-     * @param sleep locking operation may sleep
-     */
-    void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                  struct flock *lock, int sleep);
-
-    /**
-     * Map block index within file to block index within device
-     *
-     * Note: This makes sense only for block device backed filesystems
-     * mounted with the 'blkdev' option
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure, i.e. all future bmap() requests will
-     * fail with the same error code without being sent to the filesystem
-     * process.
-     *
-     * Valid replies:
-     *   fuse_reply_bmap
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param blocksize unit of block index
-     * @param idx block index within file
-     */
-    void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize,
-                 uint64_t idx);
-
-    /**
-     * Ioctl
-     *
-     * Note: For unrestricted ioctls (not allowed for FUSE
-     * servers), data in and out areas can be discovered by giving
-     * iovs and setting FUSE_IOCTL_RETRY in *flags*.  For
-     * restricted ioctls, kernel prepares in/out data area
-     * according to the information encoded in cmd.
-     *
-     * Valid replies:
-     *   fuse_reply_ioctl_retry
-     *   fuse_reply_ioctl
-     *   fuse_reply_ioctl_iov
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param cmd ioctl command
-     * @param arg ioctl argument
-     * @param fi file information
-     * @param flags for FUSE_IOCTL_* flags
-     * @param in_buf data fetched from the caller
-     * @param in_bufsz number of fetched bytes
-     * @param out_bufsz maximum size of output data
-     *
-     * Note : the unsigned long request submitted by the application
-     * is truncated to 32 bits.
-     */
-    void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
-                  struct fuse_file_info *fi, unsigned flags, const void *in_buf,
-                  size_t in_bufsz, size_t out_bufsz);
-
-    /**
-     * Poll for IO readiness
-     *
-     * Note: If ph is non-NULL, the client should notify
-     * when IO readiness events occur by calling
-     * fuse_lowlevel_notify_poll() with the specified ph.
-     *
-     * Regardless of the number of times poll with a non-NULL ph
-     * is received, single notification is enough to clear all.
-     * Notifying more times incurs overhead but doesn't harm
-     * correctness.
-     *
-     * The callee is responsible for destroying ph with
-     * fuse_pollhandle_destroy() when no longer in use.
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as success (with a kernel-defined default poll-mask) and
-     * future calls to poll() will succeed the same way without being sent
-     * to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_poll
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     * @param ph poll handle to be used for notification
-     */
-    void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                 struct fuse_pollhandle *ph);
-
-    /**
-     * Write data made available in a buffer
-     *
-     * This is a more generic version of the ->write() method.  If
-     * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the
-     * kernel supports splicing from the fuse device, then the
-     * data will be made available in pipe for supporting zero
-     * copy data transfer.
-     *
-     * bufv->count is guaranteed to be one (and thus bufv->idx is
-     * always zero). The write_buf handler must ensure that
-     * bufv->off is correctly updated (reflecting the number of
-     * bytes read from bufv->buf[0]).
-     *
-     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
-     * expected to reset the setuid and setgid bits.
-     *
-     * Valid replies:
-     *   fuse_reply_write
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param bufv buffer containing the data
-     * @param off offset to write to
-     * @param fi file information
-     */
-    void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv,
-                      off_t off, struct fuse_file_info *fi);
-
-    /**
-     * Forget about multiple inodes
-     *
-     * See description of the forget function for more
-     * information.
-     *
-     * Valid replies:
-     *   fuse_reply_none
-     *
-     * @param req request handle
-     */
-    void (*forget_multi)(fuse_req_t req, size_t count,
-                         struct fuse_forget_data *forgets);
-
-    /**
-     * Acquire, modify or release a BSD file lock
-     *
-     * Note: if the locking methods are not implemented, the kernel
-     * will still allow file locking to work locally.  Hence these are
-     * only interesting for network filesystems and similar.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param fi file information
-     * @param op the locking operation, see flock(2)
-     */
-    void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                  int op);
-
-    /**
-     * Allocate requested space. If this function returns success then
-     * subsequent writes to the specified range shall not fail due to the lack
-     * of free space on the file system storage media.
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
-     * future fallocate() requests will fail with EOPNOTSUPP without being
-     * sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param offset starting point for allocated region
-     * @param length size of allocated region
-     * @param mode determines the operation to be performed on the given range,
-     *             see fallocate(2)
-     */
-    void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
-                      off_t length, struct fuse_file_info *fi);
-
-    /**
-     * Read directory with attributes
-     *
-     * Send a buffer filled using fuse_add_direntry_plus(), with size not
-     * exceeding the requested size.  Send an empty buffer on end of
-     * stream.
-     *
-     * fi->fh will contain the value set by the opendir method, or
-     * will be undefined if the opendir method didn't set any value.
-     *
-     * In contrast to readdir() (which does not affect the lookup counts),
-     * the lookup count of every entry returned by readdirplus(), except "."
-     * and "..", is incremented by one.
-     *
-     * Valid replies:
-     *   fuse_reply_buf
-     *   fuse_reply_data
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param size maximum number of bytes to send
-     * @param off offset to continue reading the directory stream
-     * @param fi file information
-     */
-    void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
-                        struct fuse_file_info *fi);
-
-    /**
-     * Copy a range of data from one file to another
-     *
-     * Performs an optimized copy between two file descriptors without the
-     * additional cost of transferring data through the FUSE kernel module
-     * to user space (glibc) and then back into the FUSE filesystem again.
-     *
-     * In case this method is not implemented, glibc falls back to reading
-     * data from the source and writing to the destination. Effectively
-     * doing an inefficient copy of the data.
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
-     * future copy_file_range() requests will fail with EOPNOTSUPP without
-     * being sent to the filesystem process.
-     *
-     * Valid replies:
-     *   fuse_reply_write
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino_in the inode number of the source file
-     * @param off_in starting point from where the data should be read
-     * @param fi_in file information of the source file
-     * @param ino_out the inode number of the destination file
-     * @param off_out starting point where the data should be written
-     * @param fi_out file information of the destination file
-     * @param len maximum size of the data to copy
-     * @param flags passed along with the copy_file_range() syscall
-     */
-    void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
-                            struct fuse_file_info *fi_in, fuse_ino_t ino_out,
-                            off_t off_out, struct fuse_file_info *fi_out,
-                            size_t len, int flags);
-
-    /**
-     * Find next data or hole after the specified offset
-     *
-     * If this request is answered with an error code of ENOSYS, this is
-     * treated as a permanent failure, i.e. all future lseek() requests will
-     * fail with the same error code without being sent to the filesystem
-     * process.
-     *
-     * Valid replies:
-     *   fuse_reply_lseek
-     *   fuse_reply_err
-     *
-     * @param req request handle
-     * @param ino the inode number
-     * @param off offset to start search from
-     * @param whence either SEEK_DATA or SEEK_HOLE
-     * @param fi file information
-     */
-    void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
-                  struct fuse_file_info *fi);
-
-    /**
-     * Synchronize file system content
-     *
-     * If this request is answered with an error code of ENOSYS,
-     * this is treated as success and future calls to syncfs() will
-     * succeed automatically without being sent to the filesystem
-     * process.
-     *
-     * @param req request handle
-     * @param ino the inode number
-     */
-    void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
-};
-
-/**
- * Reply with an error code or success.
- *
- * Possible requests:
- *   all except forget
- *
- * Wherever possible, error codes should be chosen from the list of
- * documented error conditions in the corresponding system calls
- * manpage.
- *
- * An error code of ENOSYS is sometimes treated specially. This is
- * indicated in the documentation of the affected handler functions.
- *
- * The following requests may be answered with a zero error code:
- * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr,
- * removexattr, setlk.
- *
- * @param req request handle
- * @param err the positive error value, or zero for success
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_err(fuse_req_t req, int err);
-
-/**
- * Don't send reply
- *
- * Possible requests:
- *   forget
- *   forget_multi
- *   retrieve_reply
- *
- * @param req request handle
- */
-void fuse_reply_none(fuse_req_t req);
-
-/**
- * Reply with a directory entry
- *
- * Possible requests:
- *   lookup, mknod, mkdir, symlink, link
- *
- * Side effects:
- *   increments the lookup count on success
- *
- * @param req request handle
- * @param e the entry parameters
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e);
-
-/**
- * Reply with a directory entry and open parameters
- *
- * currently the following members of 'fi' are used:
- *   fh, direct_io, keep_cache
- *
- * Possible requests:
- *   create
- *
- * Side effects:
- *   increments the lookup count on success
- *
- * @param req request handle
- * @param e the entry parameters
- * @param fi file information
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
-                      const struct fuse_file_info *fi);
-
-/**
- * Reply with attributes
- *
- * Possible requests:
- *   getattr, setattr
- *
- * @param req request handle
- * @param attr the attributes
- * @param attr_timeout validity timeout (in seconds) for the attributes
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
-                    double attr_timeout);
-
-/**
- * Reply with the contents of a symbolic link
- *
- * Possible requests:
- *   readlink
- *
- * @param req request handle
- * @param link symbolic link contents
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_readlink(fuse_req_t req, const char *link);
-
-/**
- * Reply with open parameters
- *
- * currently the following members of 'fi' are used:
- *   fh, direct_io, keep_cache
- *
- * Possible requests:
- *   open, opendir
- *
- * @param req request handle
- * @param fi file information
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi);
-
-/**
- * Reply with number of bytes written
- *
- * Possible requests:
- *   write
- *
- * @param req request handle
- * @param count the number of bytes written
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_write(fuse_req_t req, size_t count);
-
-/**
- * Reply with data
- *
- * Possible requests:
- *   read, readdir, getxattr, listxattr
- *
- * @param req request handle
- * @param buf buffer containing data
- * @param size the size of data in bytes
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size);
-
-/**
- * Reply with data copied/moved from buffer(s)
- *
- * Possible requests:
- *   read, readdir, getxattr, listxattr
- *
- * Side effects:
- *   when used to return data from a readdirplus() (but not readdir())
- *   call, increments the lookup count of each returned entry by one
- *   on success.
- *
- * @param req request handle
- * @param bufv buffer vector
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv);
-
-/**
- * Reply with data vector
- *
- * Possible requests:
- *   read, readdir, getxattr, listxattr
- *
- * @param req request handle
- * @param iov the vector containing the data
- * @param count the size of vector
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count);
-
-/**
- * Reply with filesystem statistics
- *
- * Possible requests:
- *   statfs
- *
- * @param req request handle
- * @param stbuf filesystem statistics
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf);
-
-/**
- * Reply with needed buffer size
- *
- * Possible requests:
- *   getxattr, listxattr
- *
- * @param req request handle
- * @param count the buffer size needed in bytes
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_xattr(fuse_req_t req, size_t count);
-
-/**
- * Reply with file lock information
- *
- * Possible requests:
- *   getlk
- *
- * @param req request handle
- * @param lock the lock information
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_lock(fuse_req_t req, const struct flock *lock);
-
-/**
- * Reply with block index
- *
- * Possible requests:
- *   bmap
- *
- * @param req request handle
- * @param idx block index within device
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_bmap(fuse_req_t req, uint64_t idx);
-
-/*
- * Filling a buffer in readdir
- */
-
-/**
- * Add a directory entry to the buffer
- *
- * Buffer needs to be large enough to hold the entry.  If it's not,
- * then the entry is not filled in but the size of the entry is still
- * returned.  The caller can check this by comparing the bufsize
- * parameter with the returned entry size.  If the entry size is
- * larger than the buffer size, the operation failed.
- *
- * From the 'stbuf' argument the st_ino field and bits 12-15 of the
- * st_mode field are used.  The other fields are ignored.
- *
- * *off* should be any non-zero value that the filesystem can use to
- * identify the current point in the directory stream. It does not
- * need to be the actual physical position. A value of zero is
- * reserved to mean "from the beginning", and should therefore never
- * be used (the first call to fuse_add_direntry should be passed the
- * offset of the second directory entry).
- *
- * @param req request handle
- * @param buf the point where the new entry will be added to the buffer
- * @param bufsize remaining size of the buffer
- * @param name the name of the entry
- * @param stbuf the file attributes
- * @param off the offset of the next entry
- * @return the space needed for the entry
- */
-size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
-                         const char *name, const struct stat *stbuf, off_t off);
-
-/**
- * Add a directory entry to the buffer with the attributes
- *
- * See documentation of `fuse_add_direntry()` for more details.
- *
- * @param req request handle
- * @param buf the point where the new entry will be added to the buffer
- * @param bufsize remaining size of the buffer
- * @param name the name of the entry
- * @param e the directory entry
- * @param off the offset of the next entry
- * @return the space needed for the entry
- */
-size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
-                              const char *name,
-                              const struct fuse_entry_param *e, off_t off);
-
-/**
- * Reply to ask for data fetch and output buffer preparation.  ioctl
- * will be retried with the specified input data fetched and output
- * buffer prepared.
- *
- * Possible requests:
- *   ioctl
- *
- * @param req request handle
- * @param in_iov iovec specifying data to fetch from the caller
- * @param in_count number of entries in in_iov
- * @param out_iov iovec specifying addresses to write output to
- * @param out_count number of entries in out_iov
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
-                           size_t in_count, const struct iovec *out_iov,
-                           size_t out_count);
-
-/**
- * Reply to finish ioctl
- *
- * Possible requests:
- *   ioctl
- *
- * @param req request handle
- * @param result result to be passed to the caller
- * @param buf buffer containing output data
- * @param size length of output data
- */
-int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size);
-
-/**
- * Reply to finish ioctl with iov buffer
- *
- * Possible requests:
- *   ioctl
- *
- * @param req request handle
- * @param result result to be passed to the caller
- * @param iov the vector containing the data
- * @param count the size of vector
- */
-int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
-                         int count);
-
-/**
- * Reply with poll result event mask
- *
- * @param req request handle
- * @param revents poll result event mask
- */
-int fuse_reply_poll(fuse_req_t req, unsigned revents);
-
-/**
- * Reply with offset
- *
- * Possible requests:
- *   lseek
- *
- * @param req request handle
- * @param off offset of next data or hole
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_lseek(fuse_req_t req, off_t off);
-
-/*
- * Notification
- */
-
-/**
- * Notify IO readiness event
- *
- * For more information, please read comment for poll operation.
- *
- * @param ph poll handle to notify IO readiness event for
- */
-int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph);
-
-/**
- * Notify to invalidate cache for an inode.
- *
- * Added in FUSE protocol version 7.12. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * If the filesystem has writeback caching enabled, invalidating an
- * inode will first trigger a writeback of all dirty pages. The call
- * will block until all writeback requests have completed and the
- * inode has been invalidated. It will, however, not wait for
- * completion of pending writeback requests that have been issued
- * before.
- *
- * If there are no dirty pages, this function will never block.
- *
- * @param se the session object
- * @param ino the inode number
- * @param off the offset in the inode where to start invalidating
- *            or negative to invalidate attributes only
- * @param len the amount of cache to invalidate or 0 for all
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
-                                     off_t off, off_t len);
-
-/**
- * Notify to invalidate parent attributes and the dentry matching
- * parent/name
- *
- * To avoid a deadlock this function must not be called in the
- * execution path of a related filesystem operation or within any code
- * that could hold a lock that could be needed to execute such an
- * operation. As of kernel 4.18, a "related operation" is a lookup(),
- * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create()
- * request for the parent, and a setattr(), unlink(), rmdir(),
- * rename(), setxattr(), removexattr(), readdir() or readdirplus()
- * request for the inode itself.
- *
- * When called correctly, this function will never block.
- *
- * Added in FUSE protocol version 7.12. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * @param se the session object
- * @param parent inode number
- * @param name file name
- * @param namelen strlen() of file name
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
-                                     const char *name, size_t namelen);
-
-/**
- * This function behaves like fuse_lowlevel_notify_inval_entry() with
- * the following additional effect (at least as of Linux kernel 4.8):
- *
- * If the provided *child* inode matches the inode that is currently
- * associated with the cached dentry, and if there are any inotify
- * watches registered for the dentry, then the watchers are informed
- * that the dentry has been deleted.
- *
- * To avoid a deadlock this function must not be called while
- * executing a related filesystem operation or while holding a lock
- * that could be needed to execute such an operation (see the
- * description of fuse_lowlevel_notify_inval_entry() for more
- * details).
- *
- * When called correctly, this function will never block.
- *
- * Added in FUSE protocol version 7.18. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * @param se the session object
- * @param parent inode number
- * @param child inode number
- * @param name file name
- * @param namelen strlen() of file name
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
-                                fuse_ino_t child, const char *name,
-                                size_t namelen);
-
-/**
- * Store data to the kernel buffers
- *
- * Synchronously store data in the kernel buffers belonging to the
- * given inode.  The stored data is marked up-to-date (no read will be
- * performed against it, unless it's invalidated or evicted from the
- * cache).
- *
- * If the stored data overflows the current file size, then the size
- * is extended, similarly to a write(2) on the filesystem.
- *
- * If this function returns an error, then the store wasn't fully
- * completed, but it may have been partially completed.
- *
- * Added in FUSE protocol version 7.15. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * @param se the session object
- * @param ino the inode number
- * @param offset the starting offset into the file to store to
- * @param bufv buffer vector
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
-                               off_t offset, struct fuse_bufvec *bufv);
-
-/*
- * Utility functions
- */
-
-/**
- * Get the userdata from the request
- *
- * @param req request handle
- * @return the user data passed to fuse_session_new()
- */
-void *fuse_req_userdata(fuse_req_t req);
-
-/**
- * Get the context from the request
- *
- * The pointer returned by this function will only be valid for the
- * request's lifetime
- *
- * @param req request handle
- * @return the context structure
- */
-const struct fuse_ctx *fuse_req_ctx(fuse_req_t req);
-
-/**
- * Callback function for an interrupt
- *
- * @param req interrupted request
- * @param data user data
- */
-typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data);
-
-/**
- * Register/unregister callback for an interrupt
- *
- * If an interrupt has already happened, then the callback function is
- * called from within this function, hence it's not possible for
- * interrupts to be lost.
- *
- * @param req request handle
- * @param func the callback function or NULL for unregister
- * @param data user data passed to the callback function
- */
-void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
-                             void *data);
-
-/**
- * Check if a request has already been interrupted
- *
- * @param req request handle
- * @return 1 if the request has been interrupted, 0 otherwise
- */
-int fuse_req_interrupted(fuse_req_t req);
-
-/**
- * Check if the session is connected via virtio
- *
- * @param se session object
- * @return 1 if the session is a virtio session
- */
-int fuse_lowlevel_is_virtio(struct fuse_session *se);
-
-/*
- * Inquiry functions
- */
-
-/**
- * Print low-level version information to stdout.
- */
-void fuse_lowlevel_version(void);
-
-/**
- * Print available low-level options to stdout. This is not an
- * exhaustive list, but includes only those options that may be of
- * interest to an end-user of a file system.
- */
-void fuse_lowlevel_help(void);
-
-/**
- * Print available options for `fuse_parse_cmdline()`.
- */
-void fuse_cmdline_help(void);
-
-/*
- * Filesystem setup & teardown
- */
-
-struct fuse_cmdline_opts {
-    int foreground;
-    int debug;
-    int nodefault_subtype;
-    int show_version;
-    int show_help;
-    int print_capabilities;
-    int syslog;
-    int log_level;
-    unsigned int max_idle_threads;
-    unsigned long rlimit_nofile;
-};
-
-/**
- * Utility function to parse common options for simple file systems
- * using the low-level API. A help text that describes the available
- * options can be printed with `fuse_cmdline_help`. A single
- * non-option argument is treated as the mountpoint. Multiple
- * non-option arguments will result in an error.
- *
- * If neither -o subtype= or -o fsname= options are given, a new
- * subtype option will be added and set to the basename of the program
- * (the fsname will remain unset, and then defaults to "fuse").
- *
- * Known options will be removed from *args*, unknown options will
- * remain.
- *
- * @param args argument vector (input+output)
- * @param opts output argument for parsed options
- * @return 0 on success, -1 on failure
- */
-int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts);
-
-/**
- * Create a low level session.
- *
- * Returns a session structure suitable for passing to
- * fuse_session_mount() and fuse_session_loop().
- *
- * This function accepts most file-system independent mount options
- * (like context, nodev, ro - see mount(8)), as well as the general
- * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and
- * -o default_permissions, but not ``-o use_ino``).  Instead of `-o
- * debug`, debugging may also enabled with `-d` or `--debug`.
- *
- * If not all options are known, an error message is written to stderr
- * and the function returns NULL.
- *
- * Option parsing skips argv[0], which is assumed to contain the
- * program name. To prevent accidentally passing an option in
- * argv[0], this element must always be present (even if no options
- * are specified). It may be set to the empty string ('\0') if no
- * reasonable value can be provided.
- *
- * @param args argument vector
- * @param op the (low-level) filesystem operations
- * @param op_size sizeof(struct fuse_lowlevel_ops)
- * @param userdata user data
- *
- * @return the fuse session on success, NULL on failure
- **/
-struct fuse_session *fuse_session_new(struct fuse_args *args,
-                                      const struct fuse_lowlevel_ops *op,
-                                      size_t op_size, void *userdata);
-
-/**
- * Mount a FUSE file system.
- *
- * @param se session object
- *
- * @return 0 on success, -1 on failure.
- **/
-int fuse_session_mount(struct fuse_session *se);
-
-/**
- * Enter a single threaded, blocking event loop.
- *
- * When the event loop terminates because the connection to the FUSE
- * kernel module has been closed, this function returns zero. This
- * happens when the filesystem is unmounted regularly (by the
- * filesystem owner or root running the umount(8) or fusermount(1)
- * command), or if connection is explicitly severed by writing ``1``
- * to the``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only
- * way to distinguish between these two conditions is to check if the
- * filesystem is still mounted after the session loop returns.
- *
- * When some error occurs during request processing, the function
- * returns a negated errno(3) value.
- *
- * If the loop has been terminated because of a signal handler
- * installed by fuse_set_signal_handlers(), this function returns the
- * (positive) signal value that triggered the exit.
- *
- * @param se the session
- * @return 0, -errno, or a signal value
- */
-int fuse_session_loop(struct fuse_session *se);
-
-/**
- * Flag a session as terminated.
- *
- * This function is invoked by the POSIX signal handlers, when
- * registered using fuse_set_signal_handlers(). It will cause any
- * running event loops to terminate on the next opportunity.
- *
- * @param se the session
- */
-void fuse_session_exit(struct fuse_session *se);
-
-/**
- * Reset the terminated flag of a session
- *
- * @param se the session
- */
-void fuse_session_reset(struct fuse_session *se);
-
-/**
- * Query the terminated flag of a session
- *
- * @param se the session
- * @return 1 if exited, 0 if not exited
- */
-int fuse_session_exited(struct fuse_session *se);
-
-/**
- * Ensure that file system is unmounted.
- *
- * In regular operation, the file system is typically unmounted by the
- * user calling umount(8) or fusermount(1), which then terminates the
- * FUSE session loop. However, the session loop may also terminate as
- * a result of an explicit call to fuse_session_exit() (e.g. by a
- * signal handler installed by fuse_set_signal_handler()). In this
- * case the filesystem remains mounted, but any attempt to access it
- * will block (while the filesystem process is still running) or give
- * an ESHUTDOWN error (after the filesystem process has terminated).
- *
- * If the communication channel with the FUSE kernel module is still
- * open (i.e., if the session loop was terminated by an explicit call
- * to fuse_session_exit()), this function will close it and unmount
- * the filesystem. If the communication channel has been closed by the
- * kernel, this method will do (almost) nothing.
- *
- * NOTE: The above semantics mean that if the connection to the kernel
- * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file,
- * this method will *not* unmount the filesystem.
- *
- * @param se the session
- */
-void fuse_session_unmount(struct fuse_session *se);
-
-/**
- * Destroy a session
- *
- * @param se the session
- */
-void fuse_session_destroy(struct fuse_session *se);
-
-/*
- * Custom event loop support
- */
-
-/**
- * Return file descriptor for communication with kernel.
- *
- * The file selector can be used to integrate FUSE with a custom event
- * loop. Whenever data is available for reading on the provided fd,
- * the event loop should call `fuse_session_receive_buf` followed by
- * `fuse_session_process_buf` to process the request.
- *
- * The returned file descriptor is valid until `fuse_session_unmount`
- * is called.
- *
- * @param se the session
- * @return a file descriptor
- */
-int fuse_session_fd(struct fuse_session *se);
-
-/**
- * Process a raw request supplied in a generic buffer
- *
- * The fuse_buf may contain a memory buffer or a pipe file descriptor.
- *
- * @param se the session
- * @param buf the fuse_buf containing the request
- */
-void fuse_session_process_buf(struct fuse_session *se,
-                              const struct fuse_buf *buf);
-
-/**
- * Read a raw request from the kernel into the supplied buffer.
- *
- * Depending on file system options, system capabilities, and request
- * size the request is either read into a memory buffer or spliced
- * into a temporary pipe.
- *
- * @param se the session
- * @param buf the fuse_buf to store the request in
- * @return the actual size of the raw request, or -errno on error
- */
-int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf);
-
-#endif /* FUSE_LOWLEVEL_H_ */
diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h
deleted file mode 100644 (file)
index f252baa..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include <pthread.h>
-
-/*
- * Versioned symbols cannot be used in some cases because it
- *   - confuse the dynamic linker in uClibc
- *   - not supported on MacOSX (in MachO binary format)
- */
-#if (!defined(__UCLIBC__) && !defined(__APPLE__))
-#define FUSE_SYMVER(x) __asm__(x)
-#else
-#define FUSE_SYMVER(x)
-#endif
-
-#ifndef USE_UCLIBC
-#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL)
-#else
-/* Is this hack still needed? */
-static inline void fuse_mutex_init(pthread_mutex_t *mut)
-{
-    pthread_mutexattr_t attr;
-    pthread_mutexattr_init(&attr);
-    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
-    pthread_mutex_init(mut, &attr);
-    pthread_mutexattr_destroy(&attr);
-}
-#endif
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM
-/* Linux */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val)
-#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val)
-#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val)
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC)
-/* FreeBSD */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val)
-#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val)
-#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val)
-#else
-#define ST_ATIM_NSEC(stbuf) 0
-#define ST_CTIM_NSEC(stbuf) 0
-#define ST_MTIM_NSEC(stbuf) 0
-#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0)
-#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0)
-#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0)
-#endif
diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c
deleted file mode 100644 (file)
index 9d37144..0000000
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * Implementation of option parsing routines (dealing with `struct
- * fuse_args`).
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_opt.h"
-#include "fuse_i.h"
-#include "fuse_misc.h"
-
-
-struct fuse_opt_context {
-    void *data;
-    const struct fuse_opt *opt;
-    fuse_opt_proc_t proc;
-    int argctr;
-    int argc;
-    char **argv;
-    struct fuse_args outargs;
-    char *opts;
-    int nonopt;
-};
-
-void fuse_opt_free_args(struct fuse_args *args)
-{
-    if (args) {
-        if (args->argv && args->allocated) {
-            int i;
-            for (i = 0; i < args->argc; i++) {
-                free(args->argv[i]);
-            }
-            free(args->argv);
-        }
-        args->argc = 0;
-        args->argv = NULL;
-        args->allocated = 0;
-    }
-}
-
-static int alloc_failed(void)
-{
-    fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
-    return -1;
-}
-
-int fuse_opt_add_arg(struct fuse_args *args, const char *arg)
-{
-    char **newargv;
-    char *newarg;
-
-    assert(!args->argv || args->allocated);
-
-    newarg = strdup(arg);
-    if (!newarg) {
-        return alloc_failed();
-    }
-
-    newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *));
-    if (!newargv) {
-        free(newarg);
-        return alloc_failed();
-    }
-
-    args->argv = newargv;
-    args->allocated = 1;
-    args->argv[args->argc++] = newarg;
-    args->argv[args->argc] = NULL;
-    return 0;
-}
-
-static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos,
-                                      const char *arg)
-{
-    assert(pos <= args->argc);
-    if (fuse_opt_add_arg(args, arg) == -1) {
-        return -1;
-    }
-
-    if (pos != args->argc - 1) {
-        char *newarg = args->argv[args->argc - 1];
-        memmove(&args->argv[pos + 1], &args->argv[pos],
-                sizeof(char *) * (args->argc - pos - 1));
-        args->argv[pos] = newarg;
-    }
-    return 0;
-}
-
-int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg)
-{
-    return fuse_opt_insert_arg_common(args, pos, arg);
-}
-
-static int next_arg(struct fuse_opt_context *ctx, const char *opt)
-{
-    if (ctx->argctr + 1 >= ctx->argc) {
-        fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt);
-        return -1;
-    }
-    ctx->argctr++;
-    return 0;
-}
-
-static int add_arg(struct fuse_opt_context *ctx, const char *arg)
-{
-    return fuse_opt_add_arg(&ctx->outargs, arg);
-}
-
-static int add_opt_common(char **opts, const char *opt, int esc)
-{
-    unsigned oldlen = *opts ? strlen(*opts) : 0;
-    char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1);
-
-    if (!d) {
-        return alloc_failed();
-    }
-
-    *opts = d;
-    if (oldlen) {
-        d += oldlen;
-        *d++ = ',';
-    }
-
-    for (; *opt; opt++) {
-        if (esc && (*opt == ',' || *opt == '\\')) {
-            *d++ = '\\';
-        }
-        *d++ = *opt;
-    }
-    *d = '\0';
-
-    return 0;
-}
-
-int fuse_opt_add_opt(char **opts, const char *opt)
-{
-    return add_opt_common(opts, opt, 0);
-}
-
-int fuse_opt_add_opt_escaped(char **opts, const char *opt)
-{
-    return add_opt_common(opts, opt, 1);
-}
-
-static int add_opt(struct fuse_opt_context *ctx, const char *opt)
-{
-    return add_opt_common(&ctx->opts, opt, 1);
-}
-
-static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key,
-                     int iso)
-{
-    if (key == FUSE_OPT_KEY_DISCARD) {
-        return 0;
-    }
-
-    if (key != FUSE_OPT_KEY_KEEP && ctx->proc) {
-        int res = ctx->proc(ctx->data, arg, key, &ctx->outargs);
-        if (res == -1 || !res) {
-            return res;
-        }
-    }
-    if (iso) {
-        return add_opt(ctx, arg);
-    } else {
-        return add_arg(ctx, arg);
-    }
-}
-
-static int match_template(const char *t, const char *arg, unsigned *sepp)
-{
-    int arglen = strlen(arg);
-    const char *sep = strchr(t, '=');
-    sep = sep ? sep : strchr(t, ' ');
-    if (sep && (!sep[1] || sep[1] == '%')) {
-        int tlen = sep - t;
-        if (sep[0] == '=') {
-            tlen++;
-        }
-        if (arglen >= tlen && strncmp(arg, t, tlen) == 0) {
-            *sepp = sep - t;
-            return 1;
-        }
-    }
-    if (strcmp(t, arg) == 0) {
-        *sepp = 0;
-        return 1;
-    }
-    return 0;
-}
-
-static const struct fuse_opt *find_opt(const struct fuse_opt *opt,
-                                       const char *arg, unsigned *sepp)
-{
-    for (; opt && opt->templ; opt++) {
-        if (match_template(opt->templ, arg, sepp)) {
-            return opt;
-        }
-    }
-    return NULL;
-}
-
-int fuse_opt_match(const struct fuse_opt *opts, const char *opt)
-{
-    unsigned dummy;
-    return find_opt(opts, opt, &dummy) ? 1 : 0;
-}
-
-static int process_opt_param(void *var, const char *format, const char *param,
-                             const char *arg)
-{
-    assert(format[0] == '%');
-    if (format[1] == 's') {
-        char **s = var;
-        char *copy = strdup(param);
-        if (!copy) {
-            return alloc_failed();
-        }
-
-        free(*s);
-        *s = copy;
-    } else {
-        if (sscanf(param, format, var) != 1) {
-            fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n",
-                     arg);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt,
-                       unsigned sep, const char *arg, int iso)
-{
-    if (opt->offset == -1U) {
-        if (call_proc(ctx, arg, opt->value, iso) == -1) {
-            return -1;
-        }
-    } else {
-        void *var = (char *)ctx->data + opt->offset;
-        if (sep && opt->templ[sep + 1]) {
-            const char *param = arg + sep;
-            if (opt->templ[sep] == '=') {
-                param++;
-            }
-            if (process_opt_param(var, opt->templ + sep + 1, param, arg) ==
-                -1) {
-                return -1;
-            }
-        } else {
-            *(int *)var = opt->value;
-        }
-    }
-    return 0;
-}
-
-static int process_opt_sep_arg(struct fuse_opt_context *ctx,
-                               const struct fuse_opt *opt, unsigned sep,
-                               const char *arg, int iso)
-{
-    int res;
-    char *newarg;
-    char *param;
-
-    if (next_arg(ctx, arg) == -1) {
-        return -1;
-    }
-
-    param = ctx->argv[ctx->argctr];
-    newarg = g_try_malloc(sep + strlen(param) + 1);
-    if (!newarg) {
-        return alloc_failed();
-    }
-
-    memcpy(newarg, arg, sep);
-    strcpy(newarg + sep, param);
-    res = process_opt(ctx, opt, sep, newarg, iso);
-    g_free(newarg);
-
-    return res;
-}
-
-static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso)
-{
-    unsigned sep;
-    const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep);
-    if (opt) {
-        for (; opt; opt = find_opt(opt + 1, arg, &sep)) {
-            int res;
-            if (sep && opt->templ[sep] == ' ' && !arg[sep]) {
-                res = process_opt_sep_arg(ctx, opt, sep, arg, iso);
-            } else {
-                res = process_opt(ctx, opt, sep, arg, iso);
-            }
-            if (res == -1) {
-                return -1;
-            }
-        }
-        return 0;
-    } else {
-        return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso);
-    }
-}
-
-static int process_real_option_group(struct fuse_opt_context *ctx, char *opts)
-{
-    char *s = opts;
-    char *d = s;
-    int end = 0;
-
-    while (!end) {
-        if (*s == '\0') {
-            end = 1;
-        }
-        if (*s == ',' || end) {
-            int res;
-
-            *d = '\0';
-            res = process_gopt(ctx, opts, 1);
-            if (res == -1) {
-                return -1;
-            }
-            d = opts;
-        } else {
-            if (s[0] == '\\' && s[1] != '\0') {
-                s++;
-                if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' &&
-                    s[2] >= '0' && s[2] <= '7') {
-                    *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 +
-                           (s[2] - '0');
-                    s += 2;
-                } else {
-                    *d++ = *s;
-                }
-            } else {
-                *d++ = *s;
-            }
-        }
-        s++;
-    }
-
-    return 0;
-}
-
-static int process_option_group(struct fuse_opt_context *ctx, const char *opts)
-{
-    int res;
-    char *copy = strdup(opts);
-
-    if (!copy) {
-        fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
-        return -1;
-    }
-    res = process_real_option_group(ctx, copy);
-    free(copy);
-    return res;
-}
-
-static int process_one(struct fuse_opt_context *ctx, const char *arg)
-{
-    if (ctx->nonopt || arg[0] != '-') {
-        return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0);
-    } else if (arg[1] == 'o') {
-        if (arg[2]) {
-            return process_option_group(ctx, arg + 2);
-        } else {
-            if (next_arg(ctx, arg) == -1) {
-                return -1;
-            }
-
-            return process_option_group(ctx, ctx->argv[ctx->argctr]);
-        }
-    } else if (arg[1] == '-' && !arg[2]) {
-        if (add_arg(ctx, arg) == -1) {
-            return -1;
-        }
-        ctx->nonopt = ctx->outargs.argc;
-        return 0;
-    } else {
-        return process_gopt(ctx, arg, 0);
-    }
-}
-
-static int opt_parse(struct fuse_opt_context *ctx)
-{
-    if (ctx->argc) {
-        if (add_arg(ctx, ctx->argv[0]) == -1) {
-            return -1;
-        }
-    }
-
-    for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) {
-        if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) {
-            return -1;
-        }
-    }
-
-    if (ctx->opts) {
-        if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 ||
-            fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) {
-            return -1;
-        }
-    }
-
-    /* If option separator ("--") is the last argument, remove it */
-    if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc &&
-        strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) {
-        free(ctx->outargs.argv[ctx->outargs.argc - 1]);
-        ctx->outargs.argv[--ctx->outargs.argc] = NULL;
-    }
-
-    return 0;
-}
-
-int fuse_opt_parse(struct fuse_args *args, void *data,
-                   const struct fuse_opt opts[], fuse_opt_proc_t proc)
-{
-    int res;
-    struct fuse_opt_context ctx = {
-        .data = data,
-        .opt = opts,
-        .proc = proc,
-    };
-
-    if (!args || !args->argv || !args->argc) {
-        return 0;
-    }
-
-    ctx.argc = args->argc;
-    ctx.argv = args->argv;
-
-    res = opt_parse(&ctx);
-    if (res != -1) {
-        struct fuse_args tmp = *args;
-        *args = ctx.outargs;
-        ctx.outargs = tmp;
-    }
-    free(ctx.opts);
-    fuse_opt_free_args(&ctx.outargs);
-    return res;
-}
diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h
deleted file mode 100644 (file)
index 8f59b4d..0000000
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_OPT_H_
-#define FUSE_OPT_H_
-
-/** @file
- *
- * This file defines the option parsing interface of FUSE
- */
-
-/**
- * Option description
- *
- * This structure describes a single option, and action associated
- * with it, in case it matches.
- *
- * More than one such match may occur, in which case the action for
- * each match is executed.
- *
- * There are three possible actions in case of a match:
- *
- * i) An integer (int or unsigned) variable determined by 'offset' is
- *    set to 'value'
- *
- * ii) The processing function is called, with 'value' as the key
- *
- * iii) An integer (any) or string (char *) variable determined by
- *    'offset' is set to the value of an option parameter
- *
- * 'offset' should normally be either set to
- *
- *  - 'offsetof(struct foo, member)'  actions i) and iii)
- *
- *  - -1                              action ii)
- *
- * The 'offsetof()' macro is defined in the <stddef.h> header.
- *
- * The template determines which options match, and also have an
- * effect on the action.  Normally the action is either i) or ii), but
- * if a format is present in the template, then action iii) is
- * performed.
- *
- * The types of templates are:
- *
- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only
- *   themselves.  Invalid values are "--" and anything beginning
- *   with "-o"
- *
- * 2) "foo", "foo-bar", etc.  These match "-ofoo", "-ofoo-bar" or
- *    the relevant option in a comma separated option list
- *
- * 3) "bar=", "--foo=", etc.  These are variations of 1) and 2)
- *    which have a parameter
- *
- * 4) "bar=%s", "--foo=%lu", etc.  Same matching as above but perform
- *    action iii).
- *
- * 5) "-x ", etc.  Matches either "-xparam" or "-x param" as
- *    two separate arguments
- *
- * 6) "-x %s", etc.  Combination of 4) and 5)
- *
- * If the format is "%s", memory is allocated for the string unlike with
- * scanf().  The previous value (if non-NULL) stored at the this location is
- * freed.
- */
-struct fuse_opt {
-    /** Matching template and optional parameter formatting */
-    const char *templ;
-
-    /**
-     * Offset of variable within 'data' parameter of fuse_opt_parse()
-     * or -1
-     */
-    unsigned long offset;
-
-    /**
-     * Value to set the variable to, or to be passed as 'key' to the
-     * processing function. Ignored if template has a format
-     */
-    int value;
-};
-
-/**
- * Key option. In case of a match, the processing function will be
- * called with the specified key.
- */
-#define FUSE_OPT_KEY(templ, key) \
-    {                            \
-        templ, -1U, key          \
-    }
-
-/**
- * Last option. An array of 'struct fuse_opt' must end with a NULL
- * template value
- */
-#define FUSE_OPT_END \
-    {                \
-        NULL, 0, 0   \
-    }
-
-/**
- * Argument list
- */
-struct fuse_args {
-    /** Argument count */
-    int argc;
-
-    /** Argument vector.  NULL terminated */
-    char **argv;
-
-    /** Is 'argv' allocated? */
-    int allocated;
-};
-
-/**
- * Initializer for 'struct fuse_args'
- */
-#define FUSE_ARGS_INIT(argc, argv) \
-    {                              \
-        argc, argv, 0              \
-    }
-
-/**
- * Key value passed to the processing function if an option did not
- * match any template
- */
-#define FUSE_OPT_KEY_OPT -1
-
-/**
- * Key value passed to the processing function for all non-options
- *
- * Non-options are the arguments beginning with a character other than
- * '-' or all arguments after the special '--' option
- */
-#define FUSE_OPT_KEY_NONOPT -2
-
-/**
- * Special key value for options to keep
- *
- * Argument is not passed to processing function, but behave as if the
- * processing function returned 1
- */
-#define FUSE_OPT_KEY_KEEP -3
-
-/**
- * Special key value for options to discard
- *
- * Argument is not passed to processing function, but behave as if the
- * processing function returned zero
- */
-#define FUSE_OPT_KEY_DISCARD -4
-
-/**
- * Processing function
- *
- * This function is called if
- *    - option did not match any 'struct fuse_opt'
- *    - argument is a non-option
- *    - option did match and offset was set to -1
- *
- * The 'arg' parameter will always contain the whole argument or
- * option including the parameter if exists.  A two-argument option
- * ("-x foo") is always converted to single argument option of the
- * form "-xfoo" before this function is called.
- *
- * Options of the form '-ofoo' are passed to this function without the
- * '-o' prefix.
- *
- * The return value of this function determines whether this argument
- * is to be inserted into the output argument vector, or discarded.
- *
- * @param data is the user data passed to the fuse_opt_parse() function
- * @param arg is the whole argument or option
- * @param key determines why the processing function was called
- * @param outargs the current output argument list
- * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept
- */
-typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key,
-                               struct fuse_args *outargs);
-
-/**
- * Option parsing function
- *
- * If 'args' was returned from a previous call to fuse_opt_parse() or
- * it was constructed from
- *
- * A NULL 'args' is equivalent to an empty argument vector
- *
- * A NULL 'opts' is equivalent to an 'opts' array containing a single
- * end marker
- *
- * A NULL 'proc' is equivalent to a processing function always
- * returning '1'
- *
- * @param args is the input and output argument list
- * @param data is the user data
- * @param opts is the option description array
- * @param proc is the processing function
- * @return -1 on error, 0 on success
- */
-int fuse_opt_parse(struct fuse_args *args, void *data,
-                   const struct fuse_opt opts[], fuse_opt_proc_t proc);
-
-/**
- * Add an option to a comma separated option list
- *
- * @param opts is a pointer to an option list, may point to a NULL value
- * @param opt is the option to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_add_opt(char **opts, const char *opt);
-
-/**
- * Add an option, escaping commas, to a comma separated option list
- *
- * @param opts is a pointer to an option list, may point to a NULL value
- * @param opt is the option to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_add_opt_escaped(char **opts, const char *opt);
-
-/**
- * Add an argument to a NULL terminated argument vector
- *
- * @param args is the structure containing the current argument list
- * @param arg is the new argument to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_add_arg(struct fuse_args *args, const char *arg);
-
-/**
- * Add an argument at the specified position in a NULL terminated
- * argument vector
- *
- * Adds the argument to the N-th position.  This is useful for adding
- * options at the beginning of the array which must not come after the
- * special '--' option.
- *
- * @param args is the structure containing the current argument list
- * @param pos is the position at which to add the argument
- * @param arg is the new argument to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg);
-
-/**
- * Free the contents of argument list
- *
- * The structure itself is not freed
- *
- * @param args is the structure containing the argument list
- */
-void fuse_opt_free_args(struct fuse_args *args);
-
-
-/**
- * Check if an option matches
- *
- * @param opts is the option description array
- * @param opt is the option to match
- * @return 1 if a match is found, 0 if not
- */
-int fuse_opt_match(const struct fuse_opt opts[], const char *opt);
-
-#endif /* FUSE_OPT_H_ */
diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c
deleted file mode 100644 (file)
index 1de46de..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * Utility functions for setting signal handlers.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "fuse_lowlevel.h"
-
-
-static struct fuse_session *fuse_instance;
-
-static void exit_handler(int sig)
-{
-    if (fuse_instance) {
-        fuse_session_exit(fuse_instance);
-        if (sig <= 0) {
-            fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n");
-            abort();
-        }
-        fuse_instance->error = sig;
-    }
-}
-
-static void do_nothing(int sig)
-{
-    (void)sig;
-}
-
-static int set_one_signal_handler(int sig, void (*handler)(int), int remove)
-{
-    struct sigaction sa;
-    struct sigaction old_sa;
-
-    memset(&sa, 0, sizeof(struct sigaction));
-    sa.sa_handler = remove ? SIG_DFL : handler;
-    sigemptyset(&(sa.sa_mask));
-    sa.sa_flags = 0;
-
-    if (sigaction(sig, NULL, &old_sa) == -1) {
-        fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n",
-                 strerror(errno));
-        return -1;
-    }
-
-    if (old_sa.sa_handler == (remove ? handler : SIG_DFL) &&
-        sigaction(sig, &sa, NULL) == -1) {
-        fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n",
-                 strerror(errno));
-        return -1;
-    }
-    return 0;
-}
-
-int fuse_set_signal_handlers(struct fuse_session *se)
-{
-    /*
-     * If we used SIG_IGN instead of the do_nothing function,
-     * then we would be unable to tell if we set SIG_IGN (and
-     * thus should reset to SIG_DFL in fuse_remove_signal_handlers)
-     * or if it was already set to SIG_IGN (and should be left
-     * untouched.
-     */
-    if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 ||
-        set_one_signal_handler(SIGINT, exit_handler, 0) == -1 ||
-        set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 ||
-        set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) {
-        return -1;
-    }
-
-    fuse_instance = se;
-    return 0;
-}
-
-void fuse_remove_signal_handlers(struct fuse_session *se)
-{
-    if (fuse_instance != se) {
-        fuse_log(FUSE_LOG_ERR,
-                 "fuse: fuse_remove_signal_handlers: unknown session\n");
-    } else {
-        fuse_instance = NULL;
-    }
-
-    set_one_signal_handler(SIGHUP, exit_handler, 1);
-    set_one_signal_handler(SIGINT, exit_handler, 1);
-    set_one_signal_handler(SIGTERM, exit_handler, 1);
-    set_one_signal_handler(SIGPIPE, do_nothing, 1);
-}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
deleted file mode 100644 (file)
index 9368e29..0000000
+++ /dev/null
@@ -1,1081 +0,0 @@
-/*
- * virtio-fs glue for FUSE
- * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Dave Gilbert  <dgilbert@redhat.com>
- *
- * Implements the glue between libfuse and libvhost-user
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "qemu/iov.h"
-#include "qapi/error.h"
-#include "fuse_i.h"
-#include "standard-headers/linux/fuse.h"
-#include "fuse_misc.h"
-#include "fuse_opt.h"
-#include "fuse_virtio.h"
-
-#include <sys/eventfd.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <grp.h>
-
-#include "libvhost-user.h"
-
-struct fv_VuDev;
-struct fv_QueueInfo {
-    pthread_t thread;
-    /*
-     * This lock protects the VuVirtq preventing races between
-     * fv_queue_thread() and fv_queue_worker().
-     */
-    pthread_mutex_t vq_lock;
-
-    struct fv_VuDev *virtio_dev;
-
-    /* Our queue index, corresponds to array position */
-    int qidx;
-    int kick_fd;
-    int kill_fd; /* For killing the thread */
-};
-
-/* A FUSE request */
-typedef struct {
-    VuVirtqElement elem;
-    struct fuse_chan ch;
-
-    /* Used to complete requests that involve no reply */
-    bool reply_sent;
-} FVRequest;
-
-/*
- * We pass the dev element into libvhost-user
- * and then use it to get back to the outer
- * container for other data.
- */
-struct fv_VuDev {
-    VuDev dev;
-    struct fuse_session *se;
-
-    /*
-     * Either handle virtqueues or vhost-user protocol messages.  Don't do
-     * both at the same time since that could lead to race conditions if
-     * virtqueues or memory tables change while another thread is accessing
-     * them.
-     *
-     * The assumptions are:
-     * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev.
-     * 2. virtio_loop() reads/writes virtqueues and VuDev.
-     */
-    pthread_rwlock_t vu_dispatch_rwlock;
-
-    /*
-     * The following pair of fields are only accessed in the main
-     * virtio_loop
-     */
-    size_t nqueues;
-    struct fv_QueueInfo **qi;
-};
-
-/* Callback from libvhost-user */
-static uint64_t fv_get_features(VuDev *dev)
-{
-    return 1ULL << VIRTIO_F_VERSION_1;
-}
-
-/* Callback from libvhost-user */
-static void fv_set_features(VuDev *dev, uint64_t features)
-{
-}
-
-/*
- * Callback from libvhost-user if there's a new fd we're supposed to listen
- * to, typically a queue kick?
- */
-static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
-                         void *data)
-{
-    fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
-}
-
-/*
- * Callback from libvhost-user if we're no longer supposed to listen on an fd
- */
-static void fv_remove_watch(VuDev *dev, int fd)
-{
-    fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
-}
-
-/* Callback from libvhost-user to panic */
-static void fv_panic(VuDev *dev, const char *err)
-{
-    fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
-    /* TODO: Allow reconnects?? */
-    exit(EXIT_FAILURE);
-}
-
-/*
- * Copy from an iovec into a fuse_buf (memory only)
- * Caller must ensure there is space
- */
-static size_t copy_from_iov(struct fuse_buf *buf, size_t out_num,
-                            const struct iovec *out_sg,
-                            size_t max)
-{
-    void *dest = buf->mem;
-    size_t copied = 0;
-
-    while (out_num && max) {
-        size_t onelen = out_sg->iov_len;
-        onelen = MIN(onelen, max);
-        memcpy(dest, out_sg->iov_base, onelen);
-        dest += onelen;
-        copied += onelen;
-        out_sg++;
-        out_num--;
-        max -= onelen;
-    }
-
-    return copied;
-}
-
-/*
- * Skip 'skip' bytes in the iov; 'sg_1stindex' is set as
- * the index for the 1st iovec to read data from, and
- * 'sg_1stskip' is the number of bytes to skip in that entry.
- *
- * Returns True if there are at least 'skip' bytes in the iovec
- *
- */
-static bool skip_iov(const struct iovec *sg, size_t sg_size,
-                     size_t skip,
-                     size_t *sg_1stindex, size_t *sg_1stskip)
-{
-    size_t vec;
-
-    for (vec = 0; vec < sg_size; vec++) {
-        if (sg[vec].iov_len > skip) {
-            *sg_1stskip = skip;
-            *sg_1stindex = vec;
-
-            return true;
-        }
-
-        skip -= sg[vec].iov_len;
-    }
-
-    *sg_1stindex = vec;
-    *sg_1stskip = 0;
-    return skip == 0;
-}
-
-/*
- * Copy from one iov to another, the given number of bytes
- * The caller must have checked sizes.
- */
-static void copy_iov(struct iovec *src_iov, int src_count,
-                     struct iovec *dst_iov, int dst_count, size_t to_copy)
-{
-    size_t dst_offset = 0;
-    /* Outer loop copies 'src' elements */
-    while (to_copy) {
-        assert(src_count);
-        size_t src_len = src_iov[0].iov_len;
-        size_t src_offset = 0;
-
-        if (src_len > to_copy) {
-            src_len = to_copy;
-        }
-        /* Inner loop copies contents of one 'src' to maybe multiple dst. */
-        while (src_len) {
-            assert(dst_count);
-            size_t dst_len = dst_iov[0].iov_len - dst_offset;
-            if (dst_len > src_len) {
-                dst_len = src_len;
-            }
-
-            memcpy(dst_iov[0].iov_base + dst_offset,
-                   src_iov[0].iov_base + src_offset, dst_len);
-            src_len -= dst_len;
-            to_copy -= dst_len;
-            src_offset += dst_len;
-            dst_offset += dst_len;
-
-            assert(dst_offset <= dst_iov[0].iov_len);
-            if (dst_offset == dst_iov[0].iov_len) {
-                dst_offset = 0;
-                dst_iov++;
-                dst_count--;
-            }
-        }
-        src_iov++;
-        src_count--;
-    }
-}
-
-/*
- * pthread_rwlock_rdlock() and pthread_rwlock_wrlock can fail if
- * a deadlock condition is detected or the current thread already
- * owns the lock. They can also fail, like pthread_rwlock_unlock(),
- * if the mutex wasn't properly initialized. None of these are ever
- * expected to happen.
- */
-static void vu_dispatch_rdlock(struct fv_VuDev *vud)
-{
-    int ret = pthread_rwlock_rdlock(&vud->vu_dispatch_rwlock);
-    assert(ret == 0);
-}
-
-static void vu_dispatch_wrlock(struct fv_VuDev *vud)
-{
-    int ret = pthread_rwlock_wrlock(&vud->vu_dispatch_rwlock);
-    assert(ret == 0);
-}
-
-static void vu_dispatch_unlock(struct fv_VuDev *vud)
-{
-    int ret = pthread_rwlock_unlock(&vud->vu_dispatch_rwlock);
-    assert(ret == 0);
-}
-
-static void vq_send_element(struct fv_QueueInfo *qi, VuVirtqElement *elem,
-                            ssize_t len)
-{
-    struct fuse_session *se = qi->virtio_dev->se;
-    VuDev *dev = &se->virtio_dev->dev;
-    VuVirtq *q = vu_get_queue(dev, qi->qidx);
-
-    vu_dispatch_rdlock(qi->virtio_dev);
-    pthread_mutex_lock(&qi->vq_lock);
-    vu_queue_push(dev, q, elem, len);
-    vu_queue_notify(dev, q);
-    pthread_mutex_unlock(&qi->vq_lock);
-    vu_dispatch_unlock(qi->virtio_dev);
-}
-
-/*
- * Called back by ll whenever it wants to send a reply/message back
- * The 1st element of the iov starts with the fuse_out_header
- * 'unique'==0 means it's a notify message.
- */
-int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
-                    struct iovec *iov, int count)
-{
-    FVRequest *req = container_of(ch, FVRequest, ch);
-    struct fv_QueueInfo *qi = ch->qi;
-    VuVirtqElement *elem = &req->elem;
-    int ret = 0;
-
-    assert(count >= 1);
-    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
-
-    struct fuse_out_header *out = iov[0].iov_base;
-    /* TODO: Endianness! */
-
-    size_t tosend_len = iov_size(iov, count);
-
-    /* unique == 0 is notification, which we don't support */
-    assert(out->unique);
-    assert(!req->reply_sent);
-
-    /* The 'in' part of the elem is to qemu */
-    unsigned int in_num = elem->in_num;
-    struct iovec *in_sg = elem->in_sg;
-    size_t in_len = iov_size(in_sg, in_num);
-    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
-             __func__, elem->index, in_num, in_len);
-
-    /*
-     * The elem should have room for a 'fuse_out_header' (out from fuse)
-     * plus the data based on the len in the header.
-     */
-    if (in_len < sizeof(struct fuse_out_header)) {
-        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
-                 __func__, elem->index);
-        ret = -E2BIG;
-        goto err;
-    }
-    if (in_len < tosend_len) {
-        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
-                 __func__, elem->index, tosend_len);
-        ret = -E2BIG;
-        goto err;
-    }
-
-    copy_iov(iov, count, in_sg, in_num, tosend_len);
-
-    vq_send_element(qi, elem, tosend_len);
-    req->reply_sent = true;
-
-err:
-    return ret;
-}
-
-/*
- * Callback from fuse_send_data_iov_* when it's virtio and the buffer
- * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
- * We need send the iov and then the buffer.
- * Return 0 on success
- */
-int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
-                         struct iovec *iov, int count, struct fuse_bufvec *buf,
-                         size_t len)
-{
-    FVRequest *req = container_of(ch, FVRequest, ch);
-    struct fv_QueueInfo *qi = ch->qi;
-    VuVirtqElement *elem = &req->elem;
-    int ret = 0;
-    g_autofree struct iovec *in_sg_cpy = NULL;
-
-    assert(count >= 1);
-    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
-
-    struct fuse_out_header *out = iov[0].iov_base;
-    /* TODO: Endianness! */
-
-    size_t iov_len = iov_size(iov, count);
-    size_t tosend_len = iov_len + len;
-
-    out->len = tosend_len;
-
-    fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
-             count, len, iov_len);
-
-    /* unique == 0 is notification which we don't support */
-    assert(out->unique);
-
-    assert(!req->reply_sent);
-
-    /* The 'in' part of the elem is to qemu */
-    unsigned int in_num = elem->in_num;
-    struct iovec *in_sg = elem->in_sg;
-    size_t in_len = iov_size(in_sg, in_num);
-    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
-             __func__, elem->index, in_num, in_len);
-
-    /*
-     * The elem should have room for a 'fuse_out_header' (out from fuse)
-     * plus the data based on the len in the header.
-     */
-    if (in_len < sizeof(struct fuse_out_header)) {
-        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
-                 __func__, elem->index);
-        return E2BIG;
-    }
-    if (in_len < tosend_len) {
-        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
-                 __func__, elem->index, tosend_len);
-        return E2BIG;
-    }
-
-    /* TODO: Limit to 'len' */
-
-    /* First copy the header data from iov->in_sg */
-    copy_iov(iov, count, in_sg, in_num, iov_len);
-
-    /*
-     * Build a copy of the in_sg iov so we can skip bits in it,
-     * including changing the offsets
-     */
-    in_sg_cpy = g_new(struct iovec, in_num);
-    memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
-    /* These get updated as we skip */
-    struct iovec *in_sg_ptr = in_sg_cpy;
-    unsigned int in_sg_cpy_count = in_num;
-
-    /* skip over parts of in_sg that contained the header iov */
-    iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, iov_len);
-
-    do {
-        fuse_log(FUSE_LOG_DEBUG, "%s: in_sg_cpy_count=%d len remaining=%zd\n",
-                 __func__, in_sg_cpy_count, len);
-
-        ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
-                     buf->buf[0].pos);
-
-        if (ret == -1) {
-            ret = errno;
-            if (ret == EINTR) {
-                continue;
-            }
-            fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
-                     __func__, len);
-            return ret;
-        }
-
-        if (!ret) {
-            /* EOF case? */
-            fuse_log(FUSE_LOG_DEBUG, "%s: !ret len remaining=%zd\n", __func__,
-                     len);
-            break;
-        }
-        fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
-                 ret, len);
-
-        len -= ret;
-        /* Short read. Retry reading remaining bytes */
-        if (len) {
-            fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
-            /* Skip over this much next time around */
-            iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, ret);
-            buf->buf[0].pos += ret;
-        }
-    } while (len);
-
-    /* Need to fix out->len on EOF */
-    if (len) {
-        struct fuse_out_header *out_sg = in_sg[0].iov_base;
-
-        tosend_len -= len;
-        out_sg->len = tosend_len;
-    }
-
-    vq_send_element(qi, elem, tosend_len);
-    req->reply_sent = true;
-    return 0;
-}
-
-static __thread bool clone_fs_called;
-
-/* Process one FVRequest in a thread pool */
-static void fv_queue_worker(gpointer data, gpointer user_data)
-{
-    struct fv_QueueInfo *qi = user_data;
-    struct fuse_session *se = qi->virtio_dev->se;
-    FVRequest *req = data;
-    VuVirtqElement *elem = &req->elem;
-    struct fuse_buf fbuf = {};
-    bool allocated_bufv = false;
-    struct fuse_bufvec bufv;
-    struct fuse_bufvec *pbufv;
-    struct fuse_in_header inh;
-
-    assert(se->bufsize > sizeof(struct fuse_in_header));
-
-    if (!clone_fs_called) {
-        int ret;
-
-        /* unshare FS for xattr operation */
-        ret = unshare(CLONE_FS);
-        /* should not fail */
-        assert(ret == 0);
-
-        clone_fs_called = true;
-    }
-
-    /*
-     * An element contains one request and the space to send our response
-     * They're spread over multiple descriptors in a scatter/gather set
-     * and we can't trust the guest to keep them still; so copy in/out.
-     */
-    fbuf.mem = g_malloc(se->bufsize);
-
-    fuse_mutex_init(&req->ch.lock);
-    req->ch.fd = -1;
-    req->ch.qi = qi;
-
-    /* The 'out' part of the elem is from qemu */
-    unsigned int out_num = elem->out_num;
-    struct iovec *out_sg = elem->out_sg;
-    size_t out_len = iov_size(out_sg, out_num);
-    fuse_log(FUSE_LOG_DEBUG,
-             "%s: elem %d: with %d out desc of length %zd\n",
-             __func__, elem->index, out_num, out_len);
-
-    /*
-     * The elem should contain a 'fuse_in_header' (in to fuse)
-     * plus the data based on the len in the header.
-     */
-    if (out_len < sizeof(struct fuse_in_header)) {
-        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
-                 __func__, elem->index);
-        assert(0); /* TODO */
-    }
-    if (out_len > se->bufsize) {
-        fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__,
-                 elem->index);
-        assert(0); /* TODO */
-    }
-    /* Copy just the fuse_in_header and look at it */
-    copy_from_iov(&fbuf, out_num, out_sg,
-                  sizeof(struct fuse_in_header));
-    memcpy(&inh, fbuf.mem, sizeof(struct fuse_in_header));
-
-    pbufv = NULL; /* Compiler thinks an unitialised path */
-    if (inh.opcode == FUSE_WRITE &&
-        out_len >= (sizeof(struct fuse_in_header) +
-                    sizeof(struct fuse_write_in))) {
-        /*
-         * For a write we don't actually need to copy the
-         * data, we can just do it straight out of guest memory
-         * but we must still copy the headers in case the guest
-         * was nasty and changed them while we were using them.
-         */
-        fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
-
-        fbuf.size = copy_from_iov(&fbuf, out_num, out_sg,
-                                  sizeof(struct fuse_in_header) +
-                                  sizeof(struct fuse_write_in));
-        /* That copy reread the in_header, make sure we use the original */
-        memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header));
-
-        /* Allocate the bufv, with space for the rest of the iov */
-        pbufv = g_try_malloc(sizeof(struct fuse_bufvec) +
-                             sizeof(struct fuse_buf) * out_num);
-        if (!pbufv) {
-            fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
-                    __func__);
-            goto out;
-        }
-
-        allocated_bufv = true;
-        pbufv->count = 1;
-        pbufv->buf[0] = fbuf;
-
-        size_t iovindex, pbufvindex, iov_bytes_skip;
-        pbufvindex = 1; /* 2 headers, 1 fusebuf */
-
-        if (!skip_iov(out_sg, out_num,
-                      sizeof(struct fuse_in_header) +
-                      sizeof(struct fuse_write_in),
-                      &iovindex, &iov_bytes_skip)) {
-            fuse_log(FUSE_LOG_ERR, "%s: skip failed\n",
-                    __func__);
-            goto out;
-        }
-
-        for (; iovindex < out_num; iovindex++, pbufvindex++) {
-            pbufv->count++;
-            pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
-            pbufv->buf[pbufvindex].flags = 0;
-            pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
-            pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
-
-            if (iov_bytes_skip) {
-                pbufv->buf[pbufvindex].mem += iov_bytes_skip;
-                pbufv->buf[pbufvindex].size -= iov_bytes_skip;
-                iov_bytes_skip = 0;
-            }
-        }
-    } else {
-        /* Normal (non fast write) path */
-
-        copy_from_iov(&fbuf, out_num, out_sg, se->bufsize);
-        /* That copy reread the in_header, make sure we use the original */
-        memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header));
-        fbuf.size = out_len;
-
-        /* TODO! Endianness of header */
-
-        /* TODO: Add checks for fuse_session_exited */
-        bufv.buf[0] = fbuf;
-        bufv.count = 1;
-        pbufv = &bufv;
-    }
-    pbufv->idx = 0;
-    pbufv->off = 0;
-    fuse_session_process_buf_int(se, pbufv, &req->ch);
-
-out:
-    if (allocated_bufv) {
-        g_free(pbufv);
-    }
-
-    /* If the request has no reply, still recycle the virtqueue element */
-    if (!req->reply_sent) {
-        fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__,
-                 elem->index);
-        vq_send_element(qi, elem, 0);
-    }
-
-    pthread_mutex_destroy(&req->ch.lock);
-    g_free(fbuf.mem);
-    free(req);
-}
-
-/* Thread function for individual queues, created when a queue is 'started' */
-static void *fv_queue_thread(void *opaque)
-{
-    struct fv_QueueInfo *qi = opaque;
-    struct VuDev *dev = &qi->virtio_dev->dev;
-    struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
-    struct fuse_session *se = qi->virtio_dev->se;
-    GThreadPool *pool = NULL;
-    GList *req_list = NULL;
-
-    if (se->thread_pool_size) {
-        fuse_log(FUSE_LOG_DEBUG, "%s: Creating thread pool for Queue %d\n",
-                 __func__, qi->qidx);
-        pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size,
-                                 FALSE, NULL);
-        if (!pool) {
-            fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
-            return NULL;
-        }
-    }
-
-    fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
-             qi->qidx, qi->kick_fd);
-    while (1) {
-        struct pollfd pf[2];
-
-        pf[0].fd = qi->kick_fd;
-        pf[0].events = POLLIN;
-        pf[0].revents = 0;
-        pf[1].fd = qi->kill_fd;
-        pf[1].events = POLLIN;
-        pf[1].revents = 0;
-
-        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
-                 qi->qidx);
-        int poll_res = ppoll(pf, 2, NULL, NULL);
-
-        if (poll_res == -1) {
-            if (errno == EINTR) {
-                fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
-                         __func__);
-                continue;
-            }
-            fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
-            break;
-        }
-        assert(poll_res >= 1);
-        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
-            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
-                     __func__, pf[0].revents, qi->qidx);
-            break;
-        }
-        if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Unexpected poll revents %x Queue %d killfd\n",
-                     __func__, pf[1].revents, qi->qidx);
-            break;
-        }
-        if (pf[1].revents) {
-            fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n",
-                     __func__, qi->qidx);
-            break;
-        }
-        assert(pf[0].revents & POLLIN);
-        fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
-                 qi->qidx);
-
-        eventfd_t evalue;
-        if (eventfd_read(qi->kick_fd, &evalue)) {
-            fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
-            break;
-        }
-        /* Mutual exclusion with virtio_loop() */
-        vu_dispatch_rdlock(qi->virtio_dev);
-        pthread_mutex_lock(&qi->vq_lock);
-        /* out is from guest, in is too guest */
-        unsigned int in_bytes, out_bytes;
-        vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
-
-        fuse_log(FUSE_LOG_DEBUG,
-                 "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
-                 __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
-
-        while (1) {
-            FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest));
-            if (!req) {
-                break;
-            }
-
-            req->reply_sent = false;
-
-            if (!se->thread_pool_size) {
-                req_list = g_list_prepend(req_list, req);
-            } else {
-                g_thread_pool_push(pool, req, NULL);
-            }
-        }
-
-        pthread_mutex_unlock(&qi->vq_lock);
-        vu_dispatch_unlock(qi->virtio_dev);
-
-        /* Process all the requests. */
-        if (!se->thread_pool_size && req_list != NULL) {
-            req_list = g_list_reverse(req_list);
-            g_list_foreach(req_list, fv_queue_worker, qi);
-            g_list_free(req_list);
-            req_list = NULL;
-        }
-    }
-
-    if (pool) {
-        g_thread_pool_free(pool, FALSE, TRUE);
-    }
-
-    return NULL;
-}
-
-static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
-{
-    int ret;
-    struct fv_QueueInfo *ourqi;
-
-    assert(qidx < vud->nqueues);
-    ourqi = vud->qi[qidx];
-
-    /* Kill the thread */
-    if (eventfd_write(ourqi->kill_fd, 1)) {
-        fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n",
-                 qidx, strerror(errno));
-    }
-    ret = pthread_join(ourqi->thread, NULL);
-    if (ret) {
-        fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n",
-                 __func__, qidx, ret);
-    }
-    pthread_mutex_destroy(&ourqi->vq_lock);
-    close(ourqi->kill_fd);
-    ourqi->kick_fd = -1;
-    g_free(vud->qi[qidx]);
-    vud->qi[qidx] = NULL;
-}
-
-static void stop_all_queues(struct fv_VuDev *vud)
-{
-    for (int i = 0; i < vud->nqueues; i++) {
-        if (!vud->qi[i]) {
-            continue;
-        }
-
-        fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i);
-        fv_queue_cleanup_thread(vud, i);
-    }
-}
-
-/* Callback from libvhost-user on start or stop of a queue */
-static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
-{
-    struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
-    struct fv_QueueInfo *ourqi;
-
-    fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
-             started);
-    assert(qidx >= 0);
-
-    /*
-     * Ignore additional request queues for now.  passthrough_ll.c must be
-     * audited for thread-safety issues first.  It was written with a
-     * well-behaved client in mind and may not protect against all types of
-     * races yet.
-     */
-    if (qidx > 1) {
-        fuse_log(FUSE_LOG_ERR,
-                 "%s: multiple request queues not yet implemented, please only "
-                 "configure 1 request queue\n",
-                 __func__);
-        exit(EXIT_FAILURE);
-    }
-
-    if (started) {
-        /* Fire up a thread to watch this queue */
-        if (qidx >= vud->nqueues) {
-            vud->qi = g_realloc_n(vud->qi, qidx + 1, sizeof(vud->qi[0]));
-            memset(vud->qi + vud->nqueues, 0,
-                   sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
-            vud->nqueues = qidx + 1;
-        }
-        if (!vud->qi[qidx]) {
-            vud->qi[qidx] = g_new0(struct fv_QueueInfo, 1);
-            vud->qi[qidx]->virtio_dev = vud;
-            vud->qi[qidx]->qidx = qidx;
-        } else {
-            /* Shouldn't have been started */
-            assert(vud->qi[qidx]->kick_fd == -1);
-        }
-        ourqi = vud->qi[qidx];
-        ourqi->kick_fd = dev->vq[qidx].kick_fd;
-
-        ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
-        assert(ourqi->kill_fd != -1);
-        pthread_mutex_init(&ourqi->vq_lock, NULL);
-
-        if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
-            fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
-                     __func__, qidx);
-            assert(0);
-        }
-    } else {
-        /*
-         * Temporarily drop write-lock taken in virtio_loop() so that
-         * the queue thread doesn't block in virtio_send_msg().
-         */
-        vu_dispatch_unlock(vud);
-        fv_queue_cleanup_thread(vud, qidx);
-        vu_dispatch_wrlock(vud);
-    }
-}
-
-static bool fv_queue_order(VuDev *dev, int qidx)
-{
-    return false;
-}
-
-static const VuDevIface fv_iface = {
-    .get_features = fv_get_features,
-    .set_features = fv_set_features,
-
-    /* Don't need process message, we've not got any at vhost-user level */
-    .queue_set_started = fv_queue_set_started,
-
-    .queue_is_processed_in_order = fv_queue_order,
-};
-
-/*
- * Main loop; this mostly deals with events on the vhost-user
- * socket itself, and not actual fuse data.
- */
-int virtio_loop(struct fuse_session *se)
-{
-    fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
-
-    while (!fuse_session_exited(se)) {
-        struct pollfd pf[1];
-        bool ok;
-        pf[0].fd = se->vu_socketfd;
-        pf[0].events = POLLIN;
-        pf[0].revents = 0;
-
-        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);
-        int poll_res = ppoll(pf, 1, NULL, NULL);
-
-        if (poll_res == -1) {
-            if (errno == EINTR) {
-                fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
-                         __func__);
-                continue;
-            }
-            fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
-            break;
-        }
-        assert(poll_res == 1);
-        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
-            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
-                     pf[0].revents);
-            break;
-        }
-        assert(pf[0].revents & POLLIN);
-        fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
-        /* Mutual exclusion with fv_queue_thread() */
-        vu_dispatch_wrlock(se->virtio_dev);
-
-        ok = vu_dispatch(&se->virtio_dev->dev);
-
-        vu_dispatch_unlock(se->virtio_dev);
-
-        if (!ok) {
-            fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
-            break;
-        }
-    }
-
-    /*
-     * Make sure all fv_queue_thread()s quit on exit, as we're about to
-     * free virtio dev and fuse session, no one should access them anymore.
-     */
-    stop_all_queues(se->virtio_dev);
-    fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
-
-    return 0;
-}
-
-static void strreplace(char *s, char old, char new)
-{
-    for (; *s; ++s) {
-        if (*s == old) {
-            *s = new;
-        }
-    }
-}
-
-static bool fv_socket_lock(struct fuse_session *se)
-{
-    g_autofree gchar *sk_name = NULL;
-    g_autofree gchar *pidfile = NULL;
-    g_autofree gchar *state = NULL;
-    g_autofree gchar *dir = NULL;
-    Error *local_err = NULL;
-
-    state = qemu_get_local_state_dir();
-    dir = g_build_filename(state, "run", "virtiofsd", NULL);
-
-    if (g_mkdir_with_parents(dir, S_IRWXU) < 0) {
-        fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s\n",
-                 __func__, dir, strerror(errno));
-        return false;
-    }
-
-    sk_name = g_strdup(se->vu_socket_path);
-    strreplace(sk_name, '/', '.');
-    pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name);
-
-    if (!qemu_write_pidfile(pidfile, &local_err)) {
-        error_report_err(local_err);
-        return false;
-    }
-
-    return true;
-}
-
-static int fv_create_listen_socket(struct fuse_session *se)
-{
-    struct sockaddr_un un;
-    mode_t old_umask;
-
-    /* Nothing to do if fd is already initialized */
-    if (se->vu_listen_fd >= 0) {
-        return 0;
-    }
-
-    if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
-        fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
-        return -1;
-    }
-
-    if (!strlen(se->vu_socket_path)) {
-        fuse_log(FUSE_LOG_ERR, "Socket path is empty\n");
-        return -1;
-    }
-
-    /* Check the vu_socket_path is already used */
-    if (!fv_socket_lock(se)) {
-        return -1;
-    }
-
-    /*
-     * Create the Unix socket to communicate with qemu
-     * based on QEMU's vhost-user-bridge
-     */
-    unlink(se->vu_socket_path);
-    strcpy(un.sun_path, se->vu_socket_path);
-    size_t addr_len = sizeof(un);
-
-    int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
-    if (listen_sock == -1) {
-        fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
-        return -1;
-    }
-    un.sun_family = AF_UNIX;
-
-    /*
-     * Unfortunately bind doesn't let you set the mask on the socket,
-     * so set umask appropriately and restore it later.
-     */
-    if (se->vu_socket_group) {
-        old_umask = umask(S_IROTH | S_IWOTH | S_IXOTH);
-    } else {
-        old_umask = umask(S_IRGRP | S_IWGRP | S_IXGRP |
-                          S_IROTH | S_IWOTH | S_IXOTH);
-    }
-    if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) {
-        fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
-        close(listen_sock);
-        umask(old_umask);
-        return -1;
-    }
-    if (se->vu_socket_group) {
-        struct group *g = getgrnam(se->vu_socket_group);
-        if (g) {
-            if (chown(se->vu_socket_path, -1, g->gr_gid) == -1) {
-                fuse_log(FUSE_LOG_WARNING,
-                         "vhost socket failed to set group to %s (%d): %m\n",
-                         se->vu_socket_group, g->gr_gid);
-            }
-        } else {
-            fuse_log(FUSE_LOG_ERR,
-                     "vhost socket: unable to find group '%s'\n",
-                     se->vu_socket_group);
-            close(listen_sock);
-            umask(old_umask);
-            return -1;
-        }
-    }
-    umask(old_umask);
-
-    if (listen(listen_sock, 1) == -1) {
-        fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
-        close(listen_sock);
-        return -1;
-    }
-
-    se->vu_listen_fd = listen_sock;
-    return 0;
-}
-
-int virtio_session_mount(struct fuse_session *se)
-{
-    int ret;
-
-    /*
-     * Test that unshare(CLONE_FS) works. fv_queue_worker() will need it. It's
-     * an unprivileged system call but some Docker/Moby versions are known to
-     * reject it via seccomp when CAP_SYS_ADMIN is not given.
-     *
-     * Note that the program is single-threaded here so this syscall has no
-     * visible effect and is safe to make.
-     */
-    ret = unshare(CLONE_FS);
-    if (ret == -1 && errno == EPERM) {
-        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_FS) failed with EPERM. If "
-                "running in a container please check that the container "
-                "runtime seccomp policy allows unshare.\n");
-        return -1;
-    }
-
-    ret = fv_create_listen_socket(se);
-    if (ret < 0) {
-        return ret;
-    }
-
-    se->fd = -1;
-
-    fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
-             __func__);
-    int data_sock = accept(se->vu_listen_fd, NULL, NULL);
-    if (data_sock == -1) {
-        fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
-        close(se->vu_listen_fd);
-        return -1;
-    }
-    close(se->vu_listen_fd);
-    se->vu_listen_fd = -1;
-    fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
-             __func__);
-
-    /* TODO: Some cleanup/deallocation! */
-    se->virtio_dev = g_new0(struct fv_VuDev, 1);
-
-    se->vu_socketfd = data_sock;
-    se->virtio_dev->se = se;
-    pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
-    if (!vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL,
-                 fv_set_watch, fv_remove_watch, &fv_iface)) {
-        fuse_log(FUSE_LOG_ERR, "%s: vu_init failed\n", __func__);
-        return -1;
-    }
-
-    return 0;
-}
-
-void virtio_session_close(struct fuse_session *se)
-{
-    close(se->vu_socketfd);
-
-    if (!se->virtio_dev) {
-        return;
-    }
-
-    g_free(se->virtio_dev->qi);
-    pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock);
-    g_free(se->virtio_dev);
-    se->virtio_dev = NULL;
-}
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
deleted file mode 100644 (file)
index 1116840..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * virtio-fs glue for FUSE
- * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Dave Gilbert  <dgilbert@redhat.com>
- *
- * Implements the glue between libfuse and libvhost-user
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- *  See the file COPYING.LIB
- */
-
-#ifndef FUSE_VIRTIO_H
-#define FUSE_VIRTIO_H
-
-#include "fuse_i.h"
-
-struct fuse_session;
-
-int virtio_session_mount(struct fuse_session *se);
-void virtio_session_close(struct fuse_session *se);
-int virtio_loop(struct fuse_session *se);
-
-
-int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
-                    struct iovec *iov, int count);
-
-int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
-                         struct iovec *iov, int count,
-                         struct fuse_bufvec *buf, size_t len);
-
-#endif
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
deleted file mode 100644 (file)
index f5f66f2..0000000
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * Helper functions to create (simple) standalone programs. With the
- * aid of these functions it should be possible to create full FUSE
- * file system by implementing nothing but the request handlers.
-
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "fuse_lowlevel.h"
-#include "fuse_misc.h"
-#include "fuse_opt.h"
-
-#include <sys/param.h>
-#include <sys/resource.h>
-
-#define FUSE_HELPER_OPT(t, p)                       \
-    {                                               \
-        t, offsetof(struct fuse_cmdline_opts, p), 1 \
-    }
-#define FUSE_HELPER_OPT_VALUE(t, p, v)              \
-    {                                               \
-        t, offsetof(struct fuse_cmdline_opts, p), v \
-    }
-
-static const struct fuse_opt fuse_helper_opts[] = {
-    FUSE_HELPER_OPT("-h", show_help),
-    FUSE_HELPER_OPT("--help", show_help),
-    FUSE_HELPER_OPT("-V", show_version),
-    FUSE_HELPER_OPT("--version", show_version),
-    FUSE_HELPER_OPT("--print-capabilities", print_capabilities),
-    FUSE_HELPER_OPT("-d", debug),
-    FUSE_HELPER_OPT("debug", debug),
-    FUSE_HELPER_OPT("-d", foreground),
-    FUSE_HELPER_OPT("debug", foreground),
-    FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP),
-    FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP),
-    FUSE_HELPER_OPT("-f", foreground),
-    FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0),
-    FUSE_HELPER_OPT("fsname=", nodefault_subtype),
-    FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP),
-    FUSE_HELPER_OPT("subtype=", nodefault_subtype),
-    FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP),
-    FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads),
-    FUSE_HELPER_OPT("--rlimit-nofile=%lu", rlimit_nofile),
-    FUSE_HELPER_OPT("--syslog", syslog),
-    FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG),
-    FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO),
-    FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING),
-    FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR),
-    FUSE_OPT_END
-};
-
-struct fuse_conn_info_opts {
-    int atomic_o_trunc;
-    int no_remote_posix_lock;
-    int no_remote_flock;
-    int splice_write;
-    int splice_move;
-    int splice_read;
-    int no_splice_write;
-    int no_splice_move;
-    int no_splice_read;
-    int auto_inval_data;
-    int no_auto_inval_data;
-    int no_readdirplus;
-    int no_readdirplus_auto;
-    int async_dio;
-    int no_async_dio;
-    int writeback_cache;
-    int no_writeback_cache;
-    int async_read;
-    int sync_read;
-    unsigned max_write;
-    unsigned max_readahead;
-    unsigned max_background;
-    unsigned congestion_threshold;
-    unsigned time_gran;
-    int set_max_write;
-    int set_max_readahead;
-    int set_max_background;
-    int set_congestion_threshold;
-    int set_time_gran;
-};
-
-#define CONN_OPTION(t, p, v)                          \
-    {                                                 \
-        t, offsetof(struct fuse_conn_info_opts, p), v \
-    }
-static const struct fuse_opt conn_info_opt_spec[] = {
-    CONN_OPTION("max_write=%u", max_write, 0),
-    CONN_OPTION("max_write=", set_max_write, 1),
-    CONN_OPTION("max_readahead=%u", max_readahead, 0),
-    CONN_OPTION("max_readahead=", set_max_readahead, 1),
-    CONN_OPTION("max_background=%u", max_background, 0),
-    CONN_OPTION("max_background=", set_max_background, 1),
-    CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0),
-    CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1),
-    CONN_OPTION("sync_read", sync_read, 1),
-    CONN_OPTION("async_read", async_read, 1),
-    CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1),
-    CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1),
-    CONN_OPTION("no_remote_lock", no_remote_flock, 1),
-    CONN_OPTION("no_remote_flock", no_remote_flock, 1),
-    CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1),
-    CONN_OPTION("splice_write", splice_write, 1),
-    CONN_OPTION("no_splice_write", no_splice_write, 1),
-    CONN_OPTION("splice_move", splice_move, 1),
-    CONN_OPTION("no_splice_move", no_splice_move, 1),
-    CONN_OPTION("splice_read", splice_read, 1),
-    CONN_OPTION("no_splice_read", no_splice_read, 1),
-    CONN_OPTION("auto_inval_data", auto_inval_data, 1),
-    CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1),
-    CONN_OPTION("readdirplus=no", no_readdirplus, 1),
-    CONN_OPTION("readdirplus=yes", no_readdirplus, 0),
-    CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1),
-    CONN_OPTION("readdirplus=auto", no_readdirplus, 0),
-    CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0),
-    CONN_OPTION("async_dio", async_dio, 1),
-    CONN_OPTION("no_async_dio", no_async_dio, 1),
-    CONN_OPTION("writeback_cache", writeback_cache, 1),
-    CONN_OPTION("no_writeback_cache", no_writeback_cache, 1),
-    CONN_OPTION("time_gran=%u", time_gran, 0),
-    CONN_OPTION("time_gran=", set_time_gran, 1),
-    FUSE_OPT_END
-};
-
-
-void fuse_cmdline_help(void)
-{
-    printf("    -h   --help                print help\n"
-           "    -V   --version             print version\n"
-           "    --print-capabilities       print vhost-user.json\n"
-           "    -d   -o debug              enable debug output (implies -f)\n"
-           "    --syslog                   log to syslog (default stderr)\n"
-           "    -f                         foreground operation\n"
-           "    --daemonize                run in background\n"
-           "    -o cache=<mode>            cache mode. could be one of \"auto, "
-           "always, none\"\n"
-           "                               default: auto\n"
-           "    -o flock|no_flock          enable/disable flock\n"
-           "                               default: no_flock\n"
-           "    -o log_level=<level>       log level, default to \"info\"\n"
-           "                               level could be one of \"debug, "
-           "info, warn, err\"\n"
-           "    -o max_idle_threads        the maximum number of idle worker "
-           "threads\n"
-           "                               allowed (default: 10)\n"
-           "    -o posix_lock|no_posix_lock\n"
-           "                               enable/disable remote posix lock\n"
-           "                               default: no_posix_lock\n"
-           "    -o readdirplus|no_readdirplus\n"
-           "                               enable/disable readirplus\n"
-           "                               default: readdirplus except with "
-           "cache=none\n"
-           "    -o sandbox=namespace|chroot\n"
-           "                               sandboxing mode:\n"
-           "                               - namespace: mount, pid, and net\n"
-           "                                 namespaces with pivot_root(2)\n"
-           "                                 into shared directory\n"
-           "                               - chroot: chroot(2) into shared\n"
-           "                                 directory (use in containers)\n"
-           "                               default: namespace\n"
-           "    -o timeout=<number>        I/O timeout (seconds)\n"
-           "                               default: depends on cache= option.\n"
-           "    -o writeback|no_writeback  enable/disable writeback cache\n"
-           "                               default: no_writeback\n"
-           "    -o xattr|no_xattr          enable/disable xattr\n"
-           "                               default: no_xattr\n"
-           "    -o xattrmap=<mapping>      Enable xattr mapping (enables xattr)\n"
-           "                               <mapping> is a string consists of a series of rules\n"
-           "                               e.g. -o xattrmap=:map::user.virtiofs.:\n"
-           "    -o modcaps=CAPLIST         Modify the list of capabilities\n"
-           "                               e.g. -o modcaps=+sys_admin:-chown\n"
-           "    --rlimit-nofile=<num>      set maximum number of file descriptors\n"
-           "                               (0 leaves rlimit unchanged)\n"
-           "                               default: min(1000000, fs.file-max - 16384)\n"
-           "                                        if the current rlimit is lower\n"
-           "    -o allow_direct_io|no_allow_direct_io\n"
-           "                               retain/discard O_DIRECT flags passed down\n"
-           "                               to virtiofsd from guest applications.\n"
-           "                               default: no_allow_direct_io\n"
-           "    -o announce_submounts      Announce sub-mount points to the guest\n"
-           "    -o posix_acl/no_posix_acl  Enable/Disable posix_acl. (default: disabled)\n"
-           "    -o security_label/no_security_label  Enable/Disable security label. (default: disabled)\n"
-           "    -o killpriv_v2/no_killpriv_v2\n"
-           "                               Enable/Disable FUSE_HANDLE_KILLPRIV_V2.\n"
-           "                               (default: enabled as long as client supports it)\n"
-           );
-}
-
-static int fuse_helper_opt_proc(void *data, const char *arg, int key,
-                                struct fuse_args *outargs)
-{
-    (void)data;
-    (void)outargs;
-
-    switch (key) {
-    case FUSE_OPT_KEY_NONOPT:
-        fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg);
-        return -1;
-
-    default:
-        /* Pass through unknown options */
-        return 1;
-    }
-}
-
-static unsigned long get_default_rlimit_nofile(void)
-{
-    g_autofree gchar *file_max_str = NULL;
-    const rlim_t reserved_fds = 16384; /* leave at least this many fds free */
-    rlim_t max_fds = 1000000; /* our default RLIMIT_NOFILE target */
-    rlim_t file_max;
-    struct rlimit rlim;
-
-    /*
-     * Reduce max_fds below the system-wide maximum, if necessary.  This
-     * ensures there are fds available for other processes so we don't
-     * cause resource exhaustion.
-     */
-    if (!g_file_get_contents("/proc/sys/fs/file-max", &file_max_str,
-                             NULL, NULL)) {
-        fuse_log(FUSE_LOG_ERR, "can't read /proc/sys/fs/file-max\n");
-        exit(1);
-    }
-    file_max = g_ascii_strtoull(file_max_str, NULL, 10);
-    if (file_max < 2 * reserved_fds) {
-        fuse_log(FUSE_LOG_ERR,
-                 "The fs.file-max sysctl is too low (%lu) to allow a "
-                 "reasonable number of open files.\n",
-                 (unsigned long)file_max);
-        exit(1);
-    }
-    max_fds = MIN(file_max - reserved_fds, max_fds);
-
-    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
-        fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n");
-        exit(1);
-    }
-
-    if (rlim.rlim_cur >= max_fds) {
-        return 0; /* we have more fds available than required! */
-    }
-    return max_fds;
-}
-
-int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts)
-{
-    memset(opts, 0, sizeof(struct fuse_cmdline_opts));
-
-    opts->max_idle_threads = 10;
-    opts->rlimit_nofile = get_default_rlimit_nofile();
-    opts->foreground = 1;
-
-    if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) ==
-        -1) {
-        return -1;
-    }
-
-    return 0;
-}
-
-
-int fuse_daemonize(int foreground)
-{
-    int ret = 0, rett;
-    if (!foreground) {
-        int nullfd;
-        int waiter[2];
-        char completed;
-
-        if (!g_unix_open_pipe(waiter, FD_CLOEXEC, NULL)) {
-            fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n",
-                     strerror(errno));
-            return -1;
-        }
-
-        /*
-         * demonize current process by forking it and killing the
-         * parent.  This makes current process as a child of 'init'.
-         */
-        switch (fork()) {
-        case -1:
-            fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n",
-                     strerror(errno));
-            return -1;
-        case 0:
-            break;
-        default:
-            _exit(read(waiter[0], &completed,
-                       sizeof(completed) != sizeof(completed)));
-        }
-
-        if (setsid() == -1) {
-            fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n",
-                     strerror(errno));
-            return -1;
-        }
-
-        ret = chdir("/");
-
-        nullfd = open("/dev/null", O_RDWR, 0);
-        if (nullfd != -1) {
-            rett = dup2(nullfd, 0);
-            if (!ret) {
-                ret = rett;
-            }
-            rett = dup2(nullfd, 1);
-            if (!ret) {
-                ret = rett;
-            }
-            rett = dup2(nullfd, 2);
-            if (!ret) {
-                ret = rett;
-            }
-            if (nullfd > 2) {
-                close(nullfd);
-            }
-        }
-
-        /* Propagate completion of daemon initialization */
-        completed = 1;
-        rett = write(waiter[1], &completed, sizeof(completed));
-        if (!ret) {
-            ret = rett;
-        }
-        close(waiter[0]);
-        close(waiter[1]);
-    } else {
-        ret = chdir("/");
-    }
-    return ret;
-}
-
-void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
-                               struct fuse_conn_info *conn)
-{
-    if (opts->set_max_write) {
-        conn->max_write = opts->max_write;
-    }
-    if (opts->set_max_background) {
-        conn->max_background = opts->max_background;
-    }
-    if (opts->set_congestion_threshold) {
-        conn->congestion_threshold = opts->congestion_threshold;
-    }
-    if (opts->set_time_gran) {
-        conn->time_gran = opts->time_gran;
-    }
-    if (opts->set_max_readahead) {
-        conn->max_readahead = opts->max_readahead;
-    }
-
-#define LL_ENABLE(cond, cap) \
-    if (cond)                \
-        conn->want |= (cap)
-#define LL_DISABLE(cond, cap) \
-    if (cond)                 \
-        conn->want &= ~(cap)
-
-    LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ);
-    LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ);
-
-    LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE);
-    LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE);
-
-    LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE);
-    LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE);
-
-    LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
-    LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
-
-    LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS);
-    LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO);
-
-    LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO);
-    LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO);
-
-    LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
-    LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
-
-    LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ);
-    LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ);
-
-    LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS);
-    LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS);
-}
-
-struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args)
-{
-    struct fuse_conn_info_opts *opts;
-
-    opts = calloc(1, sizeof(struct fuse_conn_info_opts));
-    if (opts == NULL) {
-        fuse_log(FUSE_LOG_ERR, "calloc failed\n");
-        return NULL;
-    }
-    if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) {
-        free(opts);
-        return NULL;
-    }
-    return opts;
-}
diff --git a/tools/virtiofsd/meson.build b/tools/virtiofsd/meson.build
deleted file mode 100644 (file)
index c134ba6..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-executable('virtiofsd', files(
-  'buffer.c',
-  'fuse_opt.c',
-  'fuse_log.c',
-  'fuse_lowlevel.c',
-  'fuse_signals.c',
-  'fuse_virtio.c',
-  'helper.c',
-  'passthrough_ll.c',
-  'passthrough_seccomp.c'),
-  dependencies: [seccomp, qemuutil, libcap_ng, vhost_user],
-  install: true,
-  install_dir: get_option('libexecdir'))
-
-configure_file(input: '50-qemu-virtiofsd.json.in',
-               output: '50-qemu-virtiofsd.json',
-               configuration: { 'libexecdir' : get_option('prefix') / get_option('libexecdir') },
-               install_dir: qemu_datadir / 'vhost-user')
diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h
deleted file mode 100644 (file)
index 0b98275..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE
- */
-
-/*
- * Creates files on the underlying file system in response to a FUSE_MKNOD
- * operation
- */
-static int mknod_wrapper(int dirfd, const char *path, const char *link,
-                         int mode, dev_t rdev)
-{
-    int res;
-
-    if (S_ISREG(mode)) {
-        res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode);
-        if (res >= 0) {
-            res = close(res);
-        }
-    } else if (S_ISDIR(mode)) {
-        res = mkdirat(dirfd, path, mode);
-    } else if (S_ISLNK(mode) && link != NULL) {
-        res = symlinkat(link, dirfd, path);
-    } else if (S_ISFIFO(mode)) {
-        res = mkfifoat(dirfd, path, mode);
-    } else {
-        res = mknodat(dirfd, path, mode, rdev);
-    }
-
-    return res;
-}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
deleted file mode 100644 (file)
index 40ea2ed..0000000
+++ /dev/null
@@ -1,4521 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU GPLv2.
- * See the file COPYING.
- */
-
-/*
- *
- * This file system mirrors the existing file system hierarchy of the
- * system, starting at the root file system. This is implemented by
- * just "passing through" all requests to the corresponding user-space
- * libc functions. In contrast to passthrough.c and passthrough_fh.c,
- * this implementation uses the low-level API. Its performance should
- * be the least bad among the three, but many operations are not
- * implemented. In particular, it is not possible to remove files (or
- * directories) because the code necessary to defer actual removal
- * until the file is not opened anymore would make the example much
- * more complicated.
- *
- * When writeback caching is enabled (-o writeback mount option), it
- * is only possible to write to files for which the mounting user has
- * read permissions. This is because the writeback cache requires the
- * kernel to be able to issue read requests for all files (which the
- * passthrough filesystem cannot satisfy if it can't read the file in
- * the underlying filesystem).
- *
- * Compile with:
- *
- *     gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
- * passthrough_ll
- *
- * ## Source code ##
- * \include passthrough_ll.c
- */
-
-#include "qemu/osdep.h"
-#include "qemu/timer.h"
-#include "qemu-version.h"
-#include "qemu/help-texts.h"
-#include "fuse_virtio.h"
-#include "fuse_log.h"
-#include "fuse_lowlevel.h"
-#include "standard-headers/linux/fuse.h"
-#include <cap-ng.h>
-#include <dirent.h>
-#include <pthread.h>
-#include <sys/file.h>
-#include <sys/mount.h>
-#include <sys/prctl.h>
-#include <sys/resource.h>
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <sys/xattr.h>
-#include <syslog.h>
-#include <grp.h>
-
-#include "qemu/cutils.h"
-#include "passthrough_helpers.h"
-#include "passthrough_seccomp.h"
-
-/* Keep track of inode posix locks for each owner. */
-struct lo_inode_plock {
-    uint64_t lock_owner;
-    int fd; /* fd for OFD locks */
-};
-
-struct lo_map_elem {
-    union {
-        struct lo_inode *inode;
-        struct lo_dirp *dirp;
-        int fd;
-        ssize_t freelist;
-    };
-    bool in_use;
-};
-
-/* Maps FUSE fh or ino values to internal objects */
-struct lo_map {
-    struct lo_map_elem *elems;
-    size_t nelems;
-    ssize_t freelist;
-};
-
-struct lo_key {
-    ino_t ino;
-    dev_t dev;
-    uint64_t mnt_id;
-};
-
-struct lo_inode {
-    int fd;
-
-    /*
-     * Atomic reference count for this object.  The nlookup field holds a
-     * reference and release it when nlookup reaches 0.
-     */
-    gint refcount;
-
-    struct lo_key key;
-
-    /*
-     * This counter keeps the inode alive during the FUSE session.
-     * Incremented when the FUSE inode number is sent in a reply
-     * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc).  Decremented when an inode is
-     * released by a FUSE_FORGET request.
-     *
-     * Note that this value is untrusted because the client can manipulate
-     * it arbitrarily using FUSE_FORGET requests.
-     *
-     * Protected by lo->mutex.
-     */
-    uint64_t nlookup;
-
-    fuse_ino_t fuse_ino;
-    pthread_mutex_t plock_mutex;
-    GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
-
-    mode_t filetype;
-};
-
-struct lo_cred {
-    uid_t euid;
-    gid_t egid;
-    mode_t umask;
-};
-
-enum {
-    CACHE_NONE,
-    CACHE_AUTO,
-    CACHE_ALWAYS,
-};
-
-enum {
-    SANDBOX_NAMESPACE,
-    SANDBOX_CHROOT,
-};
-
-typedef struct xattr_map_entry {
-    char *key;
-    char *prepend;
-    unsigned int flags;
-} XattrMapEntry;
-
-struct lo_data {
-    pthread_mutex_t mutex;
-    int sandbox;
-    int debug;
-    int writeback;
-    int flock;
-    int posix_lock;
-    int xattr;
-    char *xattrmap;
-    char *xattr_security_capability;
-    char *source;
-    char *modcaps;
-    double timeout;
-    int cache;
-    int timeout_set;
-    int readdirplus_set;
-    int readdirplus_clear;
-    int allow_direct_io;
-    int announce_submounts;
-    bool use_statx;
-    struct lo_inode root;
-    GHashTable *inodes; /* protected by lo->mutex */
-    struct lo_map ino_map; /* protected by lo->mutex */
-    struct lo_map dirp_map; /* protected by lo->mutex */
-    struct lo_map fd_map; /* protected by lo->mutex */
-    XattrMapEntry *xattr_map_list;
-    size_t xattr_map_nentries;
-
-    /* An O_PATH file descriptor to /proc/self/fd/ */
-    int proc_self_fd;
-    /* An O_PATH file descriptor to /proc/self/task/ */
-    int proc_self_task;
-    int user_killpriv_v2, killpriv_v2;
-    /* If set, virtiofsd is responsible for setting umask during creation */
-    bool change_umask;
-    int user_posix_acl, posix_acl;
-    /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */
-    bool use_fscreate;
-    int user_security_label;
-};
-
-static const struct fuse_opt lo_opts[] = {
-    { "sandbox=namespace",
-      offsetof(struct lo_data, sandbox),
-      SANDBOX_NAMESPACE },
-    { "sandbox=chroot",
-      offsetof(struct lo_data, sandbox),
-      SANDBOX_CHROOT },
-    { "writeback", offsetof(struct lo_data, writeback), 1 },
-    { "no_writeback", offsetof(struct lo_data, writeback), 0 },
-    { "source=%s", offsetof(struct lo_data, source), 0 },
-    { "flock", offsetof(struct lo_data, flock), 1 },
-    { "no_flock", offsetof(struct lo_data, flock), 0 },
-    { "posix_lock", offsetof(struct lo_data, posix_lock), 1 },
-    { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
-    { "xattr", offsetof(struct lo_data, xattr), 1 },
-    { "no_xattr", offsetof(struct lo_data, xattr), 0 },
-    { "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 },
-    { "modcaps=%s", offsetof(struct lo_data, modcaps), 0 },
-    { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
-    { "timeout=", offsetof(struct lo_data, timeout_set), 1 },
-    { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
-    { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
-    { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
-    { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
-    { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
-    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
-    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
-    { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
-    { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
-    { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
-    { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 },
-    { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 },
-    { "security_label", offsetof(struct lo_data, user_security_label), 1 },
-    { "no_security_label", offsetof(struct lo_data, user_security_label), 0 },
-    FUSE_OPT_END
-};
-static bool use_syslog = false;
-static int current_log_level;
-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
-                                 uint64_t n);
-
-static struct {
-    pthread_mutex_t mutex;
-    void *saved;
-} cap;
-/* That we loaded cap-ng in the current thread from the saved */
-static __thread bool cap_loaded = 0;
-
-static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
-                                uint64_t mnt_id);
-static int xattr_map_client(const struct lo_data *lo, const char *client_name,
-                            char **out_name);
-
-#define FCHDIR_NOFAIL(fd) do {                         \
-        int fchdir_res = fchdir(fd);                   \
-        assert(fchdir_res == 0);                       \
-    } while (0)
-
-static bool is_dot_or_dotdot(const char *name)
-{
-    return name[0] == '.' &&
-           (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'));
-}
-
-/* Is `path` a single path component that is not "." or ".."? */
-static bool is_safe_path_component(const char *path)
-{
-    if (strchr(path, '/')) {
-        return false;
-    }
-
-    return !is_dot_or_dotdot(path);
-}
-
-static bool is_empty(const char *name)
-{
-    return name[0] == '\0';
-}
-
-static struct lo_data *lo_data(fuse_req_t req)
-{
-    return (struct lo_data *)fuse_req_userdata(req);
-}
-
-/*
- * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With
- * selinux=0, read from fscreate returns -EINVAL.
- *
- * TODO: Link with libselinux and use is_selinux_enabled() instead down
- * the line. It probably will be more reliable indicator.
- */
-static bool is_fscreate_usable(struct lo_data *lo)
-{
-    char procname[64];
-    int fscreate_fd;
-    size_t bytes_read;
-
-    sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid));
-    fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR);
-    if (fscreate_fd == -1) {
-        return false;
-    }
-
-    bytes_read = read(fscreate_fd, procname, 64);
-    close(fscreate_fd);
-    if (bytes_read == -1) {
-        return false;
-    }
-    return true;
-}
-
-/* Helpers to set/reset fscreate */
-static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx,
-                                  size_t ctxlen, int *fd)
-{
-    char procname[64];
-    int fscreate_fd, err = 0;
-    size_t written;
-
-    sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid));
-    fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY);
-    err = fscreate_fd == -1 ? errno : 0;
-    if (err) {
-        return err;
-    }
-
-    written = write(fscreate_fd, ctx, ctxlen);
-    err = written == -1 ? errno : 0;
-    if (err) {
-        goto out;
-    }
-
-    *fd = fscreate_fd;
-    return 0;
-out:
-    close(fscreate_fd);
-    return err;
-}
-
-static void close_reset_proc_fscreate(int fd)
-{
-    if ((write(fd, NULL, 0)) == -1) {
-        fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno);
-    }
-    close(fd);
-    return;
-}
-
-/*
- * Load capng's state from our saved state if the current thread
- * hadn't previously been loaded.
- * returns 0 on success
- */
-static int load_capng(void)
-{
-    if (!cap_loaded) {
-        pthread_mutex_lock(&cap.mutex);
-        capng_restore_state(&cap.saved);
-        /*
-         * restore_state free's the saved copy
-         * so make another.
-         */
-        cap.saved = capng_save_state();
-        if (!cap.saved) {
-            pthread_mutex_unlock(&cap.mutex);
-            fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n");
-            return -EINVAL;
-        }
-        pthread_mutex_unlock(&cap.mutex);
-
-        /*
-         * We want to use the loaded state for our pid,
-         * not the original
-         */
-        capng_setpid(syscall(SYS_gettid));
-        cap_loaded = true;
-    }
-    return 0;
-}
-
-/*
- * Helpers for dropping and regaining effective capabilities. Returns 0
- * on success, error otherwise
- */
-static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
-{
-    int cap, ret;
-
-    cap = capng_name_to_capability(cap_name);
-    if (cap < 0) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
-                 cap_name, strerror(errno));
-        goto out;
-    }
-
-    if (load_capng()) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
-        goto out;
-    }
-
-    /* We dont have this capability in effective set already. */
-    if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) {
-        ret = 0;
-        goto out;
-    }
-
-    if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n");
-        goto out;
-    }
-
-    if (capng_apply(CAPNG_SELECT_CAPS)) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n");
-        goto out;
-    }
-
-    ret = 0;
-    if (cap_dropped) {
-        *cap_dropped = true;
-    }
-
-out:
-    return ret;
-}
-
-static int gain_effective_cap(const char *cap_name)
-{
-    int cap;
-    int ret = 0;
-
-    cap = capng_name_to_capability(cap_name);
-    if (cap < 0) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
-                 cap_name, strerror(errno));
-        goto out;
-    }
-
-    if (load_capng()) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
-        goto out;
-    }
-
-    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n");
-        goto out;
-    }
-
-    if (capng_apply(CAPNG_SELECT_CAPS)) {
-        ret = errno;
-        fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n");
-        goto out;
-    }
-    ret = 0;
-
-out:
-    return ret;
-}
-
-/*
- * The host kernel normally drops security.capability xattr's on
- * any write, however if we're remapping xattr names we need to drop
- * whatever the clients security.capability is actually stored as.
- */
-static int drop_security_capability(const struct lo_data *lo, int fd)
-{
-    if (!lo->xattr_security_capability) {
-        /* We didn't remap the name, let the host kernel do it */
-        return 0;
-    }
-    if (!fremovexattr(fd, lo->xattr_security_capability)) {
-        /* All good */
-        return 0;
-    }
-
-    switch (errno) {
-    case ENODATA:
-        /* Attribute didn't exist, that's fine */
-        return 0;
-
-    case ENOTSUP:
-        /* FS didn't support attribute anyway, also fine */
-        return 0;
-
-    default:
-        /* Hmm other error */
-        return errno;
-    }
-}
-
-static void lo_map_init(struct lo_map *map)
-{
-    map->elems = NULL;
-    map->nelems = 0;
-    map->freelist = -1;
-}
-
-static void lo_map_destroy(struct lo_map *map)
-{
-    g_free(map->elems);
-}
-
-static int lo_map_grow(struct lo_map *map, size_t new_nelems)
-{
-    struct lo_map_elem *new_elems;
-    size_t i;
-
-    if (new_nelems <= map->nelems) {
-        return 1;
-    }
-
-    new_elems = g_try_realloc_n(map->elems, new_nelems, sizeof(map->elems[0]));
-    if (!new_elems) {
-        return 0;
-    }
-
-    for (i = map->nelems; i < new_nelems; i++) {
-        new_elems[i].freelist = i + 1;
-        new_elems[i].in_use = false;
-    }
-    new_elems[new_nelems - 1].freelist = -1;
-
-    map->elems = new_elems;
-    map->freelist = map->nelems;
-    map->nelems = new_nelems;
-    return 1;
-}
-
-static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map)
-{
-    struct lo_map_elem *elem;
-
-    if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) {
-        return NULL;
-    }
-
-    elem = &map->elems[map->freelist];
-    map->freelist = elem->freelist;
-
-    elem->in_use = true;
-
-    return elem;
-}
-
-static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key)
-{
-    ssize_t *prev;
-
-    if (!lo_map_grow(map, key + 1)) {
-        return NULL;
-    }
-
-    for (prev = &map->freelist; *prev != -1;
-         prev = &map->elems[*prev].freelist) {
-        if (*prev == key) {
-            struct lo_map_elem *elem = &map->elems[key];
-
-            *prev = elem->freelist;
-            elem->in_use = true;
-            return elem;
-        }
-    }
-    return NULL;
-}
-
-static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key)
-{
-    if (key >= map->nelems) {
-        return NULL;
-    }
-    if (!map->elems[key].in_use) {
-        return NULL;
-    }
-    return &map->elems[key];
-}
-
-static void lo_map_remove(struct lo_map *map, size_t key)
-{
-    struct lo_map_elem *elem;
-
-    if (key >= map->nelems) {
-        return;
-    }
-
-    elem = &map->elems[key];
-    if (!elem->in_use) {
-        return;
-    }
-
-    elem->in_use = false;
-
-    elem->freelist = map->freelist;
-    map->freelist = key;
-}
-
-/* Assumes lo->mutex is held */
-static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd)
-{
-    struct lo_map_elem *elem;
-
-    elem = lo_map_alloc_elem(&lo->fd_map);
-    if (!elem) {
-        return -1;
-    }
-
-    elem->fd = fd;
-    return elem - lo->fd_map.elems;
-}
-
-/* Assumes lo->mutex is held */
-static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
-{
-    struct lo_map_elem *elem;
-
-    elem = lo_map_alloc_elem(&lo_data(req)->dirp_map);
-    if (!elem) {
-        return -1;
-    }
-
-    elem->dirp = dirp;
-    return elem - lo_data(req)->dirp_map.elems;
-}
-
-/* Assumes lo->mutex is held */
-static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
-{
-    struct lo_map_elem *elem;
-
-    elem = lo_map_alloc_elem(&lo_data(req)->ino_map);
-    if (!elem) {
-        return -1;
-    }
-
-    elem->inode = inode;
-    return elem - lo_data(req)->ino_map.elems;
-}
-
-static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
-{
-    struct lo_inode *inode = *inodep;
-
-    if (!inode) {
-        return;
-    }
-
-    *inodep = NULL;
-
-    if (g_atomic_int_dec_and_test(&inode->refcount)) {
-        close(inode->fd);
-        free(inode);
-    }
-}
-
-/* Caller must release refcount using lo_inode_put() */
-static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_map_elem *elem;
-
-    pthread_mutex_lock(&lo->mutex);
-    elem = lo_map_get(&lo->ino_map, ino);
-    if (elem) {
-        g_atomic_int_inc(&elem->inode->refcount);
-    }
-    pthread_mutex_unlock(&lo->mutex);
-
-    if (!elem) {
-        return NULL;
-    }
-
-    return elem->inode;
-}
-
-/*
- * TODO Remove this helper and force callers to hold an inode refcount until
- * they are done with the fd.  This will be done in a later patch to make
- * review easier.
- */
-static int lo_fd(fuse_req_t req, fuse_ino_t ino)
-{
-    struct lo_inode *inode = lo_inode(req, ino);
-    int fd;
-
-    if (!inode) {
-        return -1;
-    }
-
-    fd = inode->fd;
-    lo_inode_put(lo_data(req), &inode);
-    return fd;
-}
-
-/*
- * Open a file descriptor for an inode. Returns -EBADF if the inode is not a
- * regular file or a directory.
- *
- * Use this helper function instead of raw openat(2) to prevent security issues
- * when a malicious client opens special files such as block device nodes.
- * Symlink inodes are also rejected since symlinks must already have been
- * traversed on the client side.
- */
-static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode,
-                         int open_flags)
-{
-    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
-    int fd;
-
-    if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
-        return -EBADF;
-    }
-
-    /*
-     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
-     * that the inode is not a special file but if an external process races
-     * with us then symlinks are traversed here. It is not possible to escape
-     * the shared directory since it is mounted as "/" though.
-     */
-    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
-    if (fd < 0) {
-        return -errno;
-    }
-    return fd;
-}
-
-static void lo_init(void *userdata, struct fuse_conn_info *conn)
-{
-    struct lo_data *lo = (struct lo_data *)userdata;
-
-    if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) {
-        conn->want |= FUSE_CAP_EXPORT_SUPPORT;
-    }
-
-    if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
-        conn->want |= FUSE_CAP_WRITEBACK_CACHE;
-    }
-    if (conn->capable & FUSE_CAP_FLOCK_LOCKS) {
-        if (lo->flock) {
-            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
-            conn->want |= FUSE_CAP_FLOCK_LOCKS;
-        } else {
-            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n");
-            conn->want &= ~FUSE_CAP_FLOCK_LOCKS;
-        }
-    }
-
-    if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
-        if (lo->posix_lock) {
-            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n");
-            conn->want |= FUSE_CAP_POSIX_LOCKS;
-        } else {
-            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n");
-            conn->want &= ~FUSE_CAP_POSIX_LOCKS;
-        }
-    }
-
-    if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
-        lo->readdirplus_clear) {
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
-        conn->want &= ~FUSE_CAP_READDIRPLUS;
-    }
-
-    if (!(conn->capable & FUSE_CAP_SUBMOUNTS) && lo->announce_submounts) {
-        fuse_log(FUSE_LOG_WARNING, "lo_init: Cannot announce submounts, client "
-                 "does not support it\n");
-        lo->announce_submounts = false;
-    }
-
-    if (lo->user_killpriv_v2 == 1) {
-        /*
-         * User explicitly asked for this option. Enable it unconditionally.
-         * If connection does not have this capability, it should fail
-         * in fuse_lowlevel.c
-         */
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
-        conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
-        lo->killpriv_v2 = 1;
-    } else {
-        /*
-         * Either user specified to disable killpriv_v2, or did not
-         * specify anything. Disable killpriv_v2 in both the cases.
-         */
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
-        conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
-        lo->killpriv_v2 = 0;
-    }
-
-    if (lo->user_posix_acl == 1) {
-        /*
-         * User explicitly asked for this option. Enable it unconditionally.
-         * If connection does not have this capability, print error message
-         * now. It will fail later in fuse_lowlevel.c
-         */
-        if (!(conn->capable & FUSE_CAP_POSIX_ACL) ||
-            !(conn->capable & FUSE_CAP_DONT_MASK) ||
-            !(conn->capable & FUSE_CAP_SETXATTR_EXT)) {
-            fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable posix acl."
-                     " kernel does not support FUSE_POSIX_ACL, FUSE_DONT_MASK"
-                     " or FUSE_SETXATTR_EXT capability.\n");
-        } else {
-            fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling posix acl\n");
-        }
-
-        conn->want |= FUSE_CAP_POSIX_ACL | FUSE_CAP_DONT_MASK |
-                      FUSE_CAP_SETXATTR_EXT;
-        lo->change_umask = true;
-        lo->posix_acl = true;
-    } else {
-        /* User either did not specify anything or wants it disabled */
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n");
-        conn->want &= ~FUSE_CAP_POSIX_ACL;
-    }
-
-    if (lo->user_security_label == 1) {
-        if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) {
-            fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label."
-                     " kernel does not support FUSE_SECURITY_CTX capability.\n");
-        }
-        conn->want |= FUSE_CAP_SECURITY_CTX;
-    } else {
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n");
-        conn->want &= ~FUSE_CAP_SECURITY_CTX;
-    }
-}
-
-static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
-                       struct fuse_file_info *fi)
-{
-    int res;
-    struct stat buf;
-    struct lo_data *lo = lo_data(req);
-
-    (void)fi;
-
-    res =
-        fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
-    if (res == -1) {
-        return (void)fuse_reply_err(req, errno);
-    }
-
-    fuse_reply_attr(req, &buf, lo->timeout);
-}
-
-static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_map_elem *elem;
-
-    pthread_mutex_lock(&lo->mutex);
-    elem = lo_map_get(&lo->fd_map, fi->fh);
-    pthread_mutex_unlock(&lo->mutex);
-
-    if (!elem) {
-        return -1;
-    }
-
-    return elem->fd;
-}
-
-static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
-                       int valid, struct fuse_file_info *fi)
-{
-    int saverr;
-    char procname[64];
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode;
-    int ifd;
-    int res;
-    int fd = -1;
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    ifd = inode->fd;
-
-    /* If fi->fh is invalid we'll report EBADF later */
-    if (fi) {
-        fd = lo_fi_fd(req, fi);
-    }
-
-    if (valid & FUSE_SET_ATTR_MODE) {
-        if (fi) {
-            res = fchmod(fd, attr->st_mode);
-        } else {
-            sprintf(procname, "%i", ifd);
-            res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
-        }
-        if (res == -1) {
-            saverr = errno;
-            goto out_err;
-        }
-    }
-    if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
-        uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
-        gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;
-
-        saverr = drop_security_capability(lo, ifd);
-        if (saverr) {
-            goto out_err;
-        }
-
-        res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
-        if (res == -1) {
-            saverr = errno;
-            goto out_err;
-        }
-    }
-    if (valid & FUSE_SET_ATTR_SIZE) {
-        int truncfd;
-        bool kill_suidgid;
-        bool cap_fsetid_dropped = false;
-
-        kill_suidgid = lo->killpriv_v2 && (valid & FUSE_SET_ATTR_KILL_SUIDGID);
-        if (fi) {
-            truncfd = fd;
-        } else {
-            truncfd = lo_inode_open(lo, inode, O_RDWR);
-            if (truncfd < 0) {
-                saverr = -truncfd;
-                goto out_err;
-            }
-        }
-
-        saverr = drop_security_capability(lo, truncfd);
-        if (saverr) {
-            if (!fi) {
-                close(truncfd);
-            }
-            goto out_err;
-        }
-
-        if (kill_suidgid) {
-            res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
-            if (res != 0) {
-                saverr = res;
-                if (!fi) {
-                    close(truncfd);
-                }
-                goto out_err;
-            }
-        }
-
-        res = ftruncate(truncfd, attr->st_size);
-        saverr = res == -1 ? errno : 0;
-
-        if (cap_fsetid_dropped) {
-            if (gain_effective_cap("FSETID")) {
-                fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
-            }
-        }
-        if (!fi) {
-            close(truncfd);
-        }
-        if (res == -1) {
-            goto out_err;
-        }
-    }
-    if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
-        struct timespec tv[2];
-
-        tv[0].tv_sec = 0;
-        tv[1].tv_sec = 0;
-        tv[0].tv_nsec = UTIME_OMIT;
-        tv[1].tv_nsec = UTIME_OMIT;
-
-        if (valid & FUSE_SET_ATTR_ATIME_NOW) {
-            tv[0].tv_nsec = UTIME_NOW;
-        } else if (valid & FUSE_SET_ATTR_ATIME) {
-            tv[0] = attr->st_atim;
-        }
-
-        if (valid & FUSE_SET_ATTR_MTIME_NOW) {
-            tv[1].tv_nsec = UTIME_NOW;
-        } else if (valid & FUSE_SET_ATTR_MTIME) {
-            tv[1] = attr->st_mtim;
-        }
-
-        if (fi) {
-            res = futimens(fd, tv);
-        } else {
-            sprintf(procname, "%i", inode->fd);
-            res = utimensat(lo->proc_self_fd, procname, tv, 0);
-        }
-        if (res == -1) {
-            saverr = errno;
-            goto out_err;
-        }
-    }
-    lo_inode_put(lo, &inode);
-
-    return lo_getattr(req, ino, fi);
-
-out_err:
-    lo_inode_put(lo, &inode);
-    fuse_reply_err(req, saverr);
-}
-
-static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
-                                uint64_t mnt_id)
-{
-    struct lo_inode *p;
-    struct lo_key key = {
-        .ino = st->st_ino,
-        .dev = st->st_dev,
-        .mnt_id = mnt_id,
-    };
-
-    pthread_mutex_lock(&lo->mutex);
-    p = g_hash_table_lookup(lo->inodes, &key);
-    if (p) {
-        assert(p->nlookup > 0);
-        p->nlookup++;
-        g_atomic_int_inc(&p->refcount);
-    }
-    pthread_mutex_unlock(&lo->mutex);
-
-    return p;
-}
-
-/* value_destroy_func for posix_locks GHashTable */
-static void posix_locks_value_destroy(gpointer data)
-{
-    struct lo_inode_plock *plock = data;
-
-    /*
-     * We had used open() for locks and had only one fd. So
-     * closing this fd should release all OFD locks.
-     */
-    close(plock->fd);
-    free(plock);
-}
-
-static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
-                    struct stat *statbuf, int flags, uint64_t *mnt_id)
-{
-    int res;
-
-#if defined(CONFIG_STATX) && defined(CONFIG_STATX_MNT_ID)
-    if (lo->use_statx) {
-        struct statx statxbuf;
-
-        res = statx(dirfd, pathname, flags, STATX_BASIC_STATS | STATX_MNT_ID,
-                    &statxbuf);
-        if (!res) {
-            memset(statbuf, 0, sizeof(*statbuf));
-            statbuf->st_dev = makedev(statxbuf.stx_dev_major,
-                                      statxbuf.stx_dev_minor);
-            statbuf->st_ino = statxbuf.stx_ino;
-            statbuf->st_mode = statxbuf.stx_mode;
-            statbuf->st_nlink = statxbuf.stx_nlink;
-            statbuf->st_uid = statxbuf.stx_uid;
-            statbuf->st_gid = statxbuf.stx_gid;
-            statbuf->st_rdev = makedev(statxbuf.stx_rdev_major,
-                                       statxbuf.stx_rdev_minor);
-            statbuf->st_size = statxbuf.stx_size;
-            statbuf->st_blksize = statxbuf.stx_blksize;
-            statbuf->st_blocks = statxbuf.stx_blocks;
-            statbuf->st_atim.tv_sec = statxbuf.stx_atime.tv_sec;
-            statbuf->st_atim.tv_nsec = statxbuf.stx_atime.tv_nsec;
-            statbuf->st_mtim.tv_sec = statxbuf.stx_mtime.tv_sec;
-            statbuf->st_mtim.tv_nsec = statxbuf.stx_mtime.tv_nsec;
-            statbuf->st_ctim.tv_sec = statxbuf.stx_ctime.tv_sec;
-            statbuf->st_ctim.tv_nsec = statxbuf.stx_ctime.tv_nsec;
-
-            if (statxbuf.stx_mask & STATX_MNT_ID) {
-                *mnt_id = statxbuf.stx_mnt_id;
-            } else {
-                *mnt_id = 0;
-            }
-            return 0;
-        } else if (errno != ENOSYS) {
-            return -1;
-        }
-        lo->use_statx = false;
-        /* fallback */
-    }
-#endif
-    res = fstatat(dirfd, pathname, statbuf, flags);
-    if (res == -1) {
-        return -1;
-    }
-    *mnt_id = 0;
-
-    return 0;
-}
-
-/*
- * Increments nlookup on the inode on success. unref_inode_lolocked() must be
- * called eventually to decrement nlookup again. If inodep is non-NULL, the
- * inode pointer is stored and the caller must call lo_inode_put().
- */
-static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
-                        struct fuse_entry_param *e,
-                        struct lo_inode **inodep)
-{
-    int newfd;
-    int res;
-    int saverr;
-    uint64_t mnt_id;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode = NULL;
-    struct lo_inode *dir = lo_inode(req, parent);
-
-    if (inodep) {
-        *inodep = NULL; /* in case there is an error */
-    }
-
-    /*
-     * name_to_handle_at() and open_by_handle_at() can reach here with fuse
-     * mount point in guest, but we don't have its inode info in the
-     * ino_map.
-     */
-    if (!dir) {
-        return ENOENT;
-    }
-
-    memset(e, 0, sizeof(*e));
-    e->attr_timeout = lo->timeout;
-    e->entry_timeout = lo->timeout;
-
-    /* Do not allow escaping root directory */
-    if (dir == &lo->root && strcmp(name, "..") == 0) {
-        name = ".";
-    }
-
-    newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
-    if (newfd == -1) {
-        goto out_err;
-    }
-
-    res = do_statx(lo, newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
-                   &mnt_id);
-    if (res == -1) {
-        goto out_err;
-    }
-
-    if (S_ISDIR(e->attr.st_mode) && lo->announce_submounts &&
-        (e->attr.st_dev != dir->key.dev || mnt_id != dir->key.mnt_id)) {
-        e->attr_flags |= FUSE_ATTR_SUBMOUNT;
-    }
-
-    inode = lo_find(lo, &e->attr, mnt_id);
-    if (inode) {
-        close(newfd);
-    } else {
-        inode = calloc(1, sizeof(struct lo_inode));
-        if (!inode) {
-            goto out_err;
-        }
-
-        /* cache only filetype */
-        inode->filetype = (e->attr.st_mode & S_IFMT);
-
-        /*
-         * One for the caller and one for nlookup (released in
-         * unref_inode_lolocked())
-         */
-        g_atomic_int_set(&inode->refcount, 2);
-
-        inode->nlookup = 1;
-        inode->fd = newfd;
-        inode->key.ino = e->attr.st_ino;
-        inode->key.dev = e->attr.st_dev;
-        inode->key.mnt_id = mnt_id;
-        if (lo->posix_lock) {
-            pthread_mutex_init(&inode->plock_mutex, NULL);
-            inode->posix_locks = g_hash_table_new_full(
-                g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
-        }
-        pthread_mutex_lock(&lo->mutex);
-        inode->fuse_ino = lo_add_inode_mapping(req, inode);
-        g_hash_table_insert(lo->inodes, &inode->key, inode);
-        pthread_mutex_unlock(&lo->mutex);
-    }
-    e->ino = inode->fuse_ino;
-
-    /* Transfer ownership of inode pointer to caller or drop it */
-    if (inodep) {
-        *inodep = inode;
-    } else {
-        lo_inode_put(lo, &inode);
-    }
-
-    lo_inode_put(lo, &dir);
-
-    fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
-             name, (unsigned long long)e->ino);
-
-    return 0;
-
-out_err:
-    saverr = errno;
-    if (newfd != -1) {
-        close(newfd);
-    }
-    lo_inode_put(lo, &inode);
-    lo_inode_put(lo, &dir);
-    return saverr;
-}
-
-static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
-{
-    struct fuse_entry_param e;
-    int err;
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent,
-             name);
-
-    if (is_empty(name)) {
-        fuse_reply_err(req, ENOENT);
-        return;
-    }
-
-    /*
-     * Don't use is_safe_path_component(), allow "." and ".." for NFS export
-     * support.
-     */
-    if (strchr(name, '/')) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    err = lo_do_lookup(req, parent, name, &e, NULL);
-    if (err) {
-        fuse_reply_err(req, err);
-    } else {
-        fuse_reply_entry(req, &e);
-    }
-}
-
-/*
- * On some archs, setres*id is limited to 2^16 but they
- * provide setres*id32 variants that allow 2^32.
- * Others just let setres*id do 2^32 anyway.
- */
-#ifdef SYS_setresgid32
-#define OURSYS_setresgid SYS_setresgid32
-#else
-#define OURSYS_setresgid SYS_setresgid
-#endif
-
-#ifdef SYS_setresuid32
-#define OURSYS_setresuid SYS_setresuid32
-#else
-#define OURSYS_setresuid SYS_setresuid
-#endif
-
-static void drop_supplementary_groups(void)
-{
-    int ret;
-
-    ret = getgroups(0, NULL);
-    if (ret == -1) {
-        fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n",
-                 errno, strerror(errno));
-        exit(1);
-    }
-
-    if (!ret) {
-        return;
-    }
-
-    /* Drop all supplementary groups. We should not need it */
-    ret = setgroups(0, NULL);
-    if (ret == -1) {
-        fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n",
-                 errno, strerror(errno));
-        exit(1);
-    }
-}
-
-/*
- * Change to uid/gid of caller so that file is created with
- * ownership of caller.
- * TODO: What about selinux context?
- */
-static int lo_change_cred(fuse_req_t req, struct lo_cred *old,
-                          bool change_umask)
-{
-    int res;
-
-    old->euid = geteuid();
-    old->egid = getegid();
-
-    res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1);
-    if (res == -1) {
-        return errno;
-    }
-
-    res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1);
-    if (res == -1) {
-        int errno_save = errno;
-
-        syscall(OURSYS_setresgid, -1, old->egid, -1);
-        return errno_save;
-    }
-
-    if (change_umask) {
-        old->umask = umask(req->ctx.umask);
-    }
-    return 0;
-}
-
-/* Regain Privileges */
-static void lo_restore_cred(struct lo_cred *old, bool restore_umask)
-{
-    int res;
-
-    res = syscall(OURSYS_setresuid, -1, old->euid, -1);
-    if (res == -1) {
-        fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid);
-        exit(1);
-    }
-
-    res = syscall(OURSYS_setresgid, -1, old->egid, -1);
-    if (res == -1) {
-        fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid);
-        exit(1);
-    }
-
-    if (restore_umask)
-        umask(old->umask);
-}
-
-/*
- * A helper to change cred and drop capability. Returns 0 on success and
- * errno on error
- */
-static int lo_drop_cap_change_cred(fuse_req_t req, struct lo_cred *old,
-                                   bool change_umask, const char *cap_name,
-                                   bool *cap_dropped)
-{
-    int ret;
-    bool __cap_dropped;
-
-    assert(cap_name);
-
-    ret = drop_effective_cap(cap_name, &__cap_dropped);
-    if (ret) {
-        return ret;
-    }
-
-    ret = lo_change_cred(req, old, change_umask);
-    if (ret) {
-        if (__cap_dropped) {
-            if (gain_effective_cap(cap_name)) {
-                fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name);
-            }
-        }
-    }
-
-    if (cap_dropped) {
-        *cap_dropped = __cap_dropped;
-    }
-    return ret;
-}
-
-static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask,
-                                     const char *cap_name)
-{
-    assert(cap_name);
-
-    lo_restore_cred(old, restore_umask);
-
-    if (gain_effective_cap(cap_name)) {
-        fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name);
-    }
-}
-
-static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir,
-                                   const char *name, const char *secctx_name)
-{
-    int path_fd, err;
-    char procname[64];
-    struct lo_data *lo = lo_data(req);
-
-    if (!req->secctx.ctxlen) {
-        return 0;
-    }
-
-    /* Open newly created element with O_PATH */
-    path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
-    err = path_fd == -1 ? errno : 0;
-    if (err) {
-        return err;
-    }
-    sprintf(procname, "%i", path_fd);
-    FCHDIR_NOFAIL(lo->proc_self_fd);
-    /* Set security context. This is not atomic w.r.t file creation */
-    err = setxattr(procname, secctx_name, req->secctx.ctx, req->secctx.ctxlen,
-                   0);
-    if (err) {
-        err = errno;
-    }
-    FCHDIR_NOFAIL(lo->root.fd);
-    close(path_fd);
-    return err;
-}
-
-static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir,
-                            const char *name, mode_t mode, dev_t rdev,
-                            const char *link)
-{
-    int err, fscreate_fd = -1;
-    const char *secctx_name = req->secctx.name;
-    struct lo_cred old = {};
-    struct lo_data *lo = lo_data(req);
-    char *mapped_name = NULL;
-    bool secctx_enabled = req->secctx.ctxlen;
-    bool do_fscreate = false;
-
-    if (secctx_enabled && lo->xattrmap) {
-        err = xattr_map_client(lo, req->secctx.name, &mapped_name);
-        if (err < 0) {
-            return -err;
-        }
-        secctx_name = mapped_name;
-    }
-
-    /*
-     * If security xattr has not been remapped and selinux is enabled on
-     * host, set fscreate and no need to do a setxattr() after file creation
-     */
-    if (secctx_enabled && !mapped_name && lo->use_fscreate) {
-        do_fscreate = true;
-        err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen,
-                                     &fscreate_fd);
-        if (err) {
-            goto out;
-        }
-    }
-
-    err = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode));
-    if (err) {
-        goto out;
-    }
-
-    err = mknod_wrapper(dir->fd, name, link, mode, rdev);
-    err = err == -1 ? errno : 0;
-    lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode));
-    if (err) {
-        goto out;
-    }
-
-    if (!do_fscreate) {
-        err = do_mknod_symlink_secctx(req, dir, name, secctx_name);
-        if (err) {
-            unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0);
-        }
-    }
-out:
-    if (fscreate_fd != -1) {
-        close_reset_proc_fscreate(fscreate_fd);
-    }
-    g_free(mapped_name);
-    return err;
-}
-
-static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
-                             const char *name, mode_t mode, dev_t rdev,
-                             const char *link)
-{
-    int saverr;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *dir;
-    struct fuse_entry_param e;
-
-    if (is_empty(name)) {
-        fuse_reply_err(req, ENOENT);
-        return;
-    }
-
-    if (!is_safe_path_component(name)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    dir = lo_inode(req, parent);
-    if (!dir) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    saverr = do_mknod_symlink(req, dir, name, mode, rdev, link);
-    if (saverr) {
-        goto out;
-    }
-
-    saverr = lo_do_lookup(req, parent, name, &e, NULL);
-    if (saverr) {
-        goto out;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
-             name, (unsigned long long)e.ino);
-
-    fuse_reply_entry(req, &e);
-    lo_inode_put(lo, &dir);
-    return;
-
-out:
-    lo_inode_put(lo, &dir);
-    fuse_reply_err(req, saverr);
-}
-
-static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
-                     mode_t mode, dev_t rdev)
-{
-    lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
-}
-
-static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
-                     mode_t mode)
-{
-    lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
-}
-
-static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
-                       const char *name)
-{
-    lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
-}
-
-static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
-                    const char *name)
-{
-    int res;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *parent_inode;
-    struct lo_inode *inode;
-    struct fuse_entry_param e;
-    char procname[64];
-    int saverr;
-
-    if (is_empty(name)) {
-        fuse_reply_err(req, ENOENT);
-        return;
-    }
-
-    if (!is_safe_path_component(name)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    parent_inode = lo_inode(req, parent);
-    inode = lo_inode(req, ino);
-    if (!parent_inode || !inode) {
-        errno = EBADF;
-        goto out_err;
-    }
-
-    memset(&e, 0, sizeof(struct fuse_entry_param));
-    e.attr_timeout = lo->timeout;
-    e.entry_timeout = lo->timeout;
-
-    sprintf(procname, "%i", inode->fd);
-    res = linkat(lo->proc_self_fd, procname, parent_inode->fd, name,
-                 AT_SYMLINK_FOLLOW);
-    if (res == -1) {
-        goto out_err;
-    }
-
-    res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
-    if (res == -1) {
-        goto out_err;
-    }
-
-    pthread_mutex_lock(&lo->mutex);
-    inode->nlookup++;
-    pthread_mutex_unlock(&lo->mutex);
-    e.ino = inode->fuse_ino;
-
-    fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
-             name, (unsigned long long)e.ino);
-
-    fuse_reply_entry(req, &e);
-    lo_inode_put(lo, &parent_inode);
-    lo_inode_put(lo, &inode);
-    return;
-
-out_err:
-    saverr = errno;
-    lo_inode_put(lo, &parent_inode);
-    lo_inode_put(lo, &inode);
-    fuse_reply_err(req, saverr);
-}
-
-/* Increments nlookup and caller must release refcount using lo_inode_put() */
-static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
-                                    const char *name)
-{
-    int res;
-    uint64_t mnt_id;
-    struct stat attr;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *dir = lo_inode(req, parent);
-
-    if (!dir) {
-        return NULL;
-    }
-
-    res = do_statx(lo, dir->fd, name, &attr, AT_SYMLINK_NOFOLLOW, &mnt_id);
-    lo_inode_put(lo, &dir);
-    if (res == -1) {
-        return NULL;
-    }
-
-    return lo_find(lo, &attr, mnt_id);
-}
-
-static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
-{
-    int res;
-    struct lo_inode *inode;
-    struct lo_data *lo = lo_data(req);
-
-    if (is_empty(name)) {
-        fuse_reply_err(req, ENOENT);
-        return;
-    }
-
-    if (!is_safe_path_component(name)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    inode = lookup_name(req, parent, name);
-    if (!inode) {
-        fuse_reply_err(req, EIO);
-        return;
-    }
-
-    res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
-
-    fuse_reply_err(req, res == -1 ? errno : 0);
-    unref_inode_lolocked(lo, inode, 1);
-    lo_inode_put(lo, &inode);
-}
-
-static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
-                      fuse_ino_t newparent, const char *newname,
-                      unsigned int flags)
-{
-    int res;
-    struct lo_inode *parent_inode;
-    struct lo_inode *newparent_inode;
-    struct lo_inode *oldinode = NULL;
-    struct lo_inode *newinode = NULL;
-    struct lo_data *lo = lo_data(req);
-
-    if (is_empty(name) || is_empty(newname)) {
-        fuse_reply_err(req, ENOENT);
-        return;
-    }
-
-    if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    parent_inode = lo_inode(req, parent);
-    newparent_inode = lo_inode(req, newparent);
-    if (!parent_inode || !newparent_inode) {
-        fuse_reply_err(req, EBADF);
-        goto out;
-    }
-
-    oldinode = lookup_name(req, parent, name);
-    newinode = lookup_name(req, newparent, newname);
-
-    if (!oldinode) {
-        fuse_reply_err(req, EIO);
-        goto out;
-    }
-
-    if (flags) {
-#ifndef SYS_renameat2
-        fuse_reply_err(req, EINVAL);
-#else
-        res = syscall(SYS_renameat2, parent_inode->fd, name,
-                        newparent_inode->fd, newname, flags);
-        if (res == -1 && errno == ENOSYS) {
-            fuse_reply_err(req, EINVAL);
-        } else {
-            fuse_reply_err(req, res == -1 ? errno : 0);
-        }
-#endif
-        goto out;
-    }
-
-    res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
-
-    fuse_reply_err(req, res == -1 ? errno : 0);
-out:
-    unref_inode_lolocked(lo, oldinode, 1);
-    unref_inode_lolocked(lo, newinode, 1);
-    lo_inode_put(lo, &oldinode);
-    lo_inode_put(lo, &newinode);
-    lo_inode_put(lo, &parent_inode);
-    lo_inode_put(lo, &newparent_inode);
-}
-
-static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
-{
-    int res;
-    struct lo_inode *inode;
-    struct lo_data *lo = lo_data(req);
-
-    if (is_empty(name)) {
-        fuse_reply_err(req, ENOENT);
-        return;
-    }
-
-    if (!is_safe_path_component(name)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    inode = lookup_name(req, parent, name);
-    if (!inode) {
-        fuse_reply_err(req, EIO);
-        return;
-    }
-
-    res = unlinkat(lo_fd(req, parent), name, 0);
-
-    fuse_reply_err(req, res == -1 ? errno : 0);
-    unref_inode_lolocked(lo, inode, 1);
-    lo_inode_put(lo, &inode);
-}
-
-/* To be called with lo->mutex held */
-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
-{
-    if (!inode) {
-        return;
-    }
-
-    assert(inode->nlookup >= n);
-    inode->nlookup -= n;
-    if (!inode->nlookup) {
-        lo_map_remove(&lo->ino_map, inode->fuse_ino);
-        g_hash_table_remove(lo->inodes, &inode->key);
-        if (lo->posix_lock) {
-            if (g_hash_table_size(inode->posix_locks)) {
-                fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
-            }
-            g_hash_table_destroy(inode->posix_locks);
-            pthread_mutex_destroy(&inode->plock_mutex);
-        }
-        /* Drop our refcount from lo_do_lookup() */
-        lo_inode_put(lo, &inode);
-    }
-}
-
-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
-                                 uint64_t n)
-{
-    if (!inode) {
-        return;
-    }
-
-    pthread_mutex_lock(&lo->mutex);
-    unref_inode(lo, inode, n);
-    pthread_mutex_unlock(&lo->mutex);
-}
-
-static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode;
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        return;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "  forget %lli %lli -%lli\n",
-             (unsigned long long)ino, (unsigned long long)inode->nlookup,
-             (unsigned long long)nlookup);
-
-    unref_inode_lolocked(lo, inode, nlookup);
-    lo_inode_put(lo, &inode);
-}
-
-static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
-{
-    lo_forget_one(req, ino, nlookup);
-    fuse_reply_none(req);
-}
-
-static void lo_forget_multi(fuse_req_t req, size_t count,
-                            struct fuse_forget_data *forgets)
-{
-    int i;
-
-    for (i = 0; i < count; i++) {
-        lo_forget_one(req, forgets[i].ino, forgets[i].nlookup);
-    }
-    fuse_reply_none(req);
-}
-
-static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
-{
-    char buf[PATH_MAX + 1];
-    int res;
-
-    res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf));
-    if (res == -1) {
-        return (void)fuse_reply_err(req, errno);
-    }
-
-    if (res == sizeof(buf)) {
-        return (void)fuse_reply_err(req, ENAMETOOLONG);
-    }
-
-    buf[res] = '\0';
-
-    fuse_reply_readlink(req, buf);
-}
-
-struct lo_dirp {
-    gint refcount;
-    DIR *dp;
-    struct dirent *entry;
-    off_t offset;
-};
-
-static void lo_dirp_put(struct lo_dirp **dp)
-{
-    struct lo_dirp *d = *dp;
-
-    if (!d) {
-        return;
-    }
-    *dp = NULL;
-
-    if (g_atomic_int_dec_and_test(&d->refcount)) {
-        closedir(d->dp);
-        free(d);
-    }
-}
-
-/* Call lo_dirp_put() on the return value when no longer needed */
-static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_map_elem *elem;
-
-    pthread_mutex_lock(&lo->mutex);
-    elem = lo_map_get(&lo->dirp_map, fi->fh);
-    if (elem) {
-        g_atomic_int_inc(&elem->dirp->refcount);
-    }
-    pthread_mutex_unlock(&lo->mutex);
-    if (!elem) {
-        return NULL;
-    }
-
-    return elem->dirp;
-}
-
-static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
-                       struct fuse_file_info *fi)
-{
-    int error = ENOMEM;
-    struct lo_data *lo = lo_data(req);
-    struct lo_dirp *d;
-    int fd;
-    ssize_t fh;
-
-    d = calloc(1, sizeof(struct lo_dirp));
-    if (d == NULL) {
-        goto out_err;
-    }
-
-    fd = openat(lo_fd(req, ino), ".", O_RDONLY);
-    if (fd == -1) {
-        goto out_errno;
-    }
-
-    d->dp = fdopendir(fd);
-    if (d->dp == NULL) {
-        goto out_errno;
-    }
-
-    d->offset = 0;
-    d->entry = NULL;
-
-    g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */
-    pthread_mutex_lock(&lo->mutex);
-    fh = lo_add_dirp_mapping(req, d);
-    pthread_mutex_unlock(&lo->mutex);
-    if (fh == -1) {
-        goto out_err;
-    }
-
-    fi->fh = fh;
-    if (lo->cache == CACHE_ALWAYS) {
-        fi->cache_readdir = 1;
-    }
-    fuse_reply_open(req, fi);
-    return;
-
-out_errno:
-    error = errno;
-out_err:
-    if (d) {
-        if (d->dp) {
-            closedir(d->dp);
-        } else if (fd != -1) {
-            close(fd);
-        }
-        free(d);
-    }
-    fuse_reply_err(req, error);
-}
-
-static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
-                          off_t offset, struct fuse_file_info *fi, int plus)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_dirp *d = NULL;
-    struct lo_inode *dinode;
-    g_autofree char *buf = NULL;
-    char *p;
-    size_t rem = size;
-    int err = EBADF;
-
-    dinode = lo_inode(req, ino);
-    if (!dinode) {
-        goto error;
-    }
-
-    d = lo_dirp(req, fi);
-    if (!d) {
-        goto error;
-    }
-
-    err = ENOMEM;
-    buf = g_try_malloc0(size);
-    if (!buf) {
-        goto error;
-    }
-    p = buf;
-
-    if (offset != d->offset) {
-        seekdir(d->dp, offset);
-        d->entry = NULL;
-        d->offset = offset;
-    }
-    while (1) {
-        size_t entsize;
-        off_t nextoff;
-        const char *name;
-
-        if (!d->entry) {
-            errno = 0;
-            d->entry = readdir(d->dp);
-            if (!d->entry) {
-                if (errno) { /* Error */
-                    err = errno;
-                    goto error;
-                } else { /* End of stream */
-                    break;
-                }
-            }
-        }
-        nextoff = d->entry->d_off;
-        name = d->entry->d_name;
-
-        fuse_ino_t entry_ino = 0;
-        struct fuse_entry_param e = (struct fuse_entry_param){
-            .attr.st_ino = d->entry->d_ino,
-            .attr.st_mode = d->entry->d_type << 12,
-        };
-
-        /* Hide root's parent directory */
-        if (dinode == &lo->root && strcmp(name, "..") == 0) {
-            e.attr.st_ino = lo->root.key.ino;
-            e.attr.st_mode = DT_DIR << 12;
-        }
-
-        if (plus) {
-            if (!is_dot_or_dotdot(name)) {
-                err = lo_do_lookup(req, ino, name, &e, NULL);
-                if (err) {
-                    goto error;
-                }
-                entry_ino = e.ino;
-            }
-
-            entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
-        } else {
-            entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
-        }
-        if (entsize > rem) {
-            if (entry_ino != 0) {
-                lo_forget_one(req, entry_ino, 1);
-            }
-            break;
-        }
-
-        p += entsize;
-        rem -= entsize;
-
-        d->entry = NULL;
-        d->offset = nextoff;
-    }
-
-    err = 0;
-error:
-    lo_dirp_put(&d);
-    lo_inode_put(lo, &dinode);
-
-    /*
-     * If there's an error, we can only signal it if we haven't stored
-     * any entries yet - otherwise we'd end up with wrong lookup
-     * counts for the entries that are already in the buffer. So we
-     * return what we've collected until that point.
-     */
-    if (err && rem == size) {
-        fuse_reply_err(req, err);
-    } else {
-        fuse_reply_buf(req, buf, size - rem);
-    }
-}
-
-static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
-                       off_t offset, struct fuse_file_info *fi)
-{
-    lo_do_readdir(req, ino, size, offset, fi, 0);
-}
-
-static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
-                           off_t offset, struct fuse_file_info *fi)
-{
-    lo_do_readdir(req, ino, size, offset, fi, 1);
-}
-
-static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
-                          struct fuse_file_info *fi)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_map_elem *elem;
-    struct lo_dirp *d;
-
-    (void)ino;
-
-    pthread_mutex_lock(&lo->mutex);
-    elem = lo_map_get(&lo->dirp_map, fi->fh);
-    if (!elem) {
-        pthread_mutex_unlock(&lo->mutex);
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    d = elem->dirp;
-    lo_map_remove(&lo->dirp_map, fi->fh);
-    pthread_mutex_unlock(&lo->mutex);
-
-    lo_dirp_put(&d); /* paired with lo_opendir() */
-
-    fuse_reply_err(req, 0);
-}
-
-static void update_open_flags(int writeback, int allow_direct_io,
-                              struct fuse_file_info *fi)
-{
-    /*
-     * With writeback cache, kernel may send read requests even
-     * when userspace opened write-only
-     */
-    if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
-        fi->flags &= ~O_ACCMODE;
-        fi->flags |= O_RDWR;
-    }
-
-    /*
-     * With writeback cache, O_APPEND is handled by the kernel.
-     * This breaks atomicity (since the file may change in the
-     * underlying filesystem, so that the kernel's idea of the
-     * end of the file isn't accurate anymore). In this example,
-     * we just accept that. A more rigorous filesystem may want
-     * to return an error here
-     */
-    if (writeback && (fi->flags & O_APPEND)) {
-        fi->flags &= ~O_APPEND;
-    }
-
-    /*
-     * O_DIRECT in guest should not necessarily mean bypassing page
-     * cache on host as well. Therefore, we discard it by default
-     * ('-o no_allow_direct_io'). If somebody needs that behavior,
-     * the '-o allow_direct_io' option should be set.
-     */
-    if (!allow_direct_io) {
-        fi->flags &= ~O_DIRECT;
-    }
-}
-
-/*
- * Open a regular file, set up an fd mapping, and fill out the struct
- * fuse_file_info for it. If existing_fd is not negative, use that fd instead
- * opening a new one. Takes ownership of existing_fd.
- *
- * Returns 0 on success or a positive errno.
- */
-static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
-                      int existing_fd, struct fuse_file_info *fi)
-{
-    ssize_t fh;
-    int fd = existing_fd;
-    int err;
-    bool cap_fsetid_dropped = false;
-    bool kill_suidgid = lo->killpriv_v2 && fi->kill_priv;
-
-    update_open_flags(lo->writeback, lo->allow_direct_io, fi);
-
-    if (fd < 0) {
-        if (kill_suidgid) {
-            err = drop_effective_cap("FSETID", &cap_fsetid_dropped);
-            if (err) {
-                return err;
-            }
-        }
-
-        fd = lo_inode_open(lo, inode, fi->flags);
-
-        if (cap_fsetid_dropped) {
-            if (gain_effective_cap("FSETID")) {
-                fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
-            }
-        }
-        if (fd < 0) {
-            return -fd;
-        }
-        if (fi->flags & (O_TRUNC)) {
-            int err = drop_security_capability(lo, fd);
-            if (err) {
-                close(fd);
-                return err;
-            }
-        }
-    }
-
-    pthread_mutex_lock(&lo->mutex);
-    fh = lo_add_fd_mapping(lo, fd);
-    pthread_mutex_unlock(&lo->mutex);
-    if (fh == -1) {
-        close(fd);
-        return ENOMEM;
-    }
-
-    fi->fh = fh;
-    if (lo->cache == CACHE_NONE) {
-        fi->direct_io = 1;
-    } else if (lo->cache == CACHE_ALWAYS) {
-        fi->keep_cache = 1;
-    }
-    return 0;
-}
-
-static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode,
-                               const char *name, mode_t mode,
-                               struct fuse_file_info *fi, int *open_fd,
-                              bool tmpfile)
-{
-    int err, fd;
-    struct lo_cred old = {};
-    struct lo_data *lo = lo_data(req);
-    int flags;
-
-    if (tmpfile) {
-        flags = fi->flags | O_TMPFILE;
-        /*
-         * Don't use O_EXCL as we want to link file later. Also reset O_CREAT
-         * otherwise openat() returns -EINVAL.
-         */
-        flags &= ~(O_CREAT | O_EXCL);
-
-        /* O_TMPFILE needs either O_RDWR or O_WRONLY */
-        if ((flags & O_ACCMODE) == O_RDONLY) {
-            flags |= O_RDWR;
-        }
-    } else {
-        flags = fi->flags | O_CREAT | O_EXCL;
-    }
-
-    err = lo_change_cred(req, &old, lo->change_umask);
-    if (err) {
-        return err;
-    }
-
-    /* Try to create a new file but don't open existing files */
-    fd = openat(parent_inode->fd, name, flags, mode);
-    err = fd == -1 ? errno : 0;
-    lo_restore_cred(&old, lo->change_umask);
-    if (!err) {
-        *open_fd = fd;
-    }
-    return err;
-}
-
-static int do_create_secctx_fscreate(fuse_req_t req,
-                                     struct lo_inode *parent_inode,
-                                     const char *name, mode_t mode,
-                                     struct fuse_file_info *fi, int *open_fd)
-{
-    int err = 0, fd = -1, fscreate_fd = -1;
-    struct lo_data *lo = lo_data(req);
-
-    err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen,
-                                 &fscreate_fd);
-    if (err) {
-        return err;
-    }
-
-    err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false);
-
-    close_reset_proc_fscreate(fscreate_fd);
-    if (!err) {
-        *open_fd = fd;
-    }
-    return err;
-}
-
-static int do_create_secctx_tmpfile(fuse_req_t req,
-                                    struct lo_inode *parent_inode,
-                                    const char *name, mode_t mode,
-                                    struct fuse_file_info *fi,
-                                    const char *secctx_name, int *open_fd)
-{
-    int err, fd = -1;
-    struct lo_data *lo = lo_data(req);
-    char procname[64];
-
-    err = do_create_nosecctx(req, parent_inode, ".", mode, fi, &fd, true);
-    if (err) {
-        return err;
-    }
-
-    err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0);
-    if (err) {
-        err = errno;
-        goto out;
-    }
-
-    /* Security context set on file. Link it in place */
-    sprintf(procname, "%d", fd);
-    FCHDIR_NOFAIL(lo->proc_self_fd);
-    err = linkat(AT_FDCWD, procname, parent_inode->fd, name,
-                 AT_SYMLINK_FOLLOW);
-    err = err == -1 ? errno : 0;
-    FCHDIR_NOFAIL(lo->root.fd);
-
-out:
-    if (!err) {
-        *open_fd = fd;
-    } else if (fd != -1) {
-        close(fd);
-    }
-    return err;
-}
-
-static int do_create_secctx_noatomic(fuse_req_t req,
-                                     struct lo_inode *parent_inode,
-                                     const char *name, mode_t mode,
-                                     struct fuse_file_info *fi,
-                                     const char *secctx_name, int *open_fd)
-{
-    int err = 0, fd = -1;
-
-    err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false);
-    if (err) {
-        goto out;
-    }
-
-    /* Set security context. This is not atomic w.r.t file creation */
-    err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0);
-    err = err == -1 ? errno : 0;
-out:
-    if (!err) {
-        *open_fd = fd;
-    } else {
-        if (fd != -1) {
-            close(fd);
-            unlinkat(parent_inode->fd, name, 0);
-        }
-    }
-    return err;
-}
-
-static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode,
-                        const char *name, mode_t mode,
-                        struct fuse_file_info *fi, int *open_fd)
-{
-    struct lo_data *lo = lo_data(req);
-    char *mapped_name = NULL;
-    int err;
-    const char *ctxname = req->secctx.name;
-    bool secctx_enabled = req->secctx.ctxlen;
-
-    if (secctx_enabled && lo->xattrmap) {
-        err = xattr_map_client(lo, req->secctx.name, &mapped_name);
-        if (err < 0) {
-            return -err;
-        }
-
-        ctxname = mapped_name;
-    }
-
-    if (secctx_enabled) {
-        /*
-         * If security.selinux has not been remapped and selinux is enabled,
-         * use fscreate to set context before file creation. If not, use
-         * tmpfile method for regular files. Otherwise fallback to
-         * non-atomic method of file creation and xattr setting.
-         */
-        if (!mapped_name && lo->use_fscreate) {
-            err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi,
-                                            open_fd);
-            goto out;
-        } else if (S_ISREG(mode)) {
-            err = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi,
-                                           ctxname, open_fd);
-            /*
-             * If filesystem does not support O_TMPFILE, fallback to non-atomic
-             * method.
-             */
-            if (!err || err != EOPNOTSUPP) {
-                goto out;
-            }
-        }
-
-        err = do_create_secctx_noatomic(req, parent_inode, name, mode, fi,
-                                        ctxname, open_fd);
-    } else {
-        err = do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd,
-                                 false);
-    }
-
-out:
-    g_free(mapped_name);
-    return err;
-}
-
-static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
-                      mode_t mode, struct fuse_file_info *fi)
-{
-    int fd = -1;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *parent_inode;
-    struct lo_inode *inode = NULL;
-    struct fuse_entry_param e;
-    int err;
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)"
-             " kill_priv=%d\n", parent, name, fi->kill_priv);
-
-    if (!is_safe_path_component(name)) {
-        fuse_reply_err(req, EINVAL);
-        return;
-    }
-
-    parent_inode = lo_inode(req, parent);
-    if (!parent_inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    update_open_flags(lo->writeback, lo->allow_direct_io, fi);
-
-    err = do_lo_create(req, parent_inode, name, mode, fi, &fd);
-
-    /* Ignore the error if file exists and O_EXCL was not given */
-    if (err && (err != EEXIST || (fi->flags & O_EXCL))) {
-        goto out;
-    }
-
-    err = lo_do_lookup(req, parent, name, &e, &inode);
-    if (err) {
-        goto out;
-    }
-
-    err = lo_do_open(lo, inode, fd, fi);
-    fd = -1; /* lo_do_open() takes ownership of fd */
-    if (err) {
-        /* Undo lo_do_lookup() nlookup ref */
-        unref_inode_lolocked(lo, inode, 1);
-    }
-
-out:
-    lo_inode_put(lo, &inode);
-    lo_inode_put(lo, &parent_inode);
-
-    if (err) {
-        if (fd >= 0) {
-            close(fd);
-        }
-
-        fuse_reply_err(req, err);
-    } else {
-        fuse_reply_create(req, &e, fi);
-    }
-}
-
-/* Should be called with inode->plock_mutex held */
-static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
-                                                      struct lo_inode *inode,
-                                                      uint64_t lock_owner,
-                                                      pid_t pid, int *err)
-{
-    struct lo_inode_plock *plock;
-    int fd;
-
-    plock =
-        g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner));
-
-    if (plock) {
-        return plock;
-    }
-
-    plock = malloc(sizeof(struct lo_inode_plock));
-    if (!plock) {
-        *err = ENOMEM;
-        return NULL;
-    }
-
-    /* Open another instance of file which can be used for ofd locks. */
-    /* TODO: What if file is not writable? */
-    fd = lo_inode_open(lo, inode, O_RDWR);
-    if (fd < 0) {
-        *err = -fd;
-        free(plock);
-        return NULL;
-    }
-
-    plock->lock_owner = lock_owner;
-    plock->fd = fd;
-    g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner),
-                        plock);
-    return plock;
-}
-
-static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                     struct flock *lock)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode;
-    struct lo_inode_plock *plock;
-    int ret, saverr = 0;
-
-    fuse_log(FUSE_LOG_DEBUG,
-             "lo_getlk(ino=%" PRIu64 ", flags=%d)"
-             " owner=0x%" PRIx64 ", l_type=%d l_start=0x%" PRIx64
-             " l_len=0x%" PRIx64 "\n",
-             ino, fi->flags, fi->lock_owner, lock->l_type,
-             (uint64_t)lock->l_start, (uint64_t)lock->l_len);
-
-    if (!lo->posix_lock) {
-        fuse_reply_err(req, ENOSYS);
-        return;
-    }
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    pthread_mutex_lock(&inode->plock_mutex);
-    plock =
-        lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
-    if (!plock) {
-        saverr = ret;
-        goto out;
-    }
-
-    ret = fcntl(plock->fd, F_OFD_GETLK, lock);
-    if (ret == -1) {
-        saverr = errno;
-    }
-
-out:
-    pthread_mutex_unlock(&inode->plock_mutex);
-    lo_inode_put(lo, &inode);
-
-    if (saverr) {
-        fuse_reply_err(req, saverr);
-    } else {
-        fuse_reply_lock(req, lock);
-    }
-}
-
-static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                     struct flock *lock, int sleep)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode;
-    struct lo_inode_plock *plock;
-    int ret, saverr = 0;
-
-    fuse_log(FUSE_LOG_DEBUG,
-             "lo_setlk(ino=%" PRIu64 ", flags=%d)"
-             " cmd=%d pid=%d owner=0x%" PRIx64 " sleep=%d l_whence=%d"
-             " l_start=0x%" PRIx64 " l_len=0x%" PRIx64 "\n",
-             ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep,
-             lock->l_whence, (uint64_t)lock->l_start, (uint64_t)lock->l_len);
-
-    if (!lo->posix_lock) {
-        fuse_reply_err(req, ENOSYS);
-        return;
-    }
-
-    if (sleep) {
-        fuse_reply_err(req, EOPNOTSUPP);
-        return;
-    }
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    pthread_mutex_lock(&inode->plock_mutex);
-    plock =
-        lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
-
-    if (!plock) {
-        saverr = ret;
-        goto out;
-    }
-
-    /* TODO: Is it alright to modify flock? */
-    lock->l_pid = 0;
-    ret = fcntl(plock->fd, F_OFD_SETLK, lock);
-    if (ret == -1) {
-        saverr = errno;
-    }
-
-out:
-    pthread_mutex_unlock(&inode->plock_mutex);
-    lo_inode_put(lo, &inode);
-
-    fuse_reply_err(req, saverr);
-}
-
-static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
-                        struct fuse_file_info *fi)
-{
-    int res;
-    struct lo_dirp *d;
-    int fd;
-
-    (void)ino;
-
-    d = lo_dirp(req, fi);
-    if (!d) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    fd = dirfd(d->dp);
-    if (datasync) {
-        res = fdatasync(fd);
-    } else {
-        res = fsync(fd);
-    }
-
-    lo_dirp_put(&d);
-
-    fuse_reply_err(req, res == -1 ? errno : 0);
-}
-
-static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode = lo_inode(req, ino);
-    int err;
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d, kill_priv=%d)"
-             "\n", ino, fi->flags, fi->kill_priv);
-
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    err = lo_do_open(lo, inode, -1, fi);
-    lo_inode_put(lo, &inode);
-    if (err) {
-        fuse_reply_err(req, err);
-    } else {
-        fuse_reply_open(req, fi);
-    }
-}
-
-static void lo_release(fuse_req_t req, fuse_ino_t ino,
-                       struct fuse_file_info *fi)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_map_elem *elem;
-    int fd = -1;
-
-    (void)ino;
-
-    pthread_mutex_lock(&lo->mutex);
-    elem = lo_map_get(&lo->fd_map, fi->fh);
-    if (elem) {
-        fd = elem->fd;
-        elem = NULL;
-        lo_map_remove(&lo->fd_map, fi->fh);
-    }
-    pthread_mutex_unlock(&lo->mutex);
-
-    close(fd);
-    fuse_reply_err(req, 0);
-}
-
-static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
-{
-    int res;
-    (void)ino;
-    struct lo_inode *inode;
-    struct lo_data *lo = lo_data(req);
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    if (!S_ISREG(inode->filetype)) {
-        lo_inode_put(lo, &inode);
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    /* An fd is going away. Cleanup associated posix locks */
-    if (lo->posix_lock) {
-        pthread_mutex_lock(&inode->plock_mutex);
-        g_hash_table_remove(inode->posix_locks,
-            GUINT_TO_POINTER(fi->lock_owner));
-        pthread_mutex_unlock(&inode->plock_mutex);
-    }
-    res = close(dup(lo_fi_fd(req, fi)));
-    lo_inode_put(lo, &inode);
-    fuse_reply_err(req, res == -1 ? errno : 0);
-}
-
-static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
-                     struct fuse_file_info *fi)
-{
-    struct lo_inode *inode = lo_inode(req, ino);
-    struct lo_data *lo = lo_data(req);
-    int res;
-    int fd;
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
-             (void *)fi);
-
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    if (!fi) {
-        fd = lo_inode_open(lo, inode, O_RDWR);
-        if (fd < 0) {
-            res = -fd;
-            goto out;
-        }
-    } else {
-        fd = lo_fi_fd(req, fi);
-    }
-
-    if (datasync) {
-        res = fdatasync(fd) == -1 ? errno : 0;
-    } else {
-        res = fsync(fd) == -1 ? errno : 0;
-    }
-    if (!fi) {
-        close(fd);
-    }
-out:
-    lo_inode_put(lo, &inode);
-    fuse_reply_err(req, res);
-}
-
-static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
-                    struct fuse_file_info *fi)
-{
-    struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
-
-    fuse_log(FUSE_LOG_DEBUG,
-             "lo_read(ino=%" PRIu64 ", size=%zd, "
-             "off=%lu)\n",
-             ino, size, (unsigned long)offset);
-
-    buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
-    buf.buf[0].fd = lo_fi_fd(req, fi);
-    buf.buf[0].pos = offset;
-
-    fuse_reply_data(req, &buf);
-}
-
-static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
-                         struct fuse_bufvec *in_buf, off_t off,
-                         struct fuse_file_info *fi)
-{
-    (void)ino;
-    ssize_t res;
-    struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
-    bool cap_fsetid_dropped = false;
-
-    out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
-    out_buf.buf[0].fd = lo_fi_fd(req, fi);
-    out_buf.buf[0].pos = off;
-
-    fuse_log(FUSE_LOG_DEBUG,
-             "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu kill_priv=%d)\n",
-             ino, out_buf.buf[0].size, (unsigned long)off, fi->kill_priv);
-
-    res = drop_security_capability(lo_data(req), out_buf.buf[0].fd);
-    if (res) {
-        fuse_reply_err(req, res);
-        return;
-    }
-
-    /*
-     * If kill_priv is set, drop CAP_FSETID which should lead to kernel
-     * clearing setuid/setgid on file. Note, for WRITE, we need to do
-     * this even if killpriv_v2 is not enabled. fuse direct write path
-     * relies on this.
-     */
-    if (fi->kill_priv) {
-        res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
-        if (res != 0) {
-            fuse_reply_err(req, res);
-            return;
-        }
-    }
-
-    res = fuse_buf_copy(&out_buf, in_buf);
-    if (res < 0) {
-        fuse_reply_err(req, -res);
-    } else {
-        fuse_reply_write(req, (size_t)res);
-    }
-
-    if (cap_fsetid_dropped) {
-        res = gain_effective_cap("FSETID");
-        if (res) {
-            fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
-        }
-    }
-}
-
-static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
-{
-    int res;
-    struct statvfs stbuf;
-
-    res = fstatvfs(lo_fd(req, ino), &stbuf);
-    if (res == -1) {
-        fuse_reply_err(req, errno);
-    } else {
-        fuse_reply_statfs(req, &stbuf);
-    }
-}
-
-static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
-                         off_t length, struct fuse_file_info *fi)
-{
-    int err = EOPNOTSUPP;
-    (void)ino;
-
-#ifdef CONFIG_FALLOCATE
-    err = fallocate(lo_fi_fd(req, fi), mode, offset, length);
-    if (err < 0) {
-        err = errno;
-    }
-
-#elif defined(CONFIG_POSIX_FALLOCATE)
-    if (mode) {
-        fuse_reply_err(req, EOPNOTSUPP);
-        return;
-    }
-
-    err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
-#endif
-
-    fuse_reply_err(req, err);
-}
-
-static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
-                     int op)
-{
-    int res;
-    (void)ino;
-
-    if (!(op & LOCK_NB)) {
-        /*
-         * Blocking flock can deadlock as there is only one thread
-         * serving the queue.
-         */
-        fuse_reply_err(req, EOPNOTSUPP);
-        return;
-    }
-
-    res = flock(lo_fi_fd(req, fi), op);
-
-    fuse_reply_err(req, res == -1 ? errno : 0);
-}
-
-/* types */
-/*
- * Exit; process attribute unmodified if matched.
- * An empty key applies to all.
- */
-#define XATTR_MAP_FLAG_OK      (1 <<  0)
-/*
- * The attribute is unwanted;
- * EPERM on write, hidden on read.
- */
-#define XATTR_MAP_FLAG_BAD     (1 <<  1)
-/*
- * For attr that start with 'key' prepend 'prepend'
- * 'key' may be empty to prepend for all attrs
- * key is defined from set/remove point of view.
- * Automatically reversed on read
- */
-#define XATTR_MAP_FLAG_PREFIX  (1 <<  2)
-/*
- * The attribute is unsupported;
- * ENOTSUP on write, hidden on read.
- */
-#define XATTR_MAP_FLAG_UNSUPPORTED     (1 <<  3)
-
-/* scopes */
-/* Apply rule to get/set/remove */
-#define XATTR_MAP_FLAG_CLIENT  (1 << 16)
-/* Apply rule to list */
-#define XATTR_MAP_FLAG_SERVER  (1 << 17)
-/* Apply rule to all */
-#define XATTR_MAP_FLAG_ALL   (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT)
-
-static void add_xattrmap_entry(struct lo_data *lo,
-                               const XattrMapEntry *new_entry)
-{
-    XattrMapEntry *res = g_realloc_n(lo->xattr_map_list,
-                                     lo->xattr_map_nentries + 1,
-                                     sizeof(XattrMapEntry));
-    res[lo->xattr_map_nentries++] = *new_entry;
-
-    lo->xattr_map_list = res;
-}
-
-static void free_xattrmap(struct lo_data *lo)
-{
-    XattrMapEntry *map = lo->xattr_map_list;
-    size_t i;
-
-    if (!map) {
-        return;
-    }
-
-    for (i = 0; i < lo->xattr_map_nentries; i++) {
-        g_free(map[i].key);
-        g_free(map[i].prepend);
-    };
-
-    g_free(map);
-    lo->xattr_map_list = NULL;
-    lo->xattr_map_nentries = -1;
-}
-
-/*
- * Handle the 'map' type, which is sugar for a set of commands
- * for the common case of prefixing a subset or everything,
- * and allowing anything not prefixed through.
- * It must be the last entry in the stream, although there
- * can be other entries before it.
- * The form is:
- *    :map:key:prefix:
- *
- * key maybe empty in which case all entries are prefixed.
- */
-static void parse_xattrmap_map(struct lo_data *lo,
-                               const char *rule, char sep)
-{
-    const char *tmp;
-    char *key;
-    char *prefix;
-    XattrMapEntry tmp_entry;
-
-    if (*rule != sep) {
-        fuse_log(FUSE_LOG_ERR,
-                 "%s: Expecting '%c' after 'map' keyword, found '%c'\n",
-                 __func__, sep, *rule);
-        exit(1);
-    }
-
-    rule++;
-
-    /* At start of 'key' field */
-    tmp = strchr(rule, sep);
-    if (!tmp) {
-        fuse_log(FUSE_LOG_ERR,
-                 "%s: Missing '%c' at end of key field in map rule\n",
-                 __func__, sep);
-        exit(1);
-    }
-
-    key = g_strndup(rule, tmp - rule);
-    rule = tmp + 1;
-
-    /* At start of prefix field */
-    tmp = strchr(rule, sep);
-    if (!tmp) {
-        fuse_log(FUSE_LOG_ERR,
-                 "%s: Missing '%c' at end of prefix field in map rule\n",
-                 __func__, sep);
-        exit(1);
-    }
-
-    prefix = g_strndup(rule, tmp - rule);
-    rule = tmp + 1;
-
-    /*
-     * This should be the end of the string, we don't allow
-     * any more commands after 'map'.
-     */
-    if (*rule) {
-        fuse_log(FUSE_LOG_ERR,
-                 "%s: Expecting end of command after map, found '%c'\n",
-                 __func__, *rule);
-        exit(1);
-    }
-
-    /* 1st: Prefix matches/everything */
-    tmp_entry.flags = XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL;
-    tmp_entry.key = g_strdup(key);
-    tmp_entry.prepend = g_strdup(prefix);
-    add_xattrmap_entry(lo, &tmp_entry);
-
-    if (!*key) {
-        /* Prefix all case */
-
-        /* 2nd: Hide any non-prefixed entries on the host */
-        tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL;
-        tmp_entry.key = g_strdup("");
-        tmp_entry.prepend = g_strdup("");
-        add_xattrmap_entry(lo, &tmp_entry);
-    } else {
-        /* Prefix matching case */
-
-        /* 2nd: Hide non-prefixed but matching entries on the host */
-        tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER;
-        tmp_entry.key = g_strdup(""); /* Not used */
-        tmp_entry.prepend = g_strdup(key);
-        add_xattrmap_entry(lo, &tmp_entry);
-
-        /* 3rd: Stop the client accessing prefixed attributes directly */
-        tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT;
-        tmp_entry.key = g_strdup(prefix);
-        tmp_entry.prepend = g_strdup(""); /* Not used */
-        add_xattrmap_entry(lo, &tmp_entry);
-
-        /* 4th: Everything else is OK */
-        tmp_entry.flags = XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL;
-        tmp_entry.key = g_strdup("");
-        tmp_entry.prepend = g_strdup("");
-        add_xattrmap_entry(lo, &tmp_entry);
-    }
-
-    g_free(key);
-    g_free(prefix);
-}
-
-static void parse_xattrmap(struct lo_data *lo)
-{
-    const char *map = lo->xattrmap;
-    const char *tmp;
-    int ret;
-
-    lo->xattr_map_nentries = 0;
-    while (*map) {
-        XattrMapEntry tmp_entry;
-        char sep;
-
-        if (isspace(*map)) {
-            map++;
-            continue;
-        }
-        /* The separator is the first non-space of the rule */
-        sep = *map++;
-        if (!sep) {
-            break;
-        }
-
-        tmp_entry.flags = 0;
-        /* Start of 'type' */
-        if (strstart(map, "prefix", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX;
-        } else if (strstart(map, "ok", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_OK;
-        } else if (strstart(map, "bad", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_BAD;
-        } else if (strstart(map, "unsupported", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_UNSUPPORTED;
-        } else if (strstart(map, "map", &map)) {
-            /*
-             * map is sugar that adds a number of rules, and must be
-             * the last entry.
-             */
-            parse_xattrmap_map(lo, map, sep);
-            break;
-        } else {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Unexpected type;"
-                     "Expecting 'prefix', 'ok', 'bad', 'unsupported' or 'map'"
-                     " in rule %zu\n", __func__, lo->xattr_map_nentries);
-            exit(1);
-        }
-
-        if (*map++ != sep) {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Missing '%c' at end of type field of rule %zu\n",
-                     __func__, sep, lo->xattr_map_nentries);
-            exit(1);
-        }
-
-        /* Start of 'scope' */
-        if (strstart(map, "client", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT;
-        } else if (strstart(map, "server", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_SERVER;
-        } else if (strstart(map, "all", &map)) {
-            tmp_entry.flags |= XATTR_MAP_FLAG_ALL;
-        } else {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Unexpected scope;"
-                     " Expecting 'client', 'server', or 'all', in rule %zu\n",
-                     __func__, lo->xattr_map_nentries);
-            exit(1);
-        }
-
-        if (*map++ != sep) {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Expecting '%c' found '%c'"
-                     " after scope in rule %zu\n",
-                     __func__, sep, *map, lo->xattr_map_nentries);
-            exit(1);
-        }
-
-        /* At start of 'key' field */
-        tmp = strchr(map, sep);
-        if (!tmp) {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Missing '%c' at end of key field of rule %zu",
-                     __func__, sep, lo->xattr_map_nentries);
-            exit(1);
-        }
-        tmp_entry.key = g_strndup(map, tmp - map);
-        map = tmp + 1;
-
-        /* At start of 'prepend' field */
-        tmp = strchr(map, sep);
-        if (!tmp) {
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Missing '%c' at end of prepend field of rule %zu",
-                     __func__, sep, lo->xattr_map_nentries);
-            exit(1);
-        }
-        tmp_entry.prepend = g_strndup(map, tmp - map);
-        map = tmp + 1;
-
-        add_xattrmap_entry(lo, &tmp_entry);
-        /* End of rule - go around again for another rule */
-    }
-
-    if (!lo->xattr_map_nentries) {
-        fuse_log(FUSE_LOG_ERR, "Empty xattr map\n");
-        exit(1);
-    }
-
-    ret = xattr_map_client(lo, "security.capability",
-                           &lo->xattr_security_capability);
-    if (ret) {
-        fuse_log(FUSE_LOG_ERR, "Failed to map security.capability: %s\n",
-                strerror(ret));
-        exit(1);
-    }
-    if (!lo->xattr_security_capability ||
-        !strcmp(lo->xattr_security_capability, "security.capability")) {
-        /* 1-1 mapping, don't need to do anything */
-        free(lo->xattr_security_capability);
-        lo->xattr_security_capability = NULL;
-    }
-}
-
-/*
- * For use with getxattr/setxattr/removexattr, where the client
- * gives us a name and we may need to choose a different one.
- * Allocates a buffer for the result placing it in *out_name.
- *   If there's no change then *out_name is not set.
- * Returns 0 on success
- * Can return -EPERM to indicate we block a given attribute
- *   (in which case out_name is not allocated)
- * Can return -ENOMEM to indicate out_name couldn't be allocated.
- */
-static int xattr_map_client(const struct lo_data *lo, const char *client_name,
-                            char **out_name)
-{
-    size_t i;
-    for (i = 0; i < lo->xattr_map_nentries; i++) {
-        const XattrMapEntry *cur_entry = lo->xattr_map_list + i;
-
-        if ((cur_entry->flags & XATTR_MAP_FLAG_CLIENT) &&
-            (strstart(client_name, cur_entry->key, NULL))) {
-            if (cur_entry->flags & XATTR_MAP_FLAG_BAD) {
-                return -EPERM;
-            }
-            if (cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) {
-                return -ENOTSUP;
-            }
-            if (cur_entry->flags & XATTR_MAP_FLAG_OK) {
-                /* Unmodified name */
-                return 0;
-            }
-            if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) {
-                *out_name = g_try_malloc(strlen(client_name) +
-                                         strlen(cur_entry->prepend) + 1);
-                if (!*out_name) {
-                    return -ENOMEM;
-                }
-                sprintf(*out_name, "%s%s", cur_entry->prepend, client_name);
-                return 0;
-            }
-        }
-    }
-
-    return -EPERM;
-}
-
-/*
- * For use with listxattr where the server fs gives us a name and we may need
- * to sanitize this for the client.
- * Returns a pointer to the result in *out_name
- *   This is always the original string or the current string with some prefix
- *   removed; no reallocation is done.
- * Returns 0 on success
- * Can return -ENODATA to indicate the name should be dropped from the list.
- */
-static int xattr_map_server(const struct lo_data *lo, const char *server_name,
-                            const char **out_name)
-{
-    size_t i;
-    const char *end;
-
-    for (i = 0; i < lo->xattr_map_nentries; i++) {
-        const XattrMapEntry *cur_entry = lo->xattr_map_list + i;
-
-        if ((cur_entry->flags & XATTR_MAP_FLAG_SERVER) &&
-            (strstart(server_name, cur_entry->prepend, &end))) {
-            if (cur_entry->flags & XATTR_MAP_FLAG_BAD ||
-                cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) {
-                return -ENODATA;
-            }
-            if (cur_entry->flags & XATTR_MAP_FLAG_OK) {
-                *out_name = server_name;
-                return 0;
-            }
-            if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) {
-                /* Remove prefix */
-                *out_name = end;
-                return 0;
-            }
-        }
-    }
-
-    return -ENODATA;
-}
-
-static bool block_xattr(struct lo_data *lo, const char *name)
-{
-    /*
-     * If user explicitly enabled posix_acl or did not provide any option,
-     * do not block acl. Otherwise block system.posix_acl_access and
-     * system.posix_acl_default xattrs.
-     */
-    if (lo->user_posix_acl) {
-        return false;
-    }
-    if (!strcmp(name, "system.posix_acl_access") ||
-        !strcmp(name, "system.posix_acl_default"))
-            return true;
-
-    return false;
-}
-
-/*
- * Returns number of bytes in xattr_list after filtering on success. This
- * could be zero as well if nothing is left after filtering.
- *
- * Returns negative error code on failure.
- * xattr_list is modified in place.
- */
-static int remove_blocked_xattrs(struct lo_data *lo, char *xattr_list,
-                                 unsigned in_size)
-{
-    size_t out_index, in_index;
-
-    /*
-     * As of now we only filter out acl xattrs. If acls are enabled or
-     * they have not been explicitly disabled, there is nothing to
-     * filter.
-     */
-    if (lo->user_posix_acl) {
-        return in_size;
-    }
-
-    out_index = 0;
-    in_index = 0;
-    while (in_index < in_size) {
-        char *in_ptr = xattr_list + in_index;
-
-        /* Length of current attribute name */
-        size_t in_len = strlen(xattr_list + in_index) + 1;
-
-        if (!block_xattr(lo, in_ptr)) {
-            if (in_index != out_index) {
-                memmove(xattr_list + out_index, xattr_list + in_index, in_len);
-            }
-            out_index += in_len;
-        }
-        in_index += in_len;
-     }
-    return out_index;
-}
-
-static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
-                        size_t size)
-{
-    struct lo_data *lo = lo_data(req);
-    g_autofree char *value = NULL;
-    char procname[64];
-    const char *name;
-    char *mapped_name;
-    struct lo_inode *inode;
-    ssize_t ret;
-    int saverr;
-    int fd = -1;
-
-    if (block_xattr(lo, in_name)) {
-        fuse_reply_err(req, EOPNOTSUPP);
-        return;
-    }
-
-    mapped_name = NULL;
-    name = in_name;
-    if (lo->xattrmap) {
-        ret = xattr_map_client(lo, in_name, &mapped_name);
-        if (ret < 0) {
-            if (ret == -EPERM) {
-                ret = -ENODATA;
-            }
-            fuse_reply_err(req, -ret);
-            return;
-        }
-        if (mapped_name) {
-            name = mapped_name;
-        }
-    }
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        g_free(mapped_name);
-        return;
-    }
-
-    saverr = ENOSYS;
-    if (!lo_data(req)->xattr) {
-        goto out;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
-             ino, name, size);
-
-    if (size) {
-        value = g_try_malloc(size);
-        if (!value) {
-            goto out_err;
-        }
-    }
-
-    sprintf(procname, "%i", inode->fd);
-    /*
-     * It is not safe to open() non-regular/non-dir files in file server
-     * unless O_PATH is used, so use that method for regular files/dir
-     * only (as it seems giving less performance overhead).
-     * Otherwise, call fchdir() to avoid open().
-     */
-    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
-        if (fd < 0) {
-            goto out_err;
-        }
-        ret = fgetxattr(fd, name, value, size);
-        saverr = ret == -1 ? errno : 0;
-    } else {
-        /* fchdir should not fail here */
-        FCHDIR_NOFAIL(lo->proc_self_fd);
-        ret = getxattr(procname, name, value, size);
-        saverr = ret == -1 ? errno : 0;
-        FCHDIR_NOFAIL(lo->root.fd);
-    }
-
-    if (ret == -1) {
-        goto out;
-    }
-    if (size) {
-        saverr = 0;
-        if (ret == 0) {
-            goto out;
-        }
-        fuse_reply_buf(req, value, ret);
-    } else {
-        fuse_reply_xattr(req, ret);
-    }
-out_free:
-    if (fd >= 0) {
-        close(fd);
-    }
-
-    lo_inode_put(lo, &inode);
-    return;
-
-out_err:
-    saverr = errno;
-out:
-    fuse_reply_err(req, saverr);
-    g_free(mapped_name);
-    goto out_free;
-}
-
-static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
-{
-    struct lo_data *lo = lo_data(req);
-    g_autofree char *value = NULL;
-    char procname[64];
-    struct lo_inode *inode;
-    ssize_t ret;
-    int saverr;
-    int fd = -1;
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    saverr = ENOSYS;
-    if (!lo_data(req)->xattr) {
-        goto out;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
-             size);
-
-    if (size) {
-        value = g_try_malloc(size);
-        if (!value) {
-            goto out_err;
-        }
-    }
-
-    sprintf(procname, "%i", inode->fd);
-    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
-        if (fd < 0) {
-            goto out_err;
-        }
-        ret = flistxattr(fd, value, size);
-        saverr = ret == -1 ? errno : 0;
-    } else {
-        /* fchdir should not fail here */
-        FCHDIR_NOFAIL(lo->proc_self_fd);
-        ret = listxattr(procname, value, size);
-        saverr = ret == -1 ? errno : 0;
-        FCHDIR_NOFAIL(lo->root.fd);
-    }
-
-    if (ret == -1) {
-        goto out;
-    }
-    if (size) {
-        saverr = 0;
-        if (ret == 0) {
-            goto out;
-        }
-
-        if (lo->xattr_map_list) {
-            /*
-             * Map the names back, some attributes might be dropped,
-             * some shortened, but not increased, so we shouldn't
-             * run out of room.
-             */
-            size_t out_index, in_index;
-            out_index = 0;
-            in_index = 0;
-            while (in_index < ret) {
-                const char *map_out;
-                char *in_ptr = value + in_index;
-                /* Length of current attribute name */
-                size_t in_len = strlen(value + in_index) + 1;
-
-                int mapret = xattr_map_server(lo, in_ptr, &map_out);
-                if (mapret != -ENODATA && mapret != 0) {
-                    /* Shouldn't happen */
-                    saverr = -mapret;
-                    goto out;
-                }
-                if (mapret == 0) {
-                    /* Either unchanged, or truncated */
-                    size_t out_len;
-                    if (map_out != in_ptr) {
-                        /* +1 copies the NIL */
-                        out_len = strlen(map_out) + 1;
-                    } else {
-                        /* No change */
-                        out_len = in_len;
-                    }
-                    /*
-                     * Move result along, may still be needed for an unchanged
-                     * entry if a previous entry was changed.
-                     */
-                    memmove(value + out_index, map_out, out_len);
-
-                    out_index += out_len;
-                }
-                in_index += in_len;
-            }
-            ret = out_index;
-            if (ret == 0) {
-                goto out;
-            }
-        }
-
-        ret = remove_blocked_xattrs(lo, value, ret);
-        if (ret <= 0) {
-            saverr = -ret;
-            goto out;
-        }
-        fuse_reply_buf(req, value, ret);
-    } else {
-        /*
-         * xattrmap only ever shortens the result,
-         * so we don't need to do anything clever with the
-         * allocation length here.
-         */
-        fuse_reply_xattr(req, ret);
-    }
-out_free:
-    if (fd >= 0) {
-        close(fd);
-    }
-
-    lo_inode_put(lo, &inode);
-    return;
-
-out_err:
-    saverr = errno;
-out:
-    fuse_reply_err(req, saverr);
-    goto out_free;
-}
-
-static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
-                        const char *value, size_t size, int flags,
-                        uint32_t extra_flags)
-{
-    char procname[64];
-    const char *name;
-    char *mapped_name;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode;
-    ssize_t ret;
-    int saverr;
-    int fd = -1;
-    bool switched_creds = false;
-    bool cap_fsetid_dropped = false;
-    struct lo_cred old = {};
-
-    if (block_xattr(lo, in_name)) {
-        fuse_reply_err(req, EOPNOTSUPP);
-        return;
-    }
-
-    mapped_name = NULL;
-    name = in_name;
-    if (lo->xattrmap) {
-        ret = xattr_map_client(lo, in_name, &mapped_name);
-        if (ret < 0) {
-            fuse_reply_err(req, -ret);
-            return;
-        }
-        if (mapped_name) {
-            name = mapped_name;
-        }
-    }
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        g_free(mapped_name);
-        return;
-    }
-
-    saverr = ENOSYS;
-    if (!lo_data(req)->xattr) {
-        goto out;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
-             ", name=%s value=%s size=%zd)\n", ino, name, value, size);
-
-    sprintf(procname, "%i", inode->fd);
-    /*
-     * If we are setting posix access acl and if SGID needs to be
-     * cleared, then switch to caller's gid and drop CAP_FSETID
-     * and that should make sure host kernel clears SGID.
-     *
-     * This probably will not work when we support idmapped mounts.
-     * In that case we will need to find a non-root gid and switch
-     * to it. (Instead of gid in request). Fix it when we support
-     * idmapped mounts.
-     */
-    if (lo->posix_acl && !strcmp(name, "system.posix_acl_access")
-        && (extra_flags & FUSE_SETXATTR_ACL_KILL_SGID)) {
-        ret = lo_drop_cap_change_cred(req, &old, false, "FSETID",
-                                      &cap_fsetid_dropped);
-        if (ret) {
-            saverr = ret;
-            goto out;
-        }
-        switched_creds = true;
-    }
-    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
-        if (fd < 0) {
-            saverr = errno;
-            goto out;
-        }
-        ret = fsetxattr(fd, name, value, size, flags);
-        saverr = ret == -1 ? errno : 0;
-    } else {
-        /* fchdir should not fail here */
-        FCHDIR_NOFAIL(lo->proc_self_fd);
-        ret = setxattr(procname, name, value, size, flags);
-        saverr = ret == -1 ? errno : 0;
-        FCHDIR_NOFAIL(lo->root.fd);
-    }
-    if (switched_creds) {
-        if (cap_fsetid_dropped)
-            lo_restore_cred_gain_cap(&old, false, "FSETID");
-        else
-            lo_restore_cred(&old, false);
-    }
-
-out:
-    if (fd >= 0) {
-        close(fd);
-    }
-
-    lo_inode_put(lo, &inode);
-    g_free(mapped_name);
-    fuse_reply_err(req, saverr);
-}
-
-static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name)
-{
-    char procname[64];
-    const char *name;
-    char *mapped_name;
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode;
-    ssize_t ret;
-    int saverr;
-    int fd = -1;
-
-    if (block_xattr(lo, in_name)) {
-        fuse_reply_err(req, EOPNOTSUPP);
-        return;
-    }
-
-    mapped_name = NULL;
-    name = in_name;
-    if (lo->xattrmap) {
-        ret = xattr_map_client(lo, in_name, &mapped_name);
-        if (ret < 0) {
-            fuse_reply_err(req, -ret);
-            return;
-        }
-        if (mapped_name) {
-            name = mapped_name;
-        }
-    }
-
-    inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        g_free(mapped_name);
-        return;
-    }
-
-    saverr = ENOSYS;
-    if (!lo_data(req)->xattr) {
-        goto out;
-    }
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
-             name);
-
-    sprintf(procname, "%i", inode->fd);
-    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
-        if (fd < 0) {
-            saverr = errno;
-            goto out;
-        }
-        ret = fremovexattr(fd, name);
-        saverr = ret == -1 ? errno : 0;
-    } else {
-        /* fchdir should not fail here */
-        FCHDIR_NOFAIL(lo->proc_self_fd);
-        ret = removexattr(procname, name);
-        saverr = ret == -1 ? errno : 0;
-        FCHDIR_NOFAIL(lo->root.fd);
-    }
-
-out:
-    if (fd >= 0) {
-        close(fd);
-    }
-
-    lo_inode_put(lo, &inode);
-    g_free(mapped_name);
-    fuse_reply_err(req, saverr);
-}
-
-#ifdef HAVE_COPY_FILE_RANGE
-static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
-                               struct fuse_file_info *fi_in, fuse_ino_t ino_out,
-                               off_t off_out, struct fuse_file_info *fi_out,
-                               size_t len, int flags)
-{
-    int in_fd, out_fd;
-    ssize_t res;
-
-    in_fd = lo_fi_fd(req, fi_in);
-    out_fd = lo_fi_fd(req, fi_out);
-
-    fuse_log(FUSE_LOG_DEBUG,
-             "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
-             "off=%ju, ino=%" PRIu64 "/fd=%d, "
-             "off=%ju, size=%zd, flags=0x%x)\n",
-             ino_in, in_fd, (intmax_t)off_in,
-             ino_out, out_fd, (intmax_t)off_out, len, flags);
-
-    res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
-    if (res < 0) {
-        fuse_reply_err(req, errno);
-    } else {
-        fuse_reply_write(req, res);
-    }
-}
-#endif
-
-static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
-                     struct fuse_file_info *fi)
-{
-    off_t res;
-
-    (void)ino;
-    res = lseek(lo_fi_fd(req, fi), off, whence);
-    if (res != -1) {
-        fuse_reply_lseek(req, res);
-    } else {
-        fuse_reply_err(req, errno);
-    }
-}
-
-static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
-{
-    int fd, ret = 0;
-
-    fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
-             inode->fuse_ino);
-
-    fd = lo_inode_open(lo, inode, O_RDONLY);
-    if (fd < 0) {
-        return -fd;
-    }
-
-    if (syncfs(fd) < 0) {
-        ret = errno;
-    }
-
-    close(fd);
-    return ret;
-}
-
-static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
-{
-    struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode = lo_inode(req, ino);
-    int err;
-
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
-    }
-
-    err = lo_do_syncfs(lo, inode);
-    lo_inode_put(lo, &inode);
-
-    /*
-     * If submounts aren't announced, the client only sends a request to
-     * sync the root inode. TODO: Track submounts internally and iterate
-     * over them as well.
-     */
-
-    fuse_reply_err(req, err);
-}
-
-static void lo_destroy(void *userdata)
-{
-    struct lo_data *lo = (struct lo_data *)userdata;
-
-    pthread_mutex_lock(&lo->mutex);
-    while (true) {
-        GHashTableIter iter;
-        gpointer key, value;
-
-        g_hash_table_iter_init(&iter, lo->inodes);
-        if (!g_hash_table_iter_next(&iter, &key, &value)) {
-            break;
-        }
-
-        struct lo_inode *inode = value;
-        unref_inode(lo, inode, inode->nlookup);
-    }
-    pthread_mutex_unlock(&lo->mutex);
-}
-
-static struct fuse_lowlevel_ops lo_oper = {
-    .init = lo_init,
-    .lookup = lo_lookup,
-    .mkdir = lo_mkdir,
-    .mknod = lo_mknod,
-    .symlink = lo_symlink,
-    .link = lo_link,
-    .unlink = lo_unlink,
-    .rmdir = lo_rmdir,
-    .rename = lo_rename,
-    .forget = lo_forget,
-    .forget_multi = lo_forget_multi,
-    .getattr = lo_getattr,
-    .setattr = lo_setattr,
-    .readlink = lo_readlink,
-    .opendir = lo_opendir,
-    .readdir = lo_readdir,
-    .readdirplus = lo_readdirplus,
-    .releasedir = lo_releasedir,
-    .fsyncdir = lo_fsyncdir,
-    .create = lo_create,
-    .getlk = lo_getlk,
-    .setlk = lo_setlk,
-    .open = lo_open,
-    .release = lo_release,
-    .flush = lo_flush,
-    .fsync = lo_fsync,
-    .read = lo_read,
-    .write_buf = lo_write_buf,
-    .statfs = lo_statfs,
-    .fallocate = lo_fallocate,
-    .flock = lo_flock,
-    .getxattr = lo_getxattr,
-    .listxattr = lo_listxattr,
-    .setxattr = lo_setxattr,
-    .removexattr = lo_removexattr,
-#ifdef HAVE_COPY_FILE_RANGE
-    .copy_file_range = lo_copy_file_range,
-#endif
-    .lseek = lo_lseek,
-    .syncfs = lo_syncfs,
-    .destroy = lo_destroy,
-};
-
-/* Print vhost-user.json backend program capabilities */
-static void print_capabilities(void)
-{
-    printf("{\n");
-    printf("  \"type\": \"fs\"\n");
-    printf("}\n");
-}
-
-/*
- * Drop all Linux capabilities because the wait parent process only needs to
- * sit in waitpid(2) and terminate.
- */
-static void setup_wait_parent_capabilities(void)
-{
-    capng_setpid(syscall(SYS_gettid));
-    capng_clear(CAPNG_SELECT_BOTH);
-    capng_apply(CAPNG_SELECT_BOTH);
-}
-
-/*
- * Move to a new mount, net, and pid namespaces to isolate this process.
- */
-static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
-{
-    pid_t child;
-
-    /*
-     * Create a new pid namespace for *child* processes.  We'll have to
-     * fork in order to enter the new pid namespace.  A new mount namespace
-     * is also needed so that we can remount /proc for the new pid
-     * namespace.
-     *
-     * Our UNIX domain sockets have been created.  Now we can move to
-     * an empty network namespace to prevent TCP/IP and other network
-     * activity in case this process is compromised.
-     */
-    if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
-        fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
-        exit(1);
-    }
-
-    child = fork();
-    if (child < 0) {
-        fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
-        exit(1);
-    }
-    if (child > 0) {
-        pid_t waited;
-        int wstatus;
-
-        setup_wait_parent_capabilities();
-
-        /* The parent waits for the child */
-        do {
-            waited = waitpid(child, &wstatus, 0);
-        } while (waited < 0 && errno == EINTR && !se->exited);
-
-        /* We were terminated by a signal, see fuse_signals.c */
-        if (se->exited) {
-            exit(0);
-        }
-
-        if (WIFEXITED(wstatus)) {
-            exit(WEXITSTATUS(wstatus));
-        }
-
-        exit(1);
-    }
-
-    /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
-    prctl(PR_SET_PDEATHSIG, SIGTERM);
-
-    /*
-     * If the mounts have shared propagation then we want to opt out so our
-     * mount changes don't affect the parent mount namespace.
-     */
-    if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
-        fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
-        exit(1);
-    }
-
-    /* The child must remount /proc to use the new pid namespace */
-    if (mount("proc", "/proc", "proc",
-              MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
-        fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
-        exit(1);
-    }
-
-    /* Get the /proc/self/task descriptor */
-    lo->proc_self_task = open("/proc/self/task/", O_PATH);
-    if (lo->proc_self_task == -1) {
-        fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n");
-        exit(1);
-    }
-
-    lo->use_fscreate = is_fscreate_usable(lo);
-
-    /*
-     * We only need /proc/self/fd. Prevent ".." from accessing parent
-     * directories of /proc/self/fd by bind-mounting it over /proc. Since / was
-     * previously remounted with MS_REC | MS_SLAVE this mount change only
-     * affects our process.
-     */
-    if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) {
-        fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n");
-        exit(1);
-    }
-
-    /* Get the /proc (actually /proc/self/fd, see above) file descriptor */
-    lo->proc_self_fd = open("/proc", O_PATH);
-    if (lo->proc_self_fd == -1) {
-        fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n");
-        exit(1);
-    }
-}
-
-/*
- * Capture the capability state, we'll need to restore this for individual
- * threads later; see load_capng.
- */
-static void setup_capng(void)
-{
-    /* Note this accesses /proc so has to happen before the sandbox */
-    if (capng_get_caps_process()) {
-        fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n");
-        exit(1);
-    }
-    pthread_mutex_init(&cap.mutex, NULL);
-    pthread_mutex_lock(&cap.mutex);
-    cap.saved = capng_save_state();
-    if (!cap.saved) {
-        fuse_log(FUSE_LOG_ERR, "capng_save_state\n");
-        exit(1);
-    }
-    pthread_mutex_unlock(&cap.mutex);
-}
-
-static void cleanup_capng(void)
-{
-    free(cap.saved);
-    cap.saved = NULL;
-    pthread_mutex_destroy(&cap.mutex);
-}
-
-
-/*
- * Make the source directory our root so symlinks cannot escape and no other
- * files are accessible.  Assumes unshare(CLONE_NEWNS) was already called.
- */
-static void setup_mounts(const char *source)
-{
-    int oldroot;
-    int newroot;
-
-    if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) {
-        fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
-        exit(1);
-    }
-
-    /* This magic is based on lxc's lxc_pivot_root() */
-    oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-    if (oldroot < 0) {
-        fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
-        exit(1);
-    }
-
-    newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-    if (newroot < 0) {
-        fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
-        exit(1);
-    }
-
-    if (fchdir(newroot) < 0) {
-        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
-        exit(1);
-    }
-
-    if (syscall(__NR_pivot_root, ".", ".") < 0) {
-        fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
-        exit(1);
-    }
-
-    if (fchdir(oldroot) < 0) {
-        fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
-        exit(1);
-    }
-
-    if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
-        fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
-        exit(1);
-    }
-
-    if (umount2(".", MNT_DETACH) < 0) {
-        fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
-        exit(1);
-    }
-
-    if (fchdir(newroot) < 0) {
-        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
-        exit(1);
-    }
-
-    close(newroot);
-    close(oldroot);
-}
-
-/*
- * Only keep capabilities in allowlist that are needed for file system operation
- * The (possibly NULL) modcaps_in string passed in is free'd before exit.
- */
-static void setup_capabilities(char *modcaps_in)
-{
-    char *modcaps = modcaps_in;
-    pthread_mutex_lock(&cap.mutex);
-    capng_restore_state(&cap.saved);
-
-    /*
-     * Add to allowlist file system-related capabilities that are needed for a
-     * file server to act like root.  Drop everything else like networking and
-     * sysadmin capabilities.
-     *
-     * Exclusions:
-     * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl
-     *    and we don't support that.
-     * 2. CAP_MAC_OVERRIDE is not included because it only seems to be
-     *    used by the Smack LSM.  Omit it until there is demand for it.
-     */
-    capng_setpid(syscall(SYS_gettid));
-    capng_clear(CAPNG_SELECT_BOTH);
-    if (capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE,
-            CAP_CHOWN,
-            CAP_DAC_OVERRIDE,
-            CAP_FOWNER,
-            CAP_FSETID,
-            CAP_SETGID,
-            CAP_SETUID,
-            CAP_MKNOD,
-            CAP_SETFCAP,
-            -1)) {
-        fuse_log(FUSE_LOG_ERR, "%s: capng_updatev failed\n", __func__);
-        exit(1);
-    }
-
-    /*
-     * The modcaps option is a colon separated list of caps,
-     * each preceded by either + or -.
-     */
-    while (modcaps) {
-        capng_act_t action;
-        int cap;
-
-        char *next = strchr(modcaps, ':');
-        if (next) {
-            *next = '\0';
-            next++;
-        }
-
-        switch (modcaps[0]) {
-        case '+':
-            action = CAPNG_ADD;
-            break;
-
-        case '-':
-            action = CAPNG_DROP;
-            break;
-
-        default:
-            fuse_log(FUSE_LOG_ERR,
-                     "%s: Expecting '+'/'-' in modcaps but found '%c'\n",
-                     __func__, modcaps[0]);
-            exit(1);
-        }
-        cap = capng_name_to_capability(modcaps + 1);
-        if (cap < 0) {
-            fuse_log(FUSE_LOG_ERR, "%s: Unknown capability '%s'\n", __func__,
-                     modcaps);
-            exit(1);
-        }
-        if (capng_update(action, CAPNG_PERMITTED | CAPNG_EFFECTIVE, cap)) {
-            fuse_log(FUSE_LOG_ERR, "%s: capng_update failed for '%s'\n",
-                     __func__, modcaps);
-            exit(1);
-        }
-
-        modcaps = next;
-    }
-    g_free(modcaps_in);
-
-    if (capng_apply(CAPNG_SELECT_BOTH)) {
-        fuse_log(FUSE_LOG_ERR, "%s: capng_apply failed\n", __func__);
-        exit(1);
-    }
-
-    cap.saved = capng_save_state();
-    if (!cap.saved) {
-        fuse_log(FUSE_LOG_ERR, "%s: capng_save_state failed\n", __func__);
-        exit(1);
-    }
-    pthread_mutex_unlock(&cap.mutex);
-}
-
-/*
- * Use chroot as a weaker sandbox for environments where the process is
- * launched without CAP_SYS_ADMIN.
- */
-static void setup_chroot(struct lo_data *lo)
-{
-    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
-    if (lo->proc_self_fd == -1) {
-        fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n");
-        exit(1);
-    }
-
-    lo->proc_self_task = open("/proc/self/task", O_PATH);
-    if (lo->proc_self_fd == -1) {
-        fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n");
-        exit(1);
-    }
-
-    lo->use_fscreate = is_fscreate_usable(lo);
-
-    /*
-     * Make the shared directory the file system root so that FUSE_OPEN
-     * (lo_open()) cannot escape the shared directory by opening a symlink.
-     *
-     * The chroot(2) syscall is later disabled by seccomp and the
-     * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot
-     * is not possible.
-     *
-     * However, it's still possible to escape the chroot via lo->proc_self_fd
-     * but that requires first gaining control of the process.
-     */
-    if (chroot(lo->source) != 0) {
-        fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source);
-        exit(1);
-    }
-
-    /* Move into the chroot */
-    if (chdir("/") != 0) {
-        fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n");
-        exit(1);
-    }
-}
-
-/*
- * Lock down this process to prevent access to other processes or files outside
- * source directory.  This reduces the impact of arbitrary code execution bugs.
- */
-static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
-                          bool enable_syslog)
-{
-    if (lo->sandbox == SANDBOX_NAMESPACE) {
-        setup_namespaces(lo, se);
-        setup_mounts(lo->source);
-    } else {
-        setup_chroot(lo);
-    }
-
-    setup_seccomp(enable_syslog);
-    setup_capabilities(g_strdup(lo->modcaps));
-}
-
-/* Set the maximum number of open file descriptors */
-static void setup_nofile_rlimit(unsigned long rlimit_nofile)
-{
-    struct rlimit rlim = {
-        .rlim_cur = rlimit_nofile,
-        .rlim_max = rlimit_nofile,
-    };
-
-    if (rlimit_nofile == 0) {
-        return; /* nothing to do */
-    }
-
-    if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
-        /* Ignore SELinux denials */
-        if (errno == EPERM) {
-            return;
-        }
-
-        fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n");
-        exit(1);
-    }
-}
-
-G_GNUC_PRINTF(2, 0)
-static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
-{
-    g_autofree char *localfmt = NULL;
-    char buf[64];
-
-    if (current_log_level < level) {
-        return;
-    }
-
-    if (current_log_level == FUSE_LOG_DEBUG) {
-        if (use_syslog) {
-            /* no timestamp needed */
-            localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
-                                       fmt);
-        } else {
-            g_autoptr(GDateTime) now = g_date_time_new_now_utc();
-            g_autofree char *nowstr = g_date_time_format(now,
-                                       "%Y-%m-%d %H:%M:%S.%%06d%z");
-            snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now));
-            localfmt = g_strdup_printf("[%s] [ID: %08ld] %s",
-                                       buf, syscall(__NR_gettid), fmt);
-        }
-        fmt = localfmt;
-    }
-
-    if (use_syslog) {
-        int priority = LOG_ERR;
-        switch (level) {
-        case FUSE_LOG_EMERG:
-            priority = LOG_EMERG;
-            break;
-        case FUSE_LOG_ALERT:
-            priority = LOG_ALERT;
-            break;
-        case FUSE_LOG_CRIT:
-            priority = LOG_CRIT;
-            break;
-        case FUSE_LOG_ERR:
-            priority = LOG_ERR;
-            break;
-        case FUSE_LOG_WARNING:
-            priority = LOG_WARNING;
-            break;
-        case FUSE_LOG_NOTICE:
-            priority = LOG_NOTICE;
-            break;
-        case FUSE_LOG_INFO:
-            priority = LOG_INFO;
-            break;
-        case FUSE_LOG_DEBUG:
-            priority = LOG_DEBUG;
-            break;
-        }
-        vsyslog(priority, fmt, ap);
-    } else {
-        vfprintf(stderr, fmt, ap);
-    }
-}
-
-static void setup_root(struct lo_data *lo, struct lo_inode *root)
-{
-    int fd, res;
-    struct stat stat;
-    uint64_t mnt_id;
-
-    fd = open("/", O_PATH);
-    if (fd == -1) {
-        fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source);
-        exit(1);
-    }
-
-    res = do_statx(lo, fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
-                   &mnt_id);
-    if (res == -1) {
-        fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source);
-        exit(1);
-    }
-
-    root->filetype = S_IFDIR;
-    root->fd = fd;
-    root->key.ino = stat.st_ino;
-    root->key.dev = stat.st_dev;
-    root->key.mnt_id = mnt_id;
-    root->nlookup = 2;
-    g_atomic_int_set(&root->refcount, 2);
-    if (lo->posix_lock) {
-        pthread_mutex_init(&root->plock_mutex, NULL);
-        root->posix_locks = g_hash_table_new_full(
-            g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
-    }
-}
-
-static guint lo_key_hash(gconstpointer key)
-{
-    const struct lo_key *lkey = key;
-
-    return (guint)lkey->ino + (guint)lkey->dev + (guint)lkey->mnt_id;
-}
-
-static gboolean lo_key_equal(gconstpointer a, gconstpointer b)
-{
-    const struct lo_key *la = a;
-    const struct lo_key *lb = b;
-
-    return la->ino == lb->ino && la->dev == lb->dev && la->mnt_id == lb->mnt_id;
-}
-
-static void fuse_lo_data_cleanup(struct lo_data *lo)
-{
-    if (lo->inodes) {
-        g_hash_table_destroy(lo->inodes);
-    }
-
-    if (lo->root.posix_locks) {
-        g_hash_table_destroy(lo->root.posix_locks);
-    }
-    lo_map_destroy(&lo->fd_map);
-    lo_map_destroy(&lo->dirp_map);
-    lo_map_destroy(&lo->ino_map);
-
-    if (lo->proc_self_fd >= 0) {
-        close(lo->proc_self_fd);
-    }
-
-    if (lo->proc_self_task >= 0) {
-        close(lo->proc_self_task);
-    }
-
-    if (lo->root.fd >= 0) {
-        close(lo->root.fd);
-    }
-
-    free(lo->xattrmap);
-    free_xattrmap(lo);
-    free(lo->xattr_security_capability);
-    free(lo->source);
-}
-
-static void qemu_version(void)
-{
-    printf("virtiofsd version " QEMU_FULL_VERSION "\n" QEMU_COPYRIGHT "\n");
-}
-
-int main(int argc, char *argv[])
-{
-    struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
-    struct fuse_session *se;
-    struct fuse_cmdline_opts opts;
-    struct lo_data lo = {
-        .sandbox = SANDBOX_NAMESPACE,
-        .debug = 0,
-        .writeback = 0,
-        .posix_lock = 0,
-        .allow_direct_io = 0,
-        .proc_self_fd = -1,
-        .proc_self_task = -1,
-        .user_killpriv_v2 = -1,
-        .user_posix_acl = -1,
-        .user_security_label = -1,
-    };
-    struct lo_map_elem *root_elem;
-    struct lo_map_elem *reserve_elem;
-    int ret = -1;
-
-    /* Initialize time conversion information for localtime_r(). */
-    tzset();
-
-    /* Don't mask creation mode, kernel already did that */
-    umask(0);
-
-    qemu_init_exec_dir(argv[0]);
-
-    drop_supplementary_groups();
-
-    pthread_mutex_init(&lo.mutex, NULL);
-    lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal);
-    lo.root.fd = -1;
-    lo.root.fuse_ino = FUSE_ROOT_ID;
-    lo.cache = CACHE_AUTO;
-
-    /*
-     * Set up the ino map like this:
-     * [0] Reserved (will not be used)
-     * [1] Root inode
-     */
-    lo_map_init(&lo.ino_map);
-    reserve_elem = lo_map_reserve(&lo.ino_map, 0);
-    if (!reserve_elem) {
-        fuse_log(FUSE_LOG_ERR, "failed to alloc reserve_elem.\n");
-        goto err_out1;
-    }
-    reserve_elem->in_use = false;
-    root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
-    if (!root_elem) {
-        fuse_log(FUSE_LOG_ERR, "failed to alloc root_elem.\n");
-        goto err_out1;
-    }
-    root_elem->inode = &lo.root;
-
-    lo_map_init(&lo.dirp_map);
-    lo_map_init(&lo.fd_map);
-
-    if (fuse_parse_cmdline(&args, &opts) != 0) {
-        goto err_out1;
-    }
-    fuse_set_log_func(log_func);
-    use_syslog = opts.syslog;
-    if (use_syslog) {
-        openlog("virtiofsd", LOG_PID, LOG_DAEMON);
-    }
-
-    if (opts.show_help) {
-        printf("usage: %s [options]\n\n", argv[0]);
-        fuse_cmdline_help();
-        printf("    -o source=PATH             shared directory tree\n");
-        fuse_lowlevel_help();
-        ret = 0;
-        goto err_out1;
-    } else if (opts.show_version) {
-        qemu_version();
-        fuse_lowlevel_version();
-        ret = 0;
-        goto err_out1;
-    } else if (opts.print_capabilities) {
-        print_capabilities();
-        ret = 0;
-        goto err_out1;
-    }
-
-    if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
-        goto err_out1;
-    }
-
-    if (opts.log_level != 0) {
-        current_log_level = opts.log_level;
-    } else {
-        /* default log level is INFO */
-        current_log_level = FUSE_LOG_INFO;
-    }
-    lo.debug = opts.debug;
-    if (lo.debug) {
-        current_log_level = FUSE_LOG_DEBUG;
-    }
-    if (lo.source) {
-        struct stat stat;
-        int res;
-
-        res = lstat(lo.source, &stat);
-        if (res == -1) {
-            fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n",
-                     lo.source);
-            exit(1);
-        }
-        if (!S_ISDIR(stat.st_mode)) {
-            fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
-            exit(1);
-        }
-    } else {
-        lo.source = strdup("/");
-        if (!lo.source) {
-            fuse_log(FUSE_LOG_ERR, "failed to strdup source\n");
-            goto err_out1;
-        }
-    }
-
-    if (lo.xattrmap) {
-        lo.xattr = 1;
-        parse_xattrmap(&lo);
-    }
-
-    if (!lo.timeout_set) {
-        switch (lo.cache) {
-        case CACHE_NONE:
-            lo.timeout = 0.0;
-            break;
-
-        case CACHE_AUTO:
-            lo.timeout = 1.0;
-            break;
-
-        case CACHE_ALWAYS:
-            lo.timeout = 86400.0;
-            break;
-        }
-    } else if (lo.timeout < 0) {
-        fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout);
-        exit(1);
-    }
-
-    if (lo.user_posix_acl == 1 && !lo.xattr) {
-        fuse_log(FUSE_LOG_ERR, "Can't enable posix ACLs. xattrs are disabled."
-                 "\n");
-        exit(1);
-    }
-
-    lo.use_statx = true;
-
-    se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
-    if (se == NULL) {
-        goto err_out1;
-    }
-
-    if (fuse_set_signal_handlers(se) != 0) {
-        goto err_out2;
-    }
-
-    if (fuse_session_mount(se) != 0) {
-        goto err_out3;
-    }
-
-    fuse_daemonize(opts.foreground);
-
-    setup_nofile_rlimit(opts.rlimit_nofile);
-
-    /* Must be before sandbox since it wants /proc */
-    setup_capng();
-
-    setup_sandbox(&lo, se, opts.syslog);
-
-    setup_root(&lo, &lo.root);
-    /* Block until ctrl+c or fusermount -u */
-    ret = virtio_loop(se);
-
-    fuse_session_unmount(se);
-    cleanup_capng();
-err_out3:
-    fuse_remove_signal_handlers(se);
-err_out2:
-    fuse_session_destroy(se);
-err_out1:
-    fuse_opt_free_args(&args);
-
-    fuse_lo_data_cleanup(&lo);
-
-    return ret ? 1 : 0;
-}
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
deleted file mode 100644 (file)
index 0033dab..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Seccomp sandboxing for virtiofsd
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#include "qemu/osdep.h"
-#include "passthrough_seccomp.h"
-#include "fuse_i.h"
-#include "fuse_log.h"
-#include <seccomp.h>
-
-/* Bodge for libseccomp 2.4.2 which broke ppoll */
-#if !defined(__SNR_ppoll) && defined(__SNR_brk)
-#ifdef __NR_ppoll
-#define __SNR_ppoll __NR_ppoll
-#else
-#define __SNR_ppoll __PNR_ppoll
-#endif
-#endif
-
-static const int syscall_allowlist[] = {
-    /* TODO ireg sem*() syscalls */
-    SCMP_SYS(brk),
-    SCMP_SYS(capget), /* For CAP_FSETID */
-    SCMP_SYS(capset),
-    SCMP_SYS(clock_gettime),
-    SCMP_SYS(clone),
-#ifdef __NR_clone3
-    SCMP_SYS(clone3),
-#endif
-    SCMP_SYS(close),
-    SCMP_SYS(copy_file_range),
-    SCMP_SYS(dup),
-    SCMP_SYS(eventfd2),
-    SCMP_SYS(exit),
-    SCMP_SYS(exit_group),
-    SCMP_SYS(fallocate),
-    SCMP_SYS(fchdir),
-    SCMP_SYS(fchmod),
-    SCMP_SYS(fchmodat),
-    SCMP_SYS(fchownat),
-    SCMP_SYS(fcntl),
-    SCMP_SYS(fdatasync),
-    SCMP_SYS(fgetxattr),
-    SCMP_SYS(flistxattr),
-    SCMP_SYS(flock),
-    SCMP_SYS(fremovexattr),
-    SCMP_SYS(fsetxattr),
-    SCMP_SYS(fstat),
-    SCMP_SYS(fstatfs),
-    SCMP_SYS(fstatfs64),
-    SCMP_SYS(fsync),
-    SCMP_SYS(ftruncate),
-    SCMP_SYS(futex),
-    SCMP_SYS(getdents),
-    SCMP_SYS(getdents64),
-    SCMP_SYS(getegid),
-    SCMP_SYS(geteuid),
-    SCMP_SYS(getpid),
-    SCMP_SYS(gettid),
-    SCMP_SYS(gettimeofday),
-    SCMP_SYS(getxattr),
-    SCMP_SYS(linkat),
-    SCMP_SYS(listxattr),
-    SCMP_SYS(lseek),
-    SCMP_SYS(_llseek), /* For POWER */
-    SCMP_SYS(madvise),
-    SCMP_SYS(mkdirat),
-    SCMP_SYS(mknodat),
-    SCMP_SYS(mmap),
-    SCMP_SYS(mprotect),
-    SCMP_SYS(mremap),
-    SCMP_SYS(munmap),
-    SCMP_SYS(newfstatat),
-    SCMP_SYS(statx),
-    SCMP_SYS(open),
-    SCMP_SYS(openat),
-    SCMP_SYS(ppoll),
-    SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
-    SCMP_SYS(preadv),
-    SCMP_SYS(pread64),
-    SCMP_SYS(pwritev),
-    SCMP_SYS(pwrite64),
-    SCMP_SYS(read),
-    SCMP_SYS(readlinkat),
-    SCMP_SYS(recvmsg),
-    SCMP_SYS(renameat),
-    SCMP_SYS(renameat2),
-    SCMP_SYS(removexattr),
-    SCMP_SYS(restart_syscall),
-#ifdef __NR_rseq
-    SCMP_SYS(rseq), /* required since glibc 2.35 */
-#endif
-    SCMP_SYS(rt_sigaction),
-    SCMP_SYS(rt_sigprocmask),
-    SCMP_SYS(rt_sigreturn),
-    SCMP_SYS(sched_getattr),
-    SCMP_SYS(sched_setattr),
-    SCMP_SYS(sendmsg),
-    SCMP_SYS(setresgid),
-    SCMP_SYS(setresuid),
-#ifdef __NR_setresgid32
-    SCMP_SYS(setresgid32),
-#endif
-#ifdef __NR_setresuid32
-    SCMP_SYS(setresuid32),
-#endif
-    SCMP_SYS(set_robust_list),
-    SCMP_SYS(setxattr),
-    SCMP_SYS(sigreturn),
-    SCMP_SYS(symlinkat),
-    SCMP_SYS(syncfs),
-    SCMP_SYS(time), /* Rarely needed, except on static builds */
-    SCMP_SYS(tgkill),
-    SCMP_SYS(unlinkat),
-    SCMP_SYS(unshare),
-    SCMP_SYS(utimensat),
-    SCMP_SYS(write),
-    SCMP_SYS(writev),
-    SCMP_SYS(umask),
-};
-
-/* Syscalls used when --syslog is enabled */
-static const int syscall_allowlist_syslog[] = {
-    SCMP_SYS(send),
-    SCMP_SYS(sendto),
-};
-
-static void add_allowlist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
-{
-    size_t i;
-
-    for (i = 0; i < len; i++) {
-        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) {
-            fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n",
-                     syscalls[i]);
-            exit(1);
-        }
-    }
-}
-
-void setup_seccomp(bool enable_syslog)
-{
-    scmp_filter_ctx ctx;
-
-#ifdef SCMP_ACT_KILL_PROCESS
-    ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
-    /* Handle a newer libseccomp but an older kernel */
-    if (!ctx && errno == EOPNOTSUPP) {
-        ctx = seccomp_init(SCMP_ACT_TRAP);
-    }
-#else
-    ctx = seccomp_init(SCMP_ACT_TRAP);
-#endif
-    if (!ctx) {
-        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
-        exit(1);
-    }
-
-    add_allowlist(ctx, syscall_allowlist, G_N_ELEMENTS(syscall_allowlist));
-    if (enable_syslog) {
-        add_allowlist(ctx, syscall_allowlist_syslog,
-                      G_N_ELEMENTS(syscall_allowlist_syslog));
-    }
-
-    /* libvhost-user calls this for post-copy migration, we don't need it */
-    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
-                         SCMP_SYS(userfaultfd), 0) != 0) {
-        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
-        exit(1);
-    }
-
-    if (seccomp_load(ctx) < 0) {
-        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
-        exit(1);
-    }
-
-    seccomp_release(ctx);
-}
diff --git a/tools/virtiofsd/passthrough_seccomp.h b/tools/virtiofsd/passthrough_seccomp.h
deleted file mode 100644 (file)
index 12674fc..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Seccomp sandboxing for virtiofsd
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef VIRTIOFSD_PASSTHROUGH_SECCOMP_H
-#define VIRTIOFSD_PASSTHROUGH_SECCOMP_H
-
-void setup_seccomp(bool enable_syslog);
-
-#endif /* VIRTIOFSD_PASSTHROUGH_SECCOMP_H */
index ab43561fe152cee7a5a4839e1cbdeeb8e6b9fae7..98b701f5a312224c47d51631d0d6e507ce2af452 100644 (file)
@@ -28,6 +28,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-commands-ui.h"
 #include "qemu/coroutine.h"
+#include "qemu/error-report.h"
 #include "qemu/fifo8.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
index 5843d26cd2cb6b681c1f85cda4fe7cdf707904f2..df9a754a8dc5555dd953b4a45ef406d019720682 100644 (file)
@@ -23,6 +23,7 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/dbus.h"
+#include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qom/object_interfaces.h"
 #include "sysemu/sysemu.h"
index 898a4ac8a5bae944c6b221d2229cf0bdf0443483..0bfaa2298d2e55c6f19dce2247b6f45d7e365496 100644 (file)
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "ui/input.h"
 #include "ui/kbd-state.h"
index f9fc8eda519ac90cc739ab2cc03491be3311a738..57d4e401db5b837403592ca6c994b67ea540d3a2 100644 (file)
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "sysemu/sysemu.h"
 #include "dbus.h"
 #include <gio/gunixfdlist.h>
index 32d88dc94a7e253ffbb5f7555bc807d1c70c073f..f2dcba03d08884b0f2d88c6f3f2cee1fa409992a 100644 (file)
--- a/ui/dbus.c
+++ b/ui/dbus.c
@@ -23,6 +23,7 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/error-report.h"
 #include "qemu/dbus.h"
 #include "qemu/main-loop.h"
 #include "qemu/option.h"
index 7a30fd9777654931c55e0b66bbdfa7c98480831d..ae07e9130247f395f36c56dea2b608ddbef0da39 100644 (file)
@@ -1,4 +1,5 @@
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "sysemu/sysemu.h"
 #include "ui/console.h"
index 7f752d8b7d6ad95b5475c60a8f226e5ad22cdf80..fd82e9b1cae11e135306ac881f8da762defd35d1 100644 (file)
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -36,6 +36,7 @@
 #include "qapi/qapi-commands-machine.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qemu/cutils.h"
+#include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 
 #include "ui/console.h"
index 7e71e18da9ada83170b426b64f4ad0e4ded1a582..ad7f0551ad613004d72cd846154b91be5f262a7d 100644 (file)
@@ -29,6 +29,7 @@
 #include "ui/console.h"
 #include "ui/spice-display.h"
 #include "qemu/config-file.h"
+#include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "qemu/cutils.h"
 #include "qemu/module.h"
index 72f8f1681c6f4c289963da0547a07e3bc81ed07e..76f7c2bc3d12334a9f2ad9fe96fef4df8be8b228 100644 (file)
@@ -412,9 +412,6 @@ static QemuOptsList qemu_spice_opts = {
             .name = "unix",
             .type = QEMU_OPT_BOOL,
 #endif
-        },{
-            .name = "password",
-            .type = QEMU_OPT_STRING,
         },{
             .name = "password-secret",
             .type = QEMU_OPT_STRING,
@@ -666,20 +663,8 @@ static void qemu_spice_init(void)
     }
     passwordSecret = qemu_opt_get(opts, "password-secret");
     if (passwordSecret) {
-        if (qemu_opt_get(opts, "password")) {
-            error_report("'password' option is mutually exclusive with "
-                         "'password-secret'");
-            exit(1);
-        }
         password = qcrypto_secret_lookup_as_utf8(passwordSecret,
                                                  &error_fatal);
-    } else {
-        str = qemu_opt_get(opts, "password");
-        if (str) {
-            warn_report("'password' option is deprecated and insecure, "
-                        "use 'password-secret' instead");
-            password = g_strdup(str);
-        }
     }
 
     if (tls_port) {
index 0616a6982f1f5955358c1239bf707eac904c10f4..16802f99cbcb13dfda0f392f8a28b4db0675e85f 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "qemu/osdep.h"
 #include "ui/qemu-spice.h"
+#include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "qemu/lockable.h"
 #include "qemu/main-loop.h"
index cbf4357bb1c2eb5cc82974c6483fbd15a7862f00..6a0a11a85defb6c6e65c0f11e5f8f44269c30ff3 100644 (file)
@@ -7,6 +7,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "ui/console.h"
+#include "qemu/error-report.h"
 
 #include <sys/ioctl.h>
 
index 1f51a78da186fe97373f7ffca3ce71c667d31a52..8a651492f0955290886d57ed1a313cdf5730187c 100644 (file)
@@ -2,6 +2,7 @@
 #include "qapi/error.h"
 #include "chardev/char.h"
 #include "qemu/buffer.h"
+#include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "qemu/units.h"
 #include "hw/qdev-core.h"
index 2c2c73e085c2a9f466f791d653ef8a6b16b8f905..06c2333a60005b3d8851763e1d50a9ff2a48520e 100644 (file)
@@ -121,8 +121,12 @@ static void sys_cache_info(int *isize, int *dsize)
 static bool have_coherent_icache;
 #endif
 
-#if defined(__aarch64__) && !defined(CONFIG_DARWIN)
-/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
+/*
+ * Apple does not expose CTR_EL0, so we must use system interfaces.
+ * Windows does not expose it either, but there we use the generic
+ * implementation of flush_idcache_range.
+ */
 static uint64_t save_ctr_el0;
 static void arch_cache_info(int *isize, int *dsize)
 {
@@ -225,7 +229,11 @@ static void __attribute__((constructor)) init_cache_info(void)
 
 /* Caches are coherent and do not require flushing; symbol inline. */
 
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
+/*
+ * On Windows, we use the generic implementation of flush_idcache_range, which
+ * calls FlushInstructionCache through __builtin___clear_cache.
+ */
 
 #ifdef CONFIG_DARWIN
 /* Apple does not expose CTR_EL0, so we must use system interfaces. */
index 297db35fb11e1f02eee5e3a3ced40d44876ff20c..6d6e1b595d9ade3edc3c5c73b9e49da05027ac57 100644 (file)
@@ -331,7 +331,7 @@ bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,
 
     assert(next_zero > start);
     *pnum = next_zero - start;
-    return false;
+    return true;
 }
 
 bool hbitmap_empty(const HBitmap *hb)
index d63f27438dc4c5b351945295cd3cba0710ce357e..42076efe1ef2bd512c33c67d1edeafa43ac027bf 100644 (file)
@@ -2,7 +2,6 @@
 #include "block/qdict.h" /* for qdict_extract_subqdict() */
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
 #include "qemu/error-report.h"
index bae938c670845f5fa5c43edbe46f824bc335a19f..93d25057974105d9c5e6bfc7cf6cd7360fb3a8ea 100644 (file)
 #include "qemu/tsan.h"
 #include "qemu/bitmap.h"
 
+#ifdef CONFIG_PTHREAD_SET_NAME_NP
+#include <pthread_np.h>
+#endif
+
 static bool name_threads;
 
 void qemu_thread_naming(bool enable)
@@ -25,7 +29,8 @@ void qemu_thread_naming(bool enable)
     name_threads = enable;
 
 #if !defined CONFIG_PTHREAD_SETNAME_NP_W_TID && \
-    !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID
+    !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID && \
+    !defined CONFIG_PTHREAD_SET_NAME_NP
     /* This is a debugging option, not fatal */
     if (enable) {
         fprintf(stderr, "qemu: thread naming not supported on this host\n");
@@ -223,7 +228,7 @@ void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, con
         error_exit(err, __func__);
 }
 
-static bool
+static bool TSA_NO_TSA
 qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
                        const char *file, const int line)
 {
@@ -480,6 +485,8 @@ static void *qemu_thread_start(void *args)
         pthread_setname_np(pthread_self(), qemu_thread_args->name);
 # elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
         pthread_setname_np(qemu_thread_args->name);
+# elif defined(CONFIG_PTHREAD_SET_NAME_NP)
+        pthread_set_name_np(pthread_self(), qemu_thread_args->name);
 # endif
     }
     QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
index c8f53d7d9fc3ddd395099467e52e6a8dbb8575a3..16f78d8fe50ac9d1159c3b30ab2d77f35bde1045 100644 (file)
@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz
 qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
 
 #userfaultfd.c
+uffd_detect_open_mode(int mode) "%d"
 uffd_query_features_nosys(int err) "errno: %i"
 uffd_query_features_api_failed(int err) "errno: %i"
 uffd_create_fd_nosys(int err) "errno: %i"
index 4953b3137d5e75091d16293e430e25ed8abe3233..fdff4867e8bd8c0adaac44a4aec468c198214897 100644 (file)
 #include <poll.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
+#include <fcntl.h>
+
+typedef enum {
+    UFFD_UNINITIALIZED = 0,
+    UFFD_USE_DEV_PATH,
+    UFFD_USE_SYSCALL,
+} uffd_open_mode;
 
 int uffd_open(int flags)
 {
 #if defined(__NR_userfaultfd)
+    static uffd_open_mode open_mode;
+    static int uffd_dev;
+
+    /* Detect how to create the uffd descriptor the first time this runs */
+    if (open_mode == UFFD_UNINITIALIZED) {
+        /*
+         * Make /dev/userfaultfd the default approach because it has better
+         * permission controls and also allows kernel faults without any
+         * privilege requirement (e.g. CAP_SYS_PTRACE).
+         */
+        uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+        if (uffd_dev >= 0) {
+            open_mode = UFFD_USE_DEV_PATH;
+        } else {
+            /* Fallback to the system call */
+            open_mode = UFFD_USE_SYSCALL;
+        }
+        trace_uffd_detect_open_mode(open_mode);
+    }
+
+    if (open_mode == UFFD_USE_DEV_PATH) {
+        assert(uffd_dev >= 0);
+        return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
+    }
+
     return syscall(__NR_userfaultfd, flags);
 #else
     return -EINVAL;
index 145eb17c086a585cd2219191c55298c97bf0deb7..40f36ea214b26c2a057e6fc22af6890f141db587 100644 (file)
@@ -8,6 +8,7 @@
  * later.  See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/vhost-user-server.h"
 #include "block/aio-wait.h"