Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 7 Jul 2017 01:38:31 +0000 (18:38 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 7 Jul 2017 01:38:31 +0000 (18:38 -0700)
Pull KVM updates from Paolo Bonzini:
 "PPC:
   - Better machine check handling for HV KVM
   - Ability to support guests with threads=2, 4 or 8 on POWER9
   - Fix for a race that could cause delayed recognition of signals
   - Fix for a bug where POWER9 guests could sleep with interrupts pending.

  ARM:
   - VCPU request overhaul
   - allow timer and PMU to have their interrupt number selected from userspace
   - workaround for Cavium erratum 30115
   - handling of memory poisoning
   - the usual crop of fixes and cleanups

  s390:
   - initial machine check forwarding
   - migration support for the CMMA page hinting information
   - cleanups and fixes

  x86:
   - nested VMX bugfixes and improvements
   - more reliable NMI window detection on AMD
   - APIC timer optimizations

  Generic:
   - VCPU request overhaul + documentation of common code patterns
   - kvm_stat improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  Update my email address
  kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
  x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
  kvm: x86: mmu: allow A/D bits to be disabled in an mmu
  x86: kvm: mmu: make spte mmio mask more explicit
  x86: kvm: mmu: dead code thanks to access tracking
  KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
  KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
  KVM: x86: remove ignored type attribute
  KVM: LAPIC: Fix lapic timer injection delay
  KVM: lapic: reorganize restart_apic_timer
  KVM: lapic: reorganize start_hv_timer
  kvm: nVMX: Check memory operand to INVVPID
  KVM: s390: Inject machine check into the nested guest
  KVM: s390: Inject machine check into the guest
  tools/kvm_stat: add new interactive command 'b'
  tools/kvm_stat: add new command line switch '-i'
  tools/kvm_stat: fix error on interactive command 'g'
  KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
  ...

18 files changed:
Documentation/admin-guide/kernel-parameters.txt
MAINTAINERS
arch/arm64/Kconfig
arch/arm64/include/asm/esr.h
arch/powerpc/kvm/book3s_hv.c
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/gaccess.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/x86/include/asm/msr-index.h
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
virt/kvm/arm/mmu.c

index f24ee1c99412578650eec7bc83e42bcbb3601ed9,aa8341e73b352bb3d091e5f55a7f6612e976fd70..aa1d4409fe0aee0f7af9b7084e8f6e0fd7f697d0
                        /proc/<pid>/coredump_filter.
                        See also Documentation/filesystems/proc.txt.
  
 +      coresight_cpu_debug.enable
 +                      [ARM,ARM64]
 +                      Format: <bool>
 +                      Enable/disable the CPU sampling based debugging.
 +                      0: default value, disable debugging
 +                      1: enable debugging at boot time
 +
        cpuidle.off=1   [CPU_IDLE]
                        disable the cpuidle sub-system
  
                        See also Documentation/input/joystick-parport.txt
  
        ddebug_query=   [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
 -                      time. See Documentation/dynamic-debug-howto.txt for
 +                      time. See
 +                      Documentation/admin-guide/dynamic-debug-howto.rst for
                        details.  Deprecated, see dyndbg.
  
        debug           [KNL] Enable kernel debugging (events log level).
  
        dscc4.setup=    [NET]
  
 +      dt_cpu_ftrs=    [PPC]
 +                      Format: {"off" | "known"}
 +                      Control how the dt_cpu_ftrs device-tree binding is
 +                      used for CPU feature discovery and setup (if it
 +                      exists).
 +                      off: Do not use it, fall back to legacy cpu table.
 +                      known: Do not pass through unknown features to guests
 +                      or userspace, only those that the kernel is aware of.
 +
        dump_apple_properties   [X86]
                        Dump name and content of EFI device properties on
                        x86 Macs.  Useful for driver authors to determine
        dyndbg[="val"]          [KNL,DYNAMIC_DEBUG]
        module.dyndbg[="val"]
                        Enable debug messages at boot time.  See
 -                      Documentation/dynamic-debug-howto.txt for details.
 +                      Documentation/admin-guide/dynamic-debug-howto.rst
 +                      for details.
  
        nompx           [X86] Disables Intel Memory Protection Extensions.
                        See Documentation/x86/intel_mpx.txt for more
                        must already be setup and configured. Options are not
                        yet supported.
  
 +              owl,<addr>
 +                      Start an early, polled-mode console on a serial port
 +                      of an Actions Semi SoC, such as S500 or S900, at the
 +                      specified address. The serial port must already be
 +                      setup and configured. Options are not yet supported.
 +
                smh     Use ARM semihosting calls for early console.
  
                s3c2410,<addr>
                        in crypto/hash_info.h.
  
        ima_policy=     [IMA]
 -                      The builtin measurement policy to load during IMA
 -                      setup.  Specyfing "tcb" as the value, measures all
 -                      programs exec'd, files mmap'd for exec, and all files
 -                      opened with the read mode bit set by either the
 -                      effective uid (euid=0) or uid=0.
 -                      Format: "tcb"
 +                      The builtin policies to load during IMA setup.
 +                      Format: "tcb | appraise_tcb | secure_boot"
 +
 +                      The "tcb" policy measures all programs exec'd, files
 +                      mmap'd for exec, and all files opened with the read
 +                      mode bit set by either the effective uid (euid=0) or
 +                      uid=0.
 +
 +                      The "appraise_tcb" policy appraises the integrity of
 +                      all files owned by root. (This is the equivalent
 +                      of ima_appraise_tcb.)
 +
 +                      The "secure_boot" policy appraises the integrity
 +                      of files (eg. kexec kernel image, kernel modules,
 +                      firmware, policy, etc) based on file signatures.
  
        ima_tcb         [IMA] Deprecated.  Use ima_policy= instead.
                        Load a policy which meets the needs of the Trusted
                        for all guests.
                        Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
  
+       kvm-arm.vgic_v3_group0_trap=
+                       [KVM,ARM] Trap guest accesses to GICv3 group-0
+                       system registers
+       kvm-arm.vgic_v3_group1_trap=
+                       [KVM,ARM] Trap guest accesses to GICv3 group-1
+                       system registers
+       kvm-arm.vgic_v3_common_trap=
+                       [KVM,ARM] Trap guest accesses to GICv3 common
+                       system registers
        kvm-intel.ept=  [KVM,Intel] Disable extended page tables
                        (virtualized MMU) support on capable Intel chips.
                        Default is 1 (enabled)
        memmap=nn[KMG]@ss[KMG]
                        [KNL] Force usage of a specific region of memory.
                        Region of memory to be used is from ss to ss+nn.
 +                      If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
 +                      which limits max address to nn[KMG].
 +                      Multiple different regions can be specified,
 +                      comma delimited.
 +                      Example:
 +                              memmap=100M@2G,100M#3G,1G!1024G
  
        memmap=nn[KMG]#ss[KMG]
                        [KNL,ACPI] Mark specific memory as ACPI data.
                                 memmap=64K$0x18690000
                                 or
                                 memmap=0x10000$0x18690000
 +                      Some bootloaders may need an escape character before '$',
 +                      like Grub2, otherwise '$' and the following number
 +                      will be eaten.
  
        memmap=nn[KMG]!ss[KMG]
                        [KNL,X86] Mark specific memory as protected.
  
        rcutree.gp_cleanup_delay=       [KNL]
                        Set the number of jiffies to delay each step of
 -                      RCU grace-period cleanup.  This only has effect
 -                      when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
 +                      RCU grace-period cleanup.
  
        rcutree.gp_init_delay=  [KNL]
                        Set the number of jiffies to delay each step of
 -                      RCU grace-period initialization.  This only has
 -                      effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
 -                      is set.
 +                      RCU grace-period initialization.
  
        rcutree.gp_preinit_delay=       [KNL]
                        Set the number of jiffies to delay each step of
                        RCU grace-period pre-initialization, that is,
                        the propagation of recent CPU-hotplug changes up
 -                      the rcu_node combining tree.  This only has effect
 -                      when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
 +                      the rcu_node combining tree.
  
        rcutree.rcu_fanout_exact= [KNL]
                        Disable autobalancing of the rcu_node combining
                        This wake_up() will be accompanied by a
                        WARN_ONCE() splat and an ftrace_dump().
  
 +      rcuperf.gp_async= [KNL]
 +                      Measure performance of asynchronous
 +                      grace-period primitives such as call_rcu().
 +
 +      rcuperf.gp_async_max= [KNL]
 +                      Specify the maximum number of outstanding
 +                      callbacks per writer thread.  When a writer
 +                      thread exceeds this limit, it invokes the
 +                      corresponding flavor of rcu_barrier() to allow
 +                      previously posted callbacks to drain.
 +
        rcuperf.gp_exp= [KNL]
                        Measure performance of expedited synchronous
                        grace-period primitives.
        rcuperf.perf_runnable= [BOOT]
                        Start rcuperf running at boot time.
  
 +      rcuperf.perf_type= [KNL]
 +                      Specify the RCU implementation to test.
 +
        rcuperf.shutdown= [KNL]
                        Shut the system down after performance tests
                        complete.  This is useful for hands-off automated
                        testing.
  
 -      rcuperf.perf_type= [KNL]
 -                      Specify the RCU implementation to test.
 -
        rcuperf.verbose= [KNL]
                        Enable additional printk() statements.
  
 +      rcuperf.writer_holdoff= [KNL]
 +                      Write-side holdoff between grace periods,
 +                      in microseconds.  The default of zero says
 +                      no holdoff.
 +
        rcutorture.cbflood_inter_holdoff= [KNL]
                        Set holdoff time (jiffies) between successive
                        callback-flood tests.
        spia_pedr=
        spia_peddr=
  
 +      srcutree.counter_wrap_check [KNL]
 +                      Specifies how frequently to check for
 +                      grace-period sequence counter wrap for the
 +                      srcu_data structure's ->srcu_gp_seq_needed field.
 +                      The greater the number of bits set in this kernel
 +                      parameter, the less frequently counter wrap will
 +                      be checked for.  Note that the bottom two bits
 +                      are ignored.
 +
        srcutree.exp_holdoff [KNL]
                        Specifies how many nanoseconds must elapse
                        since the end of the last SRCU grace period for
                        expediting.  Set to zero to disable automatic
                        expediting.
  
 +      stack_guard_gap=        [MM]
 +                      override the default stack gap protection. The value
 +                      is in page units and it defines how many pages prior
 +                      to (for stacks growing down) resp. after (for stacks
 +                      growing up) the main stack are reserved for no other
 +                      mapping. Default value is 256 pages.
 +
        stacktrace      [FTRACE]
                        Enabled the stack tracer on boot up.
  
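(Editor's illustration, not part of the commit: several entries above, e.g. memmap= and mem=, take sizes in the nn[KMG] notation. Below is a minimal standalone sketch of that suffix handling, loosely modeled on the kernel's memparse() helper; the parse_size() name is hypothetical, and the real helper also accepts T, P and E suffixes.)

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse "100M", "2G", ... into bytes; loosely modeled on memparse(). */
    static unsigned long long parse_size(const char *s, char **end)
    {
            unsigned long long val = strtoull(s, end, 0);

            switch (**end) {
            case 'G': case 'g': val <<= 10; /* fall through */
            case 'M': case 'm': val <<= 10; /* fall through */
            case 'K': case 'k': val <<= 10; (*end)++; break;
            default: break;
            }
            return val;
    }

    int main(void)
    {
            char *end;
            unsigned long long size, start = 0;

            /* e.g. the first region of the documented memmap=100M@2G example */
            size = parse_size("100M@2G", &end);
            if (*end == '@')
                    start = parse_size(end + 1, &end);
            printf("size=%llu bytes, start=%llu bytes\n", size, start);
            return 0;
    }
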
diff --combined MAINTAINERS
index 75ac9dc85804993fa345bd447856ff5b5369cbd4,cc65b44b1226c5ff92a793ca6b9e00f8d0220be2..1c1d106a3347fbf2847db7f84536e6a1879d8114
@@@ -155,7 -155,7 +155,7 @@@ S: Maintaine
  F:    drivers/scsi/53c700*
  
  6LOWPAN GENERIC (BTLE/IEEE 802.15.4)
 -M:    Alexander Aring <aar@pengutronix.de>
 +M:    Alexander Aring <alex.aring@gmail.com>
  M:    Jukka Rissanen <jukka.rissanen@linux.intel.com>
  L:    linux-bluetooth@vger.kernel.org
  L:    linux-wpan@vger.kernel.org
@@@ -478,7 -478,7 +478,7 @@@ L: linux-hwmon@vger.kernel.or
  S:    Maintained
  F:    Documentation/hwmon/ads1015
  F:    drivers/hwmon/ads1015.c
 -F:    include/linux/i2c/ads1015.h
 +F:    include/linux/platform_data/ads1015.h
  
  ADT746X FAN DRIVER
  M:    Colin Leroy <colin@colino.net>
@@@ -1036,22 -1036,6 +1036,22 @@@ S:    Maintaine
  F:    drivers/amba/
  F:    include/linux/amba/bus.h
  
 +ARM/ACTIONS SEMI ARCHITECTURE
 +M:    Andreas Färber <afaerber@suse.de>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +N:    owl
 +F:    arch/arm/mach-actions/
 +F:    arch/arm/boot/dts/owl-*
 +F:    arch/arm64/boot/dts/actions/
 +F:    drivers/clocksource/owl-*
 +F:    drivers/soc/actions/
 +F:    include/dt-bindings/power/owl-*
 +F:    include/linux/soc/actions/
 +F:    Documentation/devicetree/bindings/arm/actions.txt
 +F:    Documentation/devicetree/bindings/power/actions,owl-sps.txt
 +F:    Documentation/devicetree/bindings/timer/actions,owl-timer.txt
 +
  ARM/ADS SPHERE MACHINE SUPPORT
  M:    Lennert Buytenhek <kernel@wantstofly.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1188,7 -1172,7 +1188,7 @@@ N:      clps711
  
  ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
  M:    Hartley Sweeten <hsweeten@visionengravers.com>
 -M:    Ryan Mallon <rmallon@gmail.com>
 +M:    Alexander Sverdlin <alexander.sverdlin@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  F:    arch/arm/mach-ep93xx/
@@@ -1223,9 -1207,7 +1223,9 @@@ L:      linux-arm-kernel@lists.infradead.or
  S:    Maintained
  F:    drivers/hwtracing/coresight/*
  F:    Documentation/trace/coresight.txt
 +F:    Documentation/trace/coresight-cpu-debug.txt
  F:    Documentation/devicetree/bindings/arm/coresight.txt
 +F:    Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
  F:    Documentation/ABI/testing/sysfs-bus-coresight-devices-*
  F:    tools/perf/arch/arm/util/pmu.c
  F:    tools/perf/arch/arm/util/auxtrace.c
@@@ -1507,16 -1489,13 +1507,16 @@@ M:   Gregory Clement <gregory.clement@fre
  M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    arch/arm/mach-mvebu/
 -F:    drivers/rtc/rtc-armada38x.c
  F:    arch/arm/boot/dts/armada*
  F:    arch/arm/boot/dts/kirkwood*
 +F:    arch/arm/configs/mvebu_*_defconfig
 +F:    arch/arm/mach-mvebu/
  F:    arch/arm64/boot/dts/marvell/armada*
  F:    drivers/cpufreq/mvebu-cpufreq.c
 -F:    arch/arm/configs/mvebu_*_defconfig
 +F:    drivers/irqchip/irq-armada-370-xp.c
 +F:    drivers/irqchip/irq-mvebu-*
 +F:    drivers/pinctrl/mvebu/
 +F:    drivers/rtc/rtc-armada38x.c
  
  ARM/Marvell Berlin SoC support
  M:    Jisheng Zhang <jszhang@marvell.com>
@@@ -1682,6 -1661,7 +1682,6 @@@ F:      arch/arm/mach-qcom
  F:    arch/arm64/boot/dts/qcom/*
  F:    drivers/i2c/busses/i2c-qup.c
  F:    drivers/clk/qcom/
 -F:    drivers/pinctrl/qcom/
  F:    drivers/dma/qcom/
  F:    drivers/soc/qcom/
  F:    drivers/spi/spi-qup.c
@@@ -1697,13 -1677,6 +1697,13 @@@ M:    Lennert Buytenhek <kernel@wantstofly
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  
 +ARM/REALTEK ARCHITECTURE
 +M:    Andreas Färber <afaerber@suse.de>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +F:    arch/arm64/boot/dts/realtek/
 +F:    Documentation/devicetree/bindings/arm/realtek.txt
 +
  ARM/RENESAS ARM64 ARCHITECTURE
  M:    Simon Horman <horms@verge.net.au>
  M:    Magnus Damm <magnus.damm@gmail.com>
@@@ -1737,7 -1710,6 +1737,7 @@@ L:      linux-rockchip@lists.infradead.or
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git
  S:    Maintained
  F:    arch/arm/boot/dts/rk3*
 +F:    arch/arm/boot/dts/rv1108*
  F:    arch/arm/mach-rockchip/
  F:    drivers/clk/rockchip/
  F:    drivers/i2c/busses/i2c-rk3x.c
@@@ -1749,6 -1721,7 +1749,6 @@@ N:      rockchi
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <kgene@kernel.org>
  M:    Krzysztof Kozlowski <krzk@kernel.org>
 -R:    Javier Martinez Canillas <javier@osg.samsung.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  Q:    https://patchwork.kernel.org/project/linux-samsung-soc/list/
@@@ -1802,12 -1775,11 +1802,12 @@@ F:   arch/arm/plat-samsung/s5p-dev-mfc.
  F:    drivers/media/platform/s5p-mfc/
  
  ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
 -M:    Kyungmin Park <kyungmin.park@samsung.com>
 -L:    linux-arm-kernel@lists.infradead.org
 +M:    Marek Szyprowski <m.szyprowski@samsung.com>
 +L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  L:    linux-media@vger.kernel.org
  S:    Maintained
 -F:    drivers/staging/media/platform/s5p-cec/
 +F:    drivers/media/platform/s5p-cec/
 +F:    Documentation/devicetree/bindings/media/s5p-cec.txt
  
  ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT
  M:    Andrzej Pietrasiewicz <andrzej.p@samsung.com>
@@@ -1857,6 -1829,7 +1857,6 @@@ F:      drivers/edac/altera_edac
  ARM/STI ARCHITECTURE
  M:    Patrice Chotard <patrice.chotard@st.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 -L:    kernel@stlinux.com
  W:    http://www.stlinux.com
  S:    Maintained
  F:    arch/arm/mach-sti/
@@@ -1870,8 -1843,8 +1870,8 @@@ F:      drivers/i2c/busses/i2c-st.
  F:    drivers/media/rc/st_rc.c
  F:    drivers/media/platform/sti/c8sectpfe/
  F:    drivers/mmc/host/sdhci-st.c
 -F:    drivers/phy/phy-miphy28lp.c
 -F:    drivers/phy/phy-stih407-usb.c
 +F:    drivers/phy/st/phy-miphy28lp.c
 +F:    drivers/phy/st/phy-stih407-usb.c
  F:    drivers/pinctrl/pinctrl-st.c
  F:    drivers/remoteproc/st_remoteproc.c
  F:    drivers/remoteproc/st_slim_rproc.c
@@@ -2349,15 -2322,6 +2349,15 @@@ F:    Documentation/devicetree/bindings/in
  F:    drivers/input/touchscreen/atmel_mxt_ts.c
  F:    include/linux/platform_data/atmel_mxt_ts.h
  
 +ATOMIC INFRASTRUCTURE
 +M:    Will Deacon <will.deacon@arm.com>
 +M:    Peter Zijlstra <peterz@infradead.org>
 +R:    Boqun Feng <boqun.feng@gmail.com>
 +L:    linux-kernel@vger.kernel.org
 +S:    Maintained
 +F:    arch/*/include/asm/atomic*.h
 +F:    include/*/atomic*.h
 +
  ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER
  M:    Bradley Grove <linuxdrivers@attotech.com>
  L:    linux-scsi@vger.kernel.org
@@@ -2721,6 -2685,7 +2721,6 @@@ N:      kon
  F:    arch/arm/mach-bcm/
  
  BROADCOM BCM2835 ARM ARCHITECTURE
 -M:    Lee Jones <lee@kernel.org>
  M:    Eric Anholt <eric@anholt.net>
  M:    Stefan Wahren <stefan.wahren@i2se.com>
  L:    linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -2820,11 -2785,8 +2820,11 @@@ BROADCOM BRCM80211 IEEE802.11n WIRELES
  M:    Arend van Spriel <arend.vanspriel@broadcom.com>
  M:    Franky Lin <franky.lin@broadcom.com>
  M:    Hante Meuleman <hante.meuleman@broadcom.com>
 +M:    Chi-Hsien Lin <chi-hsien.lin@cypress.com>
 +M:    Wright Feng <wright.feng@cypress.com>
  L:    linux-wireless@vger.kernel.org
  L:    brcm80211-dev-list.pdl@broadcom.com
 +L:    brcm80211-dev-list@cypress.com
  S:    Supported
  F:    drivers/net/wireless/broadcom/brcm80211/
  
@@@ -3002,7 -2964,7 +3002,7 @@@ F:      sound/pci/oxygen
  
  C6X ARCHITECTURE
  M:    Mark Salter <msalter@redhat.com>
 -M:    Aurelien Jacquiot <a-jacquiot@ti.com>
 +M:    Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
  L:    linux-c6x-dev@linux-c6x.org
  W:    http://www.linux-c6x.org/wiki/index.php/Main_Page
  S:    Maintained
@@@ -3175,7 -3137,6 +3175,7 @@@ F:      include/media/cec.
  F:    include/media/cec-notifier.h
  F:    include/uapi/linux/cec.h
  F:    include/uapi/linux/cec-funcs.h
 +F:    Documentation/devicetree/bindings/media/cec.txt
  
  CELL BROADBAND ENGINE ARCHITECTURE
  M:    Arnd Bergmann <arnd@arndb.de>
@@@ -3625,6 -3586,7 +3625,6 @@@ T:      git git://git.kernel.org/pub/scm/lin
  S:    Maintained
  F:    Documentation/crypto/
  F:    Documentation/devicetree/bindings/crypto/
 -F:    Documentation/DocBook/crypto-API.tmpl
  F:    arch/*/crypto/
  F:    crypto/
  F:    drivers/crypto/
@@@ -3751,13 -3713,6 +3751,13 @@@ S:    Supporte
  F:    drivers/infiniband/hw/cxgb4/
  F:    include/uapi/rdma/cxgb4-abi.h
  
 +CXGB4 CRYPTO DRIVER (chcr)
 +M:    Harsh Jain <harsh@chelsio.com>
 +L:    linux-crypto@vger.kernel.org
 +W:    http://www.chelsio.com
 +S:    Supported
 +F:    drivers/crypto/chelsio
 +
  CXGB4VF ETHERNET DRIVER (CXGB4VF)
  M:    Casey Leedom <leedom@chelsio.com>
  L:    netdev@vger.kernel.org
@@@ -4188,7 -4143,8 +4188,7 @@@ M:      Jonathan Corbet <corbet@lwn.net
  L:    linux-doc@vger.kernel.org
  S:    Maintained
  F:    Documentation/
 -F:    scripts/docproc.c
 -F:    scripts/kernel-doc*
 +F:    scripts/kernel-doc
  X:    Documentation/ABI/
  X:    Documentation/devicetree/
  X:    Documentation/acpi
@@@ -4736,13 -4692,6 +4736,13 @@@ S:    Maintaine
  F:    drivers/media/usb/dvb-usb-v2/dvb_usb*
  F:    drivers/media/usb/dvb-usb-v2/usb_urb.c
  
 +DONGWOON DW9714 LENS VOICE COIL DRIVER
 +M:    Sakari Ailus <sakari.ailus@linux.intel.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/i2c/dw9714.c
 +
  DYNAMIC DEBUG
  M:    Jason Baron <jbaron@akamai.com>
  S:    Maintained
@@@ -5673,7 -5622,7 +5673,7 @@@ F:      scripts/get_maintainer.p
  
  GENWQE (IBM Generic Workqueue Card)
  M:    Frank Haverkamp <haver@linux.vnet.ibm.com>
 -M:    Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
 +M:    Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
  S:    Supported
  F:    drivers/misc/genwqe/
  
@@@ -5718,6 -5667,7 +5718,6 @@@ F:      tools/testing/selftests/gpio
  
  GPIO SUBSYSTEM
  M:    Linus Walleij <linus.walleij@linaro.org>
 -M:    Alexandre Courbot <gnurou@gmail.com>
  L:    linux-gpio@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
  S:    Maintained
@@@ -6477,7 -6427,7 +6477,7 @@@ F:      Documentation/cdrom/ide-c
  F:    drivers/ide/ide-cd*
  
  IEEE 802.15.4 SUBSYSTEM
 -M:    Alexander Aring <aar@pengutronix.de>
 +M:    Alexander Aring <alex.aring@gmail.com>
  M:    Stefan Schmidt <stefan@osg.samsung.com>
  L:    linux-wpan@vger.kernel.org
  W:    http://wpan.cakelab.org/
@@@ -6531,13 -6481,6 +6531,13 @@@ F:    Documentation/ABI/testing/sysfs-bus-
  F:    Documentation/devicetree/bindings/iio/adc/envelope-detector.txt
  F:    drivers/iio/adc/envelope-detector.c
  
 +IIO MULTIPLEXER
 +M:    Peter Rosin <peda@axentia.se>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/iio/multiplexer/iio-mux.txt
 +F:    drivers/iio/multiplexer/iio-mux.c
 +
  IIO SUBSYSTEM AND DRIVERS
  M:    Jonathan Cameron <jic23@kernel.org>
  R:    Hartmut Knaack <knaack.h@gmx.de>
@@@ -6666,12 -6609,6 +6666,12 @@@ F:    Documentation/input/multi-touch-prot
  F:    drivers/input/input-mt.c
  K:    \b(ABS|SYN)_MT_
  
 +INSIDE SECURE CRYPTO DRIVER
 +M:    Antoine Tenart <antoine.tenart@free-electrons.com>
 +F:    drivers/crypto/inside-secure/
 +S:    Maintained
 +L:    linux-crypto@vger.kernel.org
 +
  INTEL ASoC BDW/HSW DRIVERS
  M:    Jie Yang <yang.jie@linux.intel.com>
  L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
@@@ -6801,7 -6738,6 +6801,7 @@@ F:      Documentation/networking/i40e.tx
  F:    Documentation/networking/i40evf.txt
  F:    drivers/net/ethernet/intel/
  F:    drivers/net/ethernet/intel/*/
 +F:    include/linux/avf/virtchnl.h
  
  INTEL RDMA RNIC DRIVER
  M:     Faisal Latif <faisal.latif@intel.com>
@@@ -7207,7 -7143,7 +7207,7 @@@ S:      Maintaine
  F:    drivers/media/platform/rcar_jpu.c
  
  JSM Neo PCI based serial card
 -M:    Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
 +M:    Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
  L:    linux-serial@vger.kernel.org
  S:    Maintained
  F:    drivers/tty/serial/jsm/
@@@ -7350,7 -7286,7 +7350,7 @@@ F:      arch/powerpc/kvm
  
  KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
  M:    Christian Borntraeger <borntraeger@de.ibm.com>
- M:    Cornelia Huck <cornelia.huck@de.ibm.com>
+ M:    Cornelia Huck <cohuck@redhat.com>
  L:    linux-s390@vger.kernel.org
  W:    http://www.ibm.com/developerworks/linux/linux390/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
@@@ -7413,7 -7349,7 +7413,7 @@@ KEYS/KEYRINGS
  M:    David Howells <dhowells@redhat.com>
  L:    keyrings@vger.kernel.org
  S:    Maintained
 -F:    Documentation/security/keys.txt
 +F:    Documentation/security/keys/core.rst
  F:    include/linux/key.h
  F:    include/linux/key-type.h
  F:    include/linux/keyctl.h
@@@ -7427,7 -7363,7 +7427,7 @@@ M:      Mimi Zohar <zohar@linux.vnet.ibm.com
  L:    linux-security-module@vger.kernel.org
  L:    keyrings@vger.kernel.org
  S:    Supported
 -F:    Documentation/security/keys-trusted-encrypted.txt
 +F:    Documentation/security/keys/trusted-encrypted.rst
  F:    include/keys/trusted-type.h
  F:    security/keys/trusted.c
  F:    security/keys/trusted.h
@@@ -7438,7 -7374,7 +7438,7 @@@ M:      David Safford <safford@us.ibm.com
  L:    linux-security-module@vger.kernel.org
  L:    keyrings@vger.kernel.org
  S:    Supported
 -F:    Documentation/security/keys-trusted-encrypted.txt
 +F:    Documentation/security/keys/trusted-encrypted.rst
  F:    include/keys/encrypted-type.h
  F:    security/keys/encrypted-keys/
  
@@@ -7448,7 -7384,7 +7448,7 @@@ W:      http://kgdb.wiki.kernel.org
  L:    kgdb-bugreport@lists.sourceforge.net
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git
  S:    Maintained
 -F:    Documentation/DocBook/kgdb.tmpl
 +F:    Documentation/dev-tools/kgdb.rst
  F:    drivers/misc/kgdbts.c
  F:    drivers/tty/serial/kgdboc.c
  F:    include/linux/kdb.h
@@@ -7602,15 -7538,6 +7602,15 @@@ S:    Maintaine
  F:    drivers/ata/pata_*.c
  F:    drivers/ata/ata_generic.c
  
 +LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +L:    linux-ide@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
 +S:    Maintained
 +F:    drivers/ata/pata_ftide010.c
 +F:    drivers/ata/sata_gemini.c
 +F:    drivers/ata/sata_gemini.h
 +
  LIBATA SATA AHCI PLATFORM devices support
  M:    Hans de Goede <hdegoede@redhat.com>
  M:    Tejun Heo <tj@kernel.org>
@@@ -7629,7 -7556,7 +7629,7 @@@ S:      Maintaine
  F:    drivers/ata/sata_promise.*
  
  LIBLOCKDEP
 -M:    Sasha Levin <sasha.levin@oracle.com>
 +M:    Sasha Levin <alexander.levin@verizon.com>
  S:    Maintained
  F:    tools/lib/lockdep/
  
@@@ -7780,7 -7707,7 +7780,7 @@@ F:      drivers/platform/x86/hp_accel.
  
  LIVE PATCHING
  M:    Josh Poimboeuf <jpoimboe@redhat.com>
 -M:    Jessica Yu <jeyu@redhat.com>
 +M:    Jessica Yu <jeyu@kernel.org>
  M:    Jiri Kosina <jikos@kernel.org>
  M:    Miroslav Benes <mbenes@suse.cz>
  R:    Petr Mladek <pmladek@suse.com>
@@@ -8051,12 -7978,6 +8051,12 @@@ S:    Maintaine
  F:    drivers/net/ethernet/marvell/mv643xx_eth.*
  F:    include/linux/mv643xx.h
  
 +MARVELL MV88X3310 PHY DRIVER
 +M:    Russell King <rmk@armlinux.org.uk>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    drivers/net/phy/marvell10g.c
 +
  MARVELL MVNETA ETHERNET DRIVER
  M:    Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
  L:    netdev@vger.kernel.org
@@@ -8110,16 -8031,6 +8110,16 @@@ S:    Maintaine
  F:    Documentation/hwmon/max20751
  F:    drivers/hwmon/max20751.c
  
 +MAX2175 SDR TUNER DRIVER
 +M:    Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/media/i2c/max2175.txt
 +F:    Documentation/media/v4l-drivers/max2175.rst
 +F:    drivers/media/i2c/max2175*
 +F:    include/uapi/linux/max2175.h
 +
  MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
  L:    linux-hwmon@vger.kernel.org
  S:    Orphan
@@@ -8150,11 -8061,11 +8150,11 @@@ S:   Supporte
  F:    drivers/power/supply/max14577_charger.c
  F:    drivers/power/supply/max77693_charger.c
  
 -MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
 -M:    Javier Martinez Canillas <javier@osg.samsung.com>
 +MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
 +M:    Javier Martinez Canillas <javier@dowhile0.org>
  L:    linux-kernel@vger.kernel.org
  S:    Supported
 -F:    drivers/*/*max77802*.c
 +F:    drivers/regulator/max77802-regulator.c
  F:    Documentation/devicetree/bindings/*/*max77802.txt
  F:    include/dt-bindings/*/*max77802.h
  
@@@ -8200,27 -8111,6 +8200,27 @@@ L:    linux-iio@vger.kernel.or
  S:    Maintained
  F:    drivers/iio/dac/cio-dac.c
  
 +MEDIA DRIVERS FOR RENESAS - DRIF
 +M:    Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
 +L:    linux-media@vger.kernel.org
 +L:    linux-renesas-soc@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Supported
 +F:    Documentation/devicetree/bindings/media/renesas,drif.txt
 +F:    drivers/media/platform/rcar_drif.c
 +
 +MEDIA DRIVERS FOR FREESCALE IMX
 +M:    Steve Longerbeam <slongerbeam@gmail.com>
 +M:    Philipp Zabel <p.zabel@pengutronix.de>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/media/imx.txt
 +F:    Documentation/media/v4l-drivers/imx.rst
 +F:    drivers/staging/media/imx/
 +F:    include/linux/imx-media.h
 +F:    include/media/imx.h
 +
  MEDIA DRIVERS FOR RENESAS - FCP
  M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  L:    linux-media@vger.kernel.org
@@@ -8378,11 -8268,6 +8378,11 @@@ L:    linux-wireless@vger.kernel.or
  S:    Maintained
  F:    drivers/net/wireless/mediatek/mt7601u/
  
 +MEDIATEK RANDOM NUMBER GENERATOR SUPPORT
 +M:      Sean Wang <sean.wang@mediatek.com>
 +S:      Maintained
 +F:      drivers/char/hw_random/mtk-rng.c
 +
  MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
  M:    Peter Senna Tschudin <peter.senna@collabora.com>
  M:    Martin Donnelly <martin.donnelly@ge.com>
@@@ -8426,26 -8311,6 +8426,26 @@@ W:    http://www.mellanox.co
  Q:    http://patchwork.ozlabs.org/project/netdev/list/
  F:    drivers/net/ethernet/mellanox/mlx5/core/en_*
  
 +MELLANOX ETHERNET INNOVA DRIVER
 +M:    Ilan Tayari <ilant@mellanox.com>
 +R:    Boris Pismenny <borisp@mellanox.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +W:    http://www.mellanox.com
 +Q:    http://patchwork.ozlabs.org/project/netdev/list/
 +F:    drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 +F:    include/linux/mlx5/mlx5_ifc_fpga.h
 +
 +MELLANOX ETHERNET INNOVA IPSEC DRIVER
 +M:    Ilan Tayari <ilant@mellanox.com>
 +R:    Boris Pismenny <borisp@mellanox.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +W:    http://www.mellanox.com
 +Q:    http://patchwork.ozlabs.org/project/netdev/list/
 +F:    drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
 +F:    drivers/net/ethernet/mellanox/mlx5/core/ipsec*
 +
  MELLANOX ETHERNET SWITCH DRIVERS
  M:    Jiri Pirko <jiri@mellanox.com>
  M:    Ido Schimmel <idosch@mellanox.com>
@@@ -8455,14 -8320,6 +8455,14 @@@ W:    http://www.mellanox.co
  Q:    http://patchwork.ozlabs.org/project/netdev/list/
  F:    drivers/net/ethernet/mellanox/mlxsw/
  
 +MELLANOX FIRMWARE FLASH LIBRARY (mlxfw)
 +M:    Yotam Gigi <yotamg@mellanox.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +W:    http://www.mellanox.com
 +Q:    http://patchwork.ozlabs.org/project/netdev/list/
 +F:    drivers/net/ethernet/mellanox/mlxfw/
 +
  MELLANOX MLXCPLD I2C AND MUX DRIVER
  M:    Vadim Pasternak <vadimp@mellanox.com>
  M:    Michael Shych <michaelsh@mellanox.com>
@@@ -8581,7 -8438,7 +8581,7 @@@ T:      git git://git.monstr.eu/linux-2.6-mi
  S:    Supported
  F:    arch/microblaze/
  
 -MICROCHIP / ATMEL AT91 / AT32 SERIAL DRIVER
 +MICROCHIP / ATMEL AT91 SERIAL DRIVER
  M:    Richard Genoud <richard.genoud@gmail.com>
  S:    Maintained
  F:    drivers/tty/serial/atmel_serial.c
@@@ -8604,16 -8461,6 +8604,16 @@@ F:    drivers/media/platform/atmel/atmel-i
  F:    drivers/media/platform/atmel/atmel-isc-regs.h
  F:    devicetree/bindings/media/atmel-isc.txt
  
 +MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
 +M:    Woojung Huh <Woojung.Huh@microchip.com>
 +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    net/dsa/tag_ksz.c
 +F:    drivers/net/dsa/microchip/*
 +F:    include/linux/platform_data/microchip-ksz.h
 +F:    Documentation/devicetree/bindings/net/dsa/ksz.txt
 +
  MICROCHIP USB251XB DRIVER
  M:    Richard Leitner <richard.leitner@skidata.com>
  L:    linux-usb@vger.kernel.org
@@@ -8661,7 -8508,7 +8661,7 @@@ S:      Odd Fixe
  F:    drivers/media/radio/radio-miropcm20*
  
  MELLANOX MLX4 core VPI driver
 -M:    Yishai Hadas <yishaih@mellanox.com>
 +M:    Tariq Toukan <tariqt@mellanox.com>
  L:    netdev@vger.kernel.org
  L:    linux-rdma@vger.kernel.org
  W:    http://www.mellanox.com
@@@ -8669,6 -8516,7 +8669,6 @@@ Q:      http://patchwork.ozlabs.org/project/
  S:    Supported
  F:    drivers/net/ethernet/mellanox/mlx4/
  F:    include/linux/mlx4/
 -F:    include/uapi/rdma/mlx4-abi.h
  
  MELLANOX MLX4 IB driver
  M:    Yishai Hadas <yishaih@mellanox.com>
@@@ -8678,7 -8526,6 +8678,7 @@@ Q:      http://patchwork.kernel.org/project/
  S:    Supported
  F:    drivers/infiniband/hw/mlx4/
  F:    include/linux/mlx4/
 +F:    include/uapi/rdma/mlx4-abi.h
  
  MELLANOX MLX5 core VPI driver
  M:    Saeed Mahameed <saeedm@mellanox.com>
@@@ -8691,6 -8538,7 +8691,6 @@@ Q:      http://patchwork.ozlabs.org/project/
  S:    Supported
  F:    drivers/net/ethernet/mellanox/mlx5/core/
  F:    include/linux/mlx5/
 -F:    include/uapi/rdma/mlx5-abi.h
  
  MELLANOX MLX5 IB driver
  M:    Matan Barak <matanb@mellanox.com>
@@@ -8701,7 -8549,6 +8701,7 @@@ Q:      http://patchwork.kernel.org/project/
  S:    Supported
  F:    drivers/infiniband/hw/mlx5/
  F:    include/linux/mlx5/
 +F:    include/uapi/rdma/mlx5-abi.h
  
  MELEXIS MLX90614 DRIVER
  M:    Crt Mori <cmo@melexis.com>
@@@ -8741,7 -8588,7 +8741,7 @@@ S:      Maintaine
  F:    drivers/media/dvb-frontends/mn88473*
  
  MODULE SUPPORT
 -M:    Jessica Yu <jeyu@redhat.com>
 +M:    Jessica Yu <jeyu@kernel.org>
  M:    Rusty Russell <rusty@rustcorp.com.au>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
  S:    Maintained
@@@ -8869,15 -8716,6 +8869,15 @@@ S:    Orpha
  F:    drivers/mmc/host/mmc_spi.c
  F:    include/linux/spi/mmc_spi.h
  
 +MULTIPLEXER SUBSYSTEM
 +M:    Peter Rosin <peda@axentia.se>
 +S:    Maintained
 +F:    Documentation/ABI/testing/mux/sysfs-class-mux*
 +F:    Documentation/devicetree/bindings/mux/
 +F:    include/linux/dt-bindings/mux/
 +F:    include/linux/mux/
 +F:    drivers/mux/
 +
  MULTISOUND SOUND DRIVER
  M:    Andrew Veliath <andrewtv@usa.net>
  S:    Maintained
@@@ -9106,16 -8944,6 +9106,16 @@@ F:    net/ipv6
  F:    include/net/ip*
  F:    arch/x86/net/*
  
 +NETWORKING [TLS]
 +M:    Ilya Lesokhin <ilyal@mellanox.com>
 +M:    Aviad Yehezkel <aviadye@mellanox.com>
 +M:    Dave Watson <davejwatson@fb.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    net/tls/*
 +F:    include/uapi/linux/tls.h
 +F:    include/net/tls.h
 +
  NETWORKING [IPSEC]
  M:    Steffen Klassert <steffen.klassert@secunet.com>
  M:    Herbert Xu <herbert@gondor.apana.org.au>
@@@ -9197,6 -9025,9 +9197,6 @@@ F:      include/uapi/linux/nfc.
  F:    drivers/nfc/
  F:    include/linux/platform_data/nfcmrvl.h
  F:    include/linux/platform_data/nxp-nci.h
 -F:    include/linux/platform_data/pn544.h
 -F:    include/linux/platform_data/st21nfca.h
 -F:    include/linux/platform_data/st-nci.h
  F:    Documentation/devicetree/bindings/net/nfc/
  
  NFS, SUNRPC, AND LOCKD CLIENTS
@@@ -9588,13 -9419,6 +9588,13 @@@ M:    Harald Welte <laforge@gnumonks.org
  S:    Maintained
  F:    drivers/char/pcmcia/cm4040_cs.*
  
 +OMNIVISION OV5640 SENSOR DRIVER
 +M:    Steve Longerbeam <slongerbeam@gmail.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/i2c/ov5640.c
 +
  OMNIVISION OV5647 SENSOR DRIVER
  M:    Ramiro Oliveira <roliveir@synopsys.com>
  L:    linux-media@vger.kernel.org
@@@ -9610,13 -9434,6 +9610,13 @@@ S:    Maintaine
  F:    drivers/media/i2c/ov7670.c
  F:    Documentation/devicetree/bindings/media/i2c/ov7670.txt
  
 +OMNIVISION OV13858 SENSOR DRIVER
 +M:    Sakari Ailus <sakari.ailus@linux.intel.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/i2c/ov13858.c
 +
  ONENAND FLASH DRIVER
  M:    Kyungmin Park <kyungmin.park@samsung.com>
  L:    linux-mtd@lists.infradead.org
@@@ -10267,13 -10084,6 +10267,13 @@@ M: Heikki Krogerus <heikki.krogerus@lin
  S:    Maintained
  F:    drivers/pinctrl/intel/
  
 +PIN CONTROLLER - QUALCOMM
 +M:    Bjorn Andersson <bjorn.andersson@linaro.org>
 +S:    Maintained
 +L:    linux-arm-msm@vger.kernel.org
 +F:    Documentation/devicetree/bindings/pinctrl/qcom,*.txt
 +F:    drivers/pinctrl/qcom/
 +
  PIN CONTROLLER - RENESAS
  M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  M:    Geert Uytterhoeven <geert+renesas@glider.be>
@@@ -10345,7 -10155,7 +10345,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
  S:    Maintained
  F:    Documentation/hwmon/pmbus
  F:    drivers/hwmon/pmbus/
 -F:    include/linux/i2c/pmbus.h
 +F:    include/linux/pmbus.h
  
  PMC SIERRA MaxRAID DRIVER
  L:    linux-scsi@vger.kernel.org
@@@ -10640,7 -10450,7 +10640,7 @@@ S:   Orpha
  
  PXA RTC DRIVER
  M:    Robert Jarzmik <robert.jarzmik@free.fr>
 -L:    rtc-linux@googlegroups.com
 +L:    linux-rtc@vger.kernel.org
  S:    Maintained
  
  QAT DRIVER
@@@ -10735,7 -10545,6 +10735,7 @@@ M:   Laurentiu Tudor <laurentiu.tudor@nxp
  L:    linux-kernel@vger.kernel.org
  S:    Maintained
  F:    drivers/staging/fsl-mc/
 +F:    Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
  
  QT1010 MEDIA DRIVER
  M:    Antti Palosaari <crope@iki.fi>
@@@ -10775,14 -10584,6 +10775,14 @@@ T: git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    arch/hexagon/
  
 +QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
 +M:    Stanimir Varbanov <stanimir.varbanov@linaro.org>
 +L:    linux-media@vger.kernel.org
 +L:    linux-arm-msm@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/platform/qcom/venus/
 +
  QUALCOMM WCN36XX WIRELESS DRIVER
  M:    Eugene Krasnikov <k.eugene.e@gmail.com>
  L:    wcn36xx@lists.infradead.org
@@@ -10798,14 -10599,6 +10798,14 @@@ L: qemu-devel@nongnu.or
  S:    Maintained
  F:    drivers/firmware/qemu_fw_cfg.c
  
 +QUANTENNA QTNFMAC WIRELESS DRIVER
 +M:   Igor Mitsyanko <imitsyanko@quantenna.com>
 +M:   Avinash Patil <avinashp@quantenna.com>
 +M:   Sergey Matyukevich <smatyukevich@quantenna.com>
 +L:   linux-wireless@vger.kernel.org
 +S:   Maintained
 +F:   drivers/net/wireless/quantenna
 +
  RADOS BLOCK DEVICE (RBD)
  M:    Ilya Dryomov <idryomov@gmail.com>
  M:    Sage Weil <sage@redhat.com>
@@@ -10964,7 -10757,7 +10964,7 @@@ X:   kernel/torture.
  REAL TIME CLOCK (RTC) SUBSYSTEM
  M:    Alessandro Zummo <a.zummo@towertech.it>
  M:    Alexandre Belloni <alexandre.belloni@free-electrons.com>
 -L:    rtc-linux@googlegroups.com
 +L:    linux-rtc@vger.kernel.org
  Q:    http://patchwork.ozlabs.org/project/rtc-linux/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
  S:    Maintained
@@@ -11039,11 -10832,11 +11039,11 @@@ L:        linux-iio@vger.kernel.or
  S:    Supported
  F:    drivers/iio/adc/rcar_gyro_adc.c
  
 -RENESAS USB2 PHY DRIVER
 +RENESAS USB PHY DRIVER
  M:    Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
  L:    linux-renesas-soc@vger.kernel.org
  S:    Maintained
 -F:    drivers/phy/phy-rcar-gen3-usb2.c
 +F:    drivers/phy/renesas/phy-rcar-gen3-usb*.c
  
  RESET CONTROLLER FRAMEWORK
  M:    Philipp Zabel <p.zabel@pengutronix.de>
@@@ -11200,7 -10993,7 +11200,7 @@@ S:   Supporte
  F:    arch/s390/
  F:    drivers/s390/
  F:    Documentation/s390/
 -F:    Documentation/DocBook/s390*
 +F:    Documentation/driver-api/s390-drivers.rst
  
  S390 COMMON I/O LAYER
  M:    Sebastian Ott <sebott@linux.vnet.ibm.com>
@@@ -11268,7 -11061,7 +11268,7 @@@ S:   Supporte
  F:    drivers/iommu/s390-iommu.c
  
  S390 VFIO-CCW DRIVER
- M:    Cornelia Huck <cornelia.huck@de.ibm.com>
+ M:    Cornelia Huck <cohuck@redhat.com>
  M:    Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  L:    linux-s390@vger.kernel.org
  L:    kvm@vger.kernel.org
@@@ -11445,12 -11238,12 +11445,12 @@@ L:        linux-kernel@vger.kernel.or
  S:    Supported
  F:    Documentation/devicetree/bindings/phy/samsung-phy.txt
  F:    Documentation/phy/samsung-usb2.txt
 -F:    drivers/phy/phy-exynos4210-usb2.c
 -F:    drivers/phy/phy-exynos4x12-usb2.c
 -F:    drivers/phy/phy-exynos5250-usb2.c
 -F:    drivers/phy/phy-s5pv210-usb2.c
 -F:    drivers/phy/phy-samsung-usb2.c
 -F:    drivers/phy/phy-samsung-usb2.h
 +F:    drivers/phy/samsung/phy-exynos4210-usb2.c
 +F:    drivers/phy/samsung/phy-exynos4x12-usb2.c
 +F:    drivers/phy/samsung/phy-exynos5250-usb2.c
 +F:    drivers/phy/samsung/phy-s5pv210-usb2.c
 +F:    drivers/phy/samsung/phy-samsung-usb2.c
 +F:    drivers/phy/samsung/phy-samsung-usb2.h
  
  SERIAL DRIVERS
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@@ -11475,6 -11268,7 +11475,6 @@@ F:   drivers/media/rc/serial_ir.
  
  STI CEC DRIVER
  M:    Benjamin Gaignard <benjamin.gaignard@linaro.org>
 -L:    kernel@stlinux.com
  S:    Maintained
  F:    drivers/staging/media/st-cec/
  F:    Documentation/devicetree/bindings/media/stih-cec.txt
@@@ -11534,9 -11328,6 +11534,9 @@@ F:   Documentation/tee.tx
  
  THUNDERBOLT DRIVER
  M:    Andreas Noever <andreas.noever@gmail.com>
 +M:    Michael Jamet <michael.jamet@intel.com>
 +M:    Mika Westerberg <mika.westerberg@linux.intel.com>
 +M:    Yehezkel Bernat <yehezkel.bernat@intel.com>
  S:    Maintained
  F:    drivers/thunderbolt/
  
@@@ -11564,14 -11355,6 +11564,14 @@@ F: kernel/time/alarmtimer.
  F:    kernel/time/ntp.c
  F:    tools/testing/selftests/timers/
  
 +TI TRF7970A NFC DRIVER
 +M:    Mark Greer <mgreer@animalcreek.com>
 +L:    linux-wireless@vger.kernel.org
 +L:    linux-nfc@lists.01.org (moderated for non-subscribers)
 +S:    Supported
 +F:    drivers/nfc/trf7970a.c
 +F:    Documentation/devicetree/bindings/net/nfc/trf7970a.txt
 +
  SC1200 WDT DRIVER
  M:    Zwane Mwaikambo <zwanem@gmail.com>
  S:    Maintained
@@@ -11712,7 -11495,6 +11712,7 @@@ F:   kernel/seccomp.
  F:    include/uapi/linux/seccomp.h
  F:    include/linux/seccomp.h
  F:    tools/testing/selftests/seccomp/*
 +F:    Documentation/userspace-api/seccomp_filter.rst
  K:    \bsecure_computing
  K:    \bTIF_SECCOMP\b
  
@@@ -11771,7 -11553,6 +11771,7 @@@ S:   Supporte
  F:    include/linux/selinux*
  F:    security/selinux/
  F:    scripts/selinux/
 +F:    Documentation/admin-guide/LSM/SELinux.rst
  
  APPARMOR SECURITY MODULE
  M:    John Johansen <john.johansen@canonical.com>
@@@ -11780,21 -11561,18 +11780,21 @@@ W:        apparmor.wiki.kernel.or
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
  S:    Supported
  F:    security/apparmor/
 +F:    Documentation/admin-guide/LSM/apparmor.rst
  
  LOADPIN SECURITY MODULE
  M:    Kees Cook <keescook@chromium.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git lsm/loadpin
  S:    Supported
  F:    security/loadpin/
 +F:    Documentation/admin-guide/LSM/LoadPin.rst
  
  YAMA SECURITY MODULE
  M:    Kees Cook <keescook@chromium.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip
  S:    Supported
  F:    security/yama/
 +F:    Documentation/admin-guide/LSM/Yama.rst
  
  SENSABLE PHANTOM
  M:    Jiri Slaby <jirislaby@gmail.com>
@@@ -12000,7 -11778,6 +12000,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    arch/arm/mach-davinci/
  F:    drivers/i2c/busses/i2c-davinci.c
 +F:    arch/arm/boot/dts/da850*
  
  TI DAVINCI SERIES MEDIA DRIVER
  M:    "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
@@@ -12097,7 -11874,7 +12097,7 @@@ L:   linux-security-module@vger.kernel.or
  W:    http://schaufler-ca.com
  T:    git git://github.com/cschaufler/smack-next
  S:    Maintained
 -F:    Documentation/security/Smack.txt
 +F:    Documentation/admin-guide/LSM/Smack.rst
  F:    security/smack/
  
  DRIVERS FOR ADAPTIVE VOLTAGE SCALING (AVS)
@@@ -12187,9 -11964,8 +12187,9 @@@ F:   drivers/leds/leds-net48xx.
  
  SOFTLOGIC 6x10 MPEG CODEC
  M:    Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 +M:    Anton Sviridenko <anton@corp.bluecherry.net>
  M:    Andrey Utkin <andrey.utkin@corp.bluecherry.net>
 -M:    Andrey Utkin <andrey.krieger.utkin@gmail.com>
 +M:    Andrey Utkin <andrey_utkin@fastmail.com>
  M:    Ismael Luceno <ismael@iodev.co.uk>
  L:    linux-media@vger.kernel.org
  S:    Supported
@@@ -12857,8 -12633,6 +12857,8 @@@ F:   include/linux/soc/ti/ti_sci_protocol
  F:    Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
  F:    include/dt-bindings/genpd/k2g.h
  F:    drivers/soc/ti/ti_sci_pm_domains.c
 +F:    Documentation/devicetree/bindings/reset/ti,sci-reset.txt
 +F:    drivers/reset/reset-ti-sci.c
  
  THANKO'S RAREMONO AM/FM/SW RADIO RECEIVER USB DRIVER
  M:    Hans Verkuil <hverkuil@xs4all.nl>
@@@ -13108,7 -12882,7 +13108,7 @@@ M:   Wolfram Sang <wsa+renesas@sang-engin
  L:    linux-mmc@vger.kernel.org
  S:    Supported
  F:    drivers/mmc/host/tmio_mmc*
 -F:    drivers/mmc/host/sh_mobile_sdhi.c
 +F:    drivers/mmc/host/renesas_sdhi*
  F:    include/linux/mfd/tmio.h
  
  TMP401 HARDWARE MONITOR DRIVER
@@@ -13137,7 -12911,6 +13137,7 @@@ F:   Documentation/media/v4l-drivers/tm60
  
  TW5864 VIDEO4LINUX DRIVER
  M:    Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 +M:    Anton Sviridenko <anton@corp.bluecherry.net>
  M:    Andrey Utkin <andrey.utkin@corp.bluecherry.net>
  M:    Andrey Utkin <andrey_utkin@fastmail.com>
  L:    linux-media@vger.kernel.org
@@@ -13690,17 -13463,6 +13690,17 @@@ W: http://en.wikipedia.org/wiki/Util-li
  T:    git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
  S:    Maintained
  
 +UUID HELPERS
 +M:    Christoph Hellwig <hch@lst.de>
 +R:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 +L:    linux-kernel@vger.kernel.org
 +T:    git git://git.infradead.org/users/hch/uuid.git
 +F:    lib/uuid.c
 +F:    lib/test_uuid.c
 +F:    include/linux/uuid.h
 +F:    include/uapi/linux/uuid.h
 +S:    Maintained
 +
  UVESAFB DRIVER
  M:    Michal Januszewski <spock@gentoo.org>
  L:    linux-fbdev@vger.kernel.org
@@@ -13763,12 -13525,6 +13763,12 @@@ S: Maintaine
  F:    drivers/media/v4l2-core/videobuf2-*
  F:    include/media/videobuf2-*
  
 +VIDEO MULTIPLEXER DRIVER
 +M:    Philipp Zabel <p.zabel@pengutronix.de>
 +L:    linux-media@vger.kernel.org
 +S:    Maintained
 +F:    drivers/media/platform/video-mux.c
 +
  VIRTIO AND VHOST VSOCK DRIVER
  M:    Stefan Hajnoczi <stefanha@redhat.com>
  L:    kvm@vger.kernel.org
@@@ -13814,7 -13570,7 +13814,7 @@@ F:   include/uapi/linux/virtio_*.
  F:    drivers/crypto/virtio/
  
  VIRTIO DRIVERS FOR S390
- M:    Cornelia Huck <cornelia.huck@de.ibm.com>
+ M:    Cornelia Huck <cohuck@redhat.com>
  M:    Halil Pasic <pasic@linux.vnet.ibm.com>
  L:    linux-s390@vger.kernel.org
  L:    virtualization@lists.linux-foundation.org
@@@ -14013,7 -13769,6 +14013,7 @@@ M:   Evgeniy Polyakov <zbr@ioremap.net
  S:    Maintained
  F:    Documentation/w1/
  F:    drivers/w1/
 +F:    include/linux/w1.h
  
  W83791D HARDWARE MONITORING DRIVER
  M:    Marc Hulsman <m.hulsman@tudelft.nl>
@@@ -14106,7 -13861,7 +14106,7 @@@ S:   Odd fixe
  F:    drivers/net/wireless/wl3501*
  
  WOLFSON MICROELECTRONICS DRIVERS
 -L:    patches@opensource.wolfsonmicro.com
 +L:    patches@opensource.cirrus.com
  T:    git https://github.com/CirrusLogic/linux-drivers.git
  W:    https://github.com/CirrusLogic/linux-drivers/wiki
  S:    Supported
diff --combined arch/arm64/Kconfig
index 9f7a934ff707a00b8495ab0ca533fb86468d3aba,6252365b0c96d3bde1dbb3e54ed49b14d53e790f..192208ea284224dd73c5bd663d5b21e4c6caf347
@@@ -3,7 -3,6 +3,7 @@@ config ARM6
        select ACPI_CCA_REQUIRED if ACPI
        select ACPI_GENERIC_GSI if ACPI
        select ACPI_GTDT if ACPI
 +      select ACPI_IORT if ACPI
        select ACPI_REDUCED_HARDWARE_ONLY if ACPI
        select ACPI_MCFG if ACPI
        select ACPI_SPCR_TABLE if ACPI
@@@ -20,9 -19,7 +20,9 @@@
        select ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_HAS_STRICT_MODULE_RWX
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 +      select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
        select ARCH_USE_CMPXCHG_LOCKREF
 +      select ARCH_SUPPORTS_MEMORY_FAILURE
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_NUMA_BALANCING
        select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
@@@ -44,7 -41,6 +44,7 @@@
        select EDAC_SUPPORT
        select FRAME_POINTER
        select GENERIC_ALLOCATOR
 +      select GENERIC_ARCH_TOPOLOGY
        select GENERIC_CLOCKEVENTS
        select GENERIC_CLOCKEVENTS_BROADCAST
        select GENERIC_CPU_AUTOPROBE
@@@ -96,7 -92,6 +96,7 @@@
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP if NUMA
 +      select HAVE_NMI if ACPI_APEI_SEA
        select HAVE_PATA_PLATFORM
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
@@@ -210,7 -205,7 +210,7 @@@ config GENERIC_CALIBRATE_DELA
  config ZONE_DMA
        def_bool y
  
 -config HAVE_GENERIC_RCU_GUP
 +config HAVE_GENERIC_GUP
        def_bool y
  
  config ARCH_DMA_ADDR_T_64BIT
@@@ -249,9 -244,6 +249,9 @@@ config PGTABLE_LEVEL
  config ARCH_SUPPORTS_UPROBES
        def_bool y
  
 +config ARCH_PROC_KCORE_TEXT
 +      def_bool y
 +
  source "init/Kconfig"
  
  source "kernel/Kconfig.freezer"
@@@ -488,6 -480,17 +488,17 @@@ config CAVIUM_ERRATUM_2745
  
          If unsure, say Y.
  
+ config CAVIUM_ERRATUM_30115
+       bool "Cavium erratum 30115: Guest may disable interrupts in host"
+       default y
+       help
+         On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through
+         1.2, and T83 Pass 1.0, KVM guest execution may disable
+         interrupts in host. Trapping both GICv3 group-0 and group-1
+         accesses sidesteps the issue.
+ 
+         If unsure, say Y.
+ 
  config QCOM_FALKOR_ERRATUM_1003
        bool "Falkor E1003: Incorrect translation due to ASID change"
        default y
@@@ -990,7 -993,7 +1001,7 @@@ config RANDOMIZE_BAS
  
  config RANDOMIZE_MODULE_REGION_FULL
        bool "Randomize the module region independently from the core kernel"
 -      depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
 +      depends on RANDOMIZE_BASE
        default y
        help
          Randomizes the location of the module region without considering the
@@@ -1092,6 -1095,10 +1103,6 @@@ config SYSVIPC_COMPA
        def_bool y
        depends on COMPAT && SYSVIPC
  
 -config KEYS_COMPAT
 -      def_bool y
 -      depends on COMPAT && KEYS
 -
  endmenu
  
  menu "Power management options"
index 28bf02efce76d7adf8ae514ce10d87c0f093d659,e7d8e281ff62f7780bf2bae77788c7e237a9887b..8cabd57b634832ca7c4df7d8fee39549233167dd
@@@ -19,6 -19,7 +19,7 @@@
  #define __ASM_ESR_H
  
  #include <asm/memory.h>
+ #include <asm/sysreg.h>
  
  #define ESR_ELx_EC_UNKNOWN    (0x00)
  #define ESR_ELx_EC_WFx                (0x01)
@@@ -83,7 -84,6 +84,7 @@@
  #define ESR_ELx_WNR           (UL(1) << 6)
  
  /* Shared ISS field definitions for Data/Instruction aborts */
 +#define ESR_ELx_FnV           (UL(1) << 10)
  #define ESR_ELx_EA            (UL(1) << 9)
  #define ESR_ELx_S1PTW         (UL(1) << 7)
  
  #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ  (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
                                         ESR_ELx_SYS64_ISS_DIR_READ)
  
+ #define esr_sys64_to_sysreg(e)                                        \
+       sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP0_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP1_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRN_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRM_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP2_SHIFT))
+ 
+ #define esr_cp15_to_sysreg(e)                                 \
+       sys_reg(3,                                              \
+               (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP1_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRN_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRM_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP2_SHIFT))
+ 
  #ifndef __ASSEMBLY__
  #include <asm/types.h>
  
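(Editor's illustration, not part of the commit: esr_sys64_to_sysreg() above repacks the Op0/Op1/CRn/CRm/Op2 fields of a trapped system-register access into the sys_reg() encoding. Below is a standalone sketch of that field extraction, assuming the ARMv8 ISS layout for trapped MSR/MRS instructions (Op0 in bits [21:20], Op2 in [19:17], Op1 in [16:14], CRn in [13:10], CRm in [4:1]); the ESR_ELx_SYS64_ISS_* mask and shift constants used by the macro are defined elsewhere in this header, so the values here are assumptions.)

    #include <stdio.h>

    /* Assumed ISS field layout for a trapped MSR/MRS (ESR_ELx.EC = 0x18). */
    #define ISS_OP0_SHIFT 20
    #define ISS_OP0_MASK  (0x3u << ISS_OP0_SHIFT)
    #define ISS_OP2_SHIFT 17
    #define ISS_OP2_MASK  (0x7u << ISS_OP2_SHIFT)
    #define ISS_OP1_SHIFT 14
    #define ISS_OP1_MASK  (0x7u << ISS_OP1_SHIFT)
    #define ISS_CRN_SHIFT 10
    #define ISS_CRN_MASK  (0xfu << ISS_CRN_SHIFT)
    #define ISS_CRM_SHIFT 1
    #define ISS_CRM_MASK  (0xfu << ISS_CRM_SHIFT)

    int main(void)
    {
            unsigned int esr = 0x30d41au; /* hypothetical ISS bits of a trapped access */

            printf("op0=%u op1=%u crn=%u crm=%u op2=%u\n",
                   (esr & ISS_OP0_MASK) >> ISS_OP0_SHIFT,
                   (esr & ISS_OP1_MASK) >> ISS_OP1_SHIFT,
                   (esr & ISS_CRN_MASK) >> ISS_CRN_SHIFT,
                   (esr & ISS_CRM_MASK) >> ISS_CRM_SHIFT,
                   (esr & ISS_OP2_MASK) >> ISS_OP2_SHIFT);
            return 0;
    }
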
index 773b35d16a0b61ddc3b13f02fd8a7eaca9d4976b,c6313c5d331c0a960f278b7fca34b7b1f1e0d1e9..0b436df746fcb094d8bd8b3c928c0862d6a90df5
@@@ -46,6 -46,8 +46,8 @@@
  #include <linux/of.h>
  
  #include <asm/reg.h>
+ #include <asm/ppc-opcode.h>
+ #include <asm/disassemble.h>
  #include <asm/cputable.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
@@@ -645,6 -647,7 +647,7 @@@ static void kvmppc_create_dtl_entry(str
        unsigned long stolen;
        unsigned long core_stolen;
        u64 now;
+       unsigned long flags;
  
        dt = vcpu->arch.dtl_ptr;
        vpa = vcpu->arch.vpa.pinned_addr;
        core_stolen = vcore_stolen_time(vc, now);
        stolen = core_stolen - vcpu->arch.stolen_logged;
        vcpu->arch.stolen_logged = core_stolen;
-       spin_lock_irq(&vcpu->arch.tbacct_lock);
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        stolen += vcpu->arch.busy_stolen;
        vcpu->arch.busy_stolen = 0;
-       spin_unlock_irq(&vcpu->arch.tbacct_lock);
+       spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
        if (!dt || !vpa)
                return;
        memset(dt, 0, sizeof(struct dtl_entry));
        vcpu->arch.dtl.dirty = true;
  }
  
+ /* See if there is a doorbell interrupt pending for a vcpu */
+ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+ {
+       int thr;
+       struct kvmppc_vcore *vc;
+ 
+       if (vcpu->arch.doorbell_request)
+               return true;
+       /*
+        * Ensure that the read of vcore->dpdes comes after the read
+        * of vcpu->doorbell_request.  This barrier matches the
+        * lwsync in book3s_hv_rmhandlers.S just before the
+        * fast_guest_return label.
+        */
+       smp_rmb();
+       vc = vcpu->arch.vcore;
+       thr = vcpu->vcpu_id - vc->first_vcpuid;
+       return !!(vc->dpdes & (1 << thr));
+ }
+
  static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
  {
        if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@@ -926,6 -949,101 +949,101 @@@ static int kvmppc_emulate_debug_inst(st
        }
  }
  
+ static void do_nothing(void *x)
+ {
+ }
+
+ static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+ {
+       int thr, cpu, pcpu, nthreads;
+       struct kvm_vcpu *v;
+       unsigned long dpdes;
+
+       nthreads = vcpu->kvm->arch.emul_smt_mode;
+       dpdes = 0;
+       cpu = vcpu->vcpu_id & ~(nthreads - 1);
+       for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+               v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+               if (!v)
+                       continue;
+               /*
+                * If the vcpu is currently running on a physical cpu thread,
+                * interrupt it in order to pull it out of the guest briefly,
+                * which will update its vcore->dpdes value.
+                */
+               pcpu = READ_ONCE(v->cpu);
+               if (pcpu >= 0)
+                       smp_call_function_single(pcpu, do_nothing, NULL, 1);
+               if (kvmppc_doorbell_pending(v))
+                       dpdes |= 1 << thr;
+       }
+       return dpdes;
+ }
+
+ /*
+  * On POWER9, emulate doorbell-related instructions in order to
+  * give the guest the illusion of running on a multi-threaded core.
+  * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+  * and mfspr DPDES.
+  */
+ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+ {
+       u32 inst, rb, thr;
+       unsigned long arg;
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_vcpu *tvcpu;
+
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               return EMULATE_FAIL;
+       if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
+               return RESUME_GUEST;
+       if (get_op(inst) != 31)
+               return EMULATE_FAIL;
+       rb = get_rb(inst);
+       thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+       switch (get_xop(inst)) {
+       case OP_31_XOP_MSGSNDP:
+               arg = kvmppc_get_gpr(vcpu, rb);
+               if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+                       break;
+               arg &= 0x3f;
+               if (arg >= kvm->arch.emul_smt_mode)
+                       break;
+               tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+               if (!tvcpu)
+                       break;
+               if (!tvcpu->arch.doorbell_request) {
+                       tvcpu->arch.doorbell_request = 1;
+                       kvmppc_fast_vcpu_kick_hv(tvcpu);
+               }
+               break;
+       case OP_31_XOP_MSGCLRP:
+               arg = kvmppc_get_gpr(vcpu, rb);
+               if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+                       break;
+               vcpu->arch.vcore->dpdes = 0;
+               vcpu->arch.doorbell_request = 0;
+               break;
+       case OP_31_XOP_MFSPR:
+               switch (get_sprn(inst)) {
+               case SPRN_TIR:
+                       arg = thr;
+                       break;
+               case SPRN_DPDES:
+                       arg = kvmppc_read_dpdes(vcpu);
+                       break;
+               default:
+                       return EMULATE_FAIL;
+               }
+               kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+               break;
+       default:
+               return EMULATE_FAIL;
+       }
+       kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+       return RESUME_GUEST;
+ }
+
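For orientation, a sketch of the field extraction the decoder above relies on; these are assumed to mirror the get_op()/get_xop()/get_rb() helpers from asm/disassemble.h (PowerPC numbers bits from the MSB, so the primary opcode sits in bits 0-5, the extended opcode in bits 21-30 and RB in bits 16-20).

static inline unsigned int sketch_get_op(u32 inst)  { return inst >> 26; }
static inline unsigned int sketch_get_xop(u32 inst) { return (inst >> 1) & 0x3ff; }
static inline unsigned int sketch_get_rb(u32 inst)  { return (inst >> 11) & 0x1f; }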
  static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                 struct task_struct *tsk)
  {
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_MACHINE_CHECK:
-               /*
-                * Deliver a machine check interrupt to the guest.
-                * We have to do this, even if the host has handled the
-                * machine check, because machine checks use SRR0/1 and
-                * the interrupt might have trashed guest state in them.
-                */
-               kvmppc_book3s_queue_irqprio(vcpu,
-                                           BOOK3S_INTERRUPT_MACHINE_CHECK);
-               r = RESUME_GUEST;
+               /* Exit to guest with KVM_EXIT_NMI as exit reason */
+               run->exit_reason = KVM_EXIT_NMI;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               /* Clear out the old NMI status from run->flags */
+               run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+               /* Now set the NMI status */
+               if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+                       run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+               else
+                       run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+               r = RESUME_HOST;
+               /* Print the MCE event to host console. */
+               machine_check_print_event_info(&vcpu->arch.mce_evt, false);
                break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                break;
        /*
         * This occurs if the guest (kernel or userspace) does something that
-        * is prohibited by HFSCR.  We just generate a program interrupt to
-        * the guest.
+        * is prohibited by HFSCR.
+        * On POWER9, this could be a doorbell instruction that we need
+        * to emulate.
+        * Otherwise, we just generate a program interrupt to the guest.
         */
        case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
-               kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-               r = RESUME_GUEST;
+               r = EMULATE_FAIL;
+               if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+                       r = kvmppc_emulate_doorbell_instr(vcpu);
+               if (r == EMULATE_FAIL) {
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       r = RESUME_GUEST;
+               }
                break;
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                r = RESUME_PASSTHROUGH;
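
A hedged sketch of the userspace side of the KVM_EXIT_NMI machine-check handling above; vcpu_fd and the mmap'd run structure come from the usual KVM vcpu setup, and error handling is omitted.

ioctl(vcpu_fd, KVM_RUN, 0);
if (run->exit_reason == KVM_EXIT_NMI) {
	int disp = run->flags & KVM_RUN_PPC_NMI_DISP_MASK;

	if (disp == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV)
		;	/* recovered machine check: log it and re-enter the guest */
	else
		;	/* not recovered: reset or stop the guest */
}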
@@@ -1143,6 -1273,12 +1273,12 @@@ static void kvmppc_set_lpcr(struct kvm_
        mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                mask |= LPCR_AIL;
+       /*
+        * On POWER9, allow userspace to enable large decrementer for the
+        * guest, whether or not the host has it enabled.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               mask |= LPCR_LD;
  
        /* Broken 32-bit version of LPCR must not clear top bits */
        if (preserve_top32)
@@@ -1611,7 -1747,7 +1747,7 @@@ static struct kvmppc_vcore *kvmppc_vcor
        init_swait_queue_head(&vcore->wq);
        vcore->preempt_tb = TB_NIL;
        vcore->lpcr = kvm->arch.lpcr;
-       vcore->first_vcpuid = core * threads_per_vcore();
+       vcore->first_vcpuid = core * kvm->arch.smt_mode;
        vcore->kvm = kvm;
        INIT_LIST_HEAD(&vcore->preempt_list);
  
@@@ -1770,14 -1906,10 +1906,10 @@@ static struct kvm_vcpu *kvmppc_core_vcp
                                                   unsigned int id)
  {
        struct kvm_vcpu *vcpu;
-       int err = -EINVAL;
+       int err;
        int core;
        struct kvmppc_vcore *vcore;
  
-       core = id / threads_per_vcore();
-       if (core >= KVM_MAX_VCORES)
-               goto out;
        err = -ENOMEM;
        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu)
        vcpu->arch.busy_preempt = TB_NIL;
        vcpu->arch.intr_msr = MSR_SF | MSR_ME;
  
+       /*
+        * Set the default HFSCR for the guest from the host value.
+        * This value is only used on POWER9.
+        * On POWER9 DD1, TM doesn't work, so we make sure to
+        * prevent the guest from using it.
+        * On POWER9, we want to virtualize the doorbell facility, so we
+        * turn off the HFSCR bit, which causes those instructions to trap.
+        */
+       vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+       if (!cpu_has_feature(CPU_FTR_TM))
+               vcpu->arch.hfscr &= ~HFSCR_TM;
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               vcpu->arch.hfscr &= ~HFSCR_MSGP;
+
        kvmppc_mmu_book3s_hv_init(vcpu);
  
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
        init_waitqueue_head(&vcpu->arch.cpu_run);
  
        mutex_lock(&kvm->lock);
-       vcore = kvm->arch.vcores[core];
-       if (!vcore) {
-               vcore = kvmppc_vcore_create(kvm, core);
-               kvm->arch.vcores[core] = vcore;
-               kvm->arch.online_vcores++;
+       vcore = NULL;
+       err = -EINVAL;
+       core = id / kvm->arch.smt_mode;
+       if (core < KVM_MAX_VCORES) {
+               vcore = kvm->arch.vcores[core];
+               if (!vcore) {
+                       err = -ENOMEM;
+                       vcore = kvmppc_vcore_create(kvm, core);
+                       kvm->arch.vcores[core] = vcore;
+                       kvm->arch.online_vcores++;
+               }
        }
        mutex_unlock(&kvm->lock);
  
        return ERR_PTR(err);
  }
  
+ static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+                             unsigned long flags)
+ {
+       int err;
+       int esmt = 0;
+
+       if (flags)
+               return -EINVAL;
+       if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+               return -EINVAL;
+       if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+               /*
+                * On POWER8 (or POWER7), the threading mode is "strict",
+                * so we pack smt_mode vcpus per vcore.
+                */
+               if (smt_mode > threads_per_subcore)
+                       return -EINVAL;
+       } else {
+               /*
+                * On POWER9, the threading mode is "loose",
+                * so each vcpu gets its own vcore.
+                */
+               esmt = smt_mode;
+               smt_mode = 1;
+       }
+       mutex_lock(&kvm->lock);
+       err = -EBUSY;
+       if (!kvm->arch.online_vcores) {
+               kvm->arch.smt_mode = smt_mode;
+               kvm->arch.emul_smt_mode = esmt;
+               err = 0;
+       }
+       mutex_unlock(&kvm->lock);
+       return err;
+ }
+
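A sketch of how userspace reaches kvmhv_set_smt_mode(), assuming, as elsewhere in this series, that KVM_CAP_PPC_SMT is enabled per VM with args[0] = mode and args[1] = flags.

struct kvm_enable_cap cap = {
	.cap  = KVM_CAP_PPC_SMT,
	.args = { 4, 0 },	/* emulate an SMT-4 core, no flags */
};

ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* -EBUSY once any vcore exists */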
  static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
  {
        if (vpa->pinned_addr)
@@@ -1897,7 -2086,7 +2086,7 @@@ static void kvmppc_end_cede(struct kvm_
        }
  }
  
- extern void __kvmppc_vcore_entry(void);
+ extern int __kvmppc_vcore_entry(void);
  
  static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
                                   struct kvm_vcpu *vcpu)
@@@ -1962,10 -2151,6 +2151,6 @@@ static void kvmppc_release_hwthread(in
        tpaca->kvm_hstate.kvm_split_mode = NULL;
  }
  
- static void do_nothing(void *x)
- {
- }
  static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
  {
        int i;
                        smp_call_function_single(cpu + i, do_nothing, NULL, 1);
  }
  
+ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+ {
+       struct kvm *kvm = vcpu->kvm;
+
+       /*
+        * With radix, the guest can do TLB invalidations itself,
+        * and it could choose to use the local form (tlbiel) if
+        * it is invalidating a translation that has only ever been
+        * used on one vcpu.  However, that doesn't mean it has
+        * only ever been used on one physical cpu, since vcpus
+        * can move around between pcpus.  To cope with this, when
+        * a vcpu moves from one pcpu to another, we need to tell
+        * any vcpus running on the same core as this vcpu previously
+        * ran to flush the TLB.  The TLB is shared between threads,
+        * so we use a single bit in .need_tlb_flush for all 4 threads.
+        */
+       if (vcpu->arch.prev_cpu != pcpu) {
+               if (vcpu->arch.prev_cpu >= 0 &&
+                   cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+                   cpu_first_thread_sibling(pcpu))
+                       radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+               vcpu->arch.prev_cpu = pcpu;
+       }
+ }
+
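Concretely (a made-up example): if vcpu 3 last ran on pcpu 8 and is now being scheduled on pcpu 16, and those two pcpus are on different cores, a tlbiel the guest executed while on pcpu 8 may have left stale translations on that core, so radix_flush_cpu() asks pcpu 8's core to flush before the vcpu runs on pcpu 16.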
  static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
  {
        int cpu;
        struct paca_struct *tpaca;
-       struct kvmppc_vcore *mvc = vc->master_vcore;
        struct kvm *kvm = vc->kvm;
  
        cpu = vc->pcpu;
                        vcpu->arch.timer_running = 0;
                }
                cpu += vcpu->arch.ptid;
-               vcpu->cpu = mvc->pcpu;
+               vcpu->cpu = vc->pcpu;
                vcpu->arch.thread_cpu = cpu;
-               /*
-                * With radix, the guest can do TLB invalidations itself,
-                * and it could choose to use the local form (tlbiel) if
-                * it is invalidating a translation that has only ever been
-                * used on one vcpu.  However, that doesn't mean it has
-                * only ever been used on one physical cpu, since vcpus
-                * can move around between pcpus.  To cope with this, when
-                * a vcpu moves from one pcpu to another, we need to tell
-                * any vcpus running on the same core as this vcpu previously
-                * ran to flush the TLB.  The TLB is shared between threads,
-                * so we use a single bit in .need_tlb_flush for all 4 threads.
-                */
-               if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
-                       if (vcpu->arch.prev_cpu >= 0 &&
-                           cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
-                           cpu_first_thread_sibling(cpu))
-                               radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-                       vcpu->arch.prev_cpu = cpu;
-               }
                cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
        }
        tpaca = &paca[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
-       tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+       tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
        /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
        smp_wmb();
-       tpaca->kvm_hstate.kvm_vcore = mvc;
+       tpaca->kvm_hstate.kvm_vcore = vc;
        if (cpu != smp_processor_id())
                kvmppc_ipi_thread(cpu);
  }
@@@ -2155,8 -2344,7 +2344,7 @@@ struct core_info 
        int             max_subcore_threads;
        int             total_threads;
        int             subcore_threads[MAX_SUBCORES];
-       struct kvm      *subcore_vm[MAX_SUBCORES];
-       struct list_head vcs[MAX_SUBCORES];
+       struct kvmppc_vcore *vc[MAX_SUBCORES];
  };
  
  /*
@@@ -2167,17 -2355,12 +2355,12 @@@ static int subcore_thread_map[MAX_SUBCO
  
  static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
  {
-       int sub;
        memset(cip, 0, sizeof(*cip));
        cip->n_subcores = 1;
        cip->max_subcore_threads = vc->num_threads;
        cip->total_threads = vc->num_threads;
        cip->subcore_threads[0] = vc->num_threads;
-       cip->subcore_vm[0] = vc->kvm;
-       for (sub = 0; sub < MAX_SUBCORES; ++sub)
-               INIT_LIST_HEAD(&cip->vcs[sub]);
-       list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+       cip->vc[0] = vc;
  }
  
  static bool subcore_config_ok(int n_subcores, int n_threads)
        return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
  }
  
- static void init_master_vcore(struct kvmppc_vcore *vc)
+ static void init_vcore_to_run(struct kvmppc_vcore *vc)
  {
-       vc->master_vcore = vc;
        vc->entry_exit_map = 0;
        vc->in_guest = 0;
        vc->napping_threads = 0;
@@@ -2224,9 -2406,9 +2406,9 @@@ static bool can_dynamic_split(struct kv
        ++cip->n_subcores;
        cip->total_threads += vc->num_threads;
        cip->subcore_threads[sub] = vc->num_threads;
-       cip->subcore_vm[sub] = vc->kvm;
-       init_master_vcore(vc);
-       list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+       cip->vc[sub] = vc;
+       init_vcore_to_run(vc);
+       list_del_init(&vc->preempt_list);
  
        return true;
  }
@@@ -2294,6 -2476,18 +2476,18 @@@ static void collect_piggybacks(struct c
        spin_unlock(&lp->lock);
  }
  
+ static bool recheck_signals(struct core_info *cip)
+ {
+       int sub, i;
+       struct kvm_vcpu *vcpu;
+
+       for (sub = 0; sub < cip->n_subcores; ++sub)
+               for_each_runnable_thread(i, vcpu, cip->vc[sub])
+                       if (signal_pending(vcpu->arch.run_task))
+                               return true;
+       return false;
+ }
+
  static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
  {
        int still_running = 0, i;
                        wake_up(&vcpu->arch.cpu_run);
                }
        }
-       list_del_init(&vc->preempt_list);
        if (!is_master) {
                if (still_running > 0) {
                        kvmppc_vcore_preempt(vc);
@@@ -2393,6 -2586,21 +2586,21 @@@ static inline int kvmppc_set_host_core(
        return 0;
  }
  
+ static void set_irq_happened(int trap)
+ {
+       switch (trap) {
+       case BOOK3S_INTERRUPT_EXTERNAL:
+               local_paca->irq_happened |= PACA_IRQ_EE;
+               break;
+       case BOOK3S_INTERRUPT_H_DOORBELL:
+               local_paca->irq_happened |= PACA_IRQ_DBELL;
+               break;
+       case BOOK3S_INTERRUPT_HMI:
+               local_paca->irq_happened |= PACA_IRQ_HMI;
+               break;
+       }
+ }
+
  /*
   * Run a set of guest threads on a physical core.
   * Called with vc->lock held.
@@@ -2403,7 -2611,7 +2611,7 @@@ static noinline void kvmppc_run_core(st
        int i;
        int srcu_idx;
        struct core_info core_info;
-       struct kvmppc_vcore *pvc, *vcnext;
+       struct kvmppc_vcore *pvc;
        struct kvm_split_mode split_info, *sip;
        int split, subcore_size, active;
        int sub;
        int pcpu, thr;
        int target_threads;
        int controlled_threads;
+       int trap;
  
        /*
         * Remove from the list any threads that have a signal pending
        /*
         * Initialize *vc.
         */
-       init_master_vcore(vc);
+       init_vcore_to_run(vc);
        vc->preempt_tb = TB_NIL;
  
        /*
        if (vc->num_threads < target_threads)
                collect_piggybacks(&core_info, target_threads);
  
+       /*
+        * On radix, arrange for TLB flushing if necessary.
+        * This has to be done before disabling interrupts since
+        * it uses smp_call_function().
+        */
+       pcpu = smp_processor_id();
+       if (kvm_is_radix(vc->kvm)) {
+               for (sub = 0; sub < core_info.n_subcores; ++sub)
+                       for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+                               kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+       }
+
+       /*
+        * Hard-disable interrupts, and check resched flag and signals.
+        * If we need to reschedule or deliver a signal, clean up
+        * and return without going into the guest(s).
+        */
+       local_irq_disable();
+       hard_irq_disable();
+       if (lazy_irq_pending() || need_resched() ||
+           recheck_signals(&core_info)) {
+               local_irq_enable();
+               vc->vcore_state = VCORE_INACTIVE;
+               /* Unlock all except the primary vcore */
+               for (sub = 1; sub < core_info.n_subcores; ++sub) {
+                       pvc = core_info.vc[sub];
+                       /* Put back on to the preempted vcores list */
+                       kvmppc_vcore_preempt(pvc);
+                       spin_unlock(&pvc->lock);
+               }
+               for (i = 0; i < controlled_threads; ++i)
+                       kvmppc_release_hwthread(pcpu + i);
+               return;
+       }
+
+       kvmppc_clear_host_core(pcpu);
+
        /* Decide on micro-threading (split-core) mode */
        subcore_size = threads_per_subcore;
        cmd_bit = stat_bit = 0;
                split_info.ldbar = mfspr(SPRN_LDBAR);
                split_info.subcore_size = subcore_size;
                for (sub = 0; sub < core_info.n_subcores; ++sub)
-                       split_info.master_vcs[sub] =
-                               list_first_entry(&core_info.vcs[sub],
-                                       struct kvmppc_vcore, preempt_list);
+                       split_info.vc[sub] = core_info.vc[sub];
                /* order writes to split_info before kvm_split_mode pointer */
                smp_wmb();
        }
-       pcpu = smp_processor_id();
        for (thr = 0; thr < controlled_threads; ++thr)
                paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
  
                }
        }
  
-       kvmppc_clear_host_core(pcpu);
        /* Start all the threads */
        active = 0;
        for (sub = 0; sub < core_info.n_subcores; ++sub) {
                thr = subcore_thread_map[sub];
                thr0_done = false;
                active |= 1 << thr;
-               list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
-                       pvc->pcpu = pcpu + thr;
-                       for_each_runnable_thread(i, vcpu, pvc) {
-                               kvmppc_start_thread(vcpu, pvc);
-                               kvmppc_create_dtl_entry(vcpu, pvc);
-                               trace_kvm_guest_enter(vcpu);
-                               if (!vcpu->arch.ptid)
-                                       thr0_done = true;
-                               active |= 1 << (thr + vcpu->arch.ptid);
-                       }
-                       /*
-                        * We need to start the first thread of each subcore
-                        * even if it doesn't have a vcpu.
-                        */
-                       if (pvc->master_vcore == pvc && !thr0_done)
-                               kvmppc_start_thread(NULL, pvc);
-                       thr += pvc->num_threads;
+               pvc = core_info.vc[sub];
+               pvc->pcpu = pcpu + thr;
+               for_each_runnable_thread(i, vcpu, pvc) {
+                       kvmppc_start_thread(vcpu, pvc);
+                       kvmppc_create_dtl_entry(vcpu, pvc);
+                       trace_kvm_guest_enter(vcpu);
+                       if (!vcpu->arch.ptid)
+                               thr0_done = true;
+                       active |= 1 << (thr + vcpu->arch.ptid);
                }
+               /*
+                * We need to start the first thread of each subcore
+                * even if it doesn't have a vcpu.
+                */
+               if (!thr0_done)
+                       kvmppc_start_thread(NULL, pvc);
+               thr += pvc->num_threads;
        }
  
        /*
        trace_kvmppc_run_core(vc, 0);
  
        for (sub = 0; sub < core_info.n_subcores; ++sub)
-               list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
-                       spin_unlock(&pvc->lock);
+               spin_unlock(&core_info.vc[sub]->lock);
+       /*
+        * Interrupts will be enabled once we get into the guest,
+        * so tell lockdep that we're about to enable interrupts.
+        */
+       trace_hardirqs_on();
  
        guest_enter();
  
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
  
-       __kvmppc_vcore_entry();
+       trap = __kvmppc_vcore_entry();
  
        srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
  
+       guest_exit();
+       trace_hardirqs_off();
+       set_irq_happened(trap);
        spin_lock(&vc->lock);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
        vc->vcore_state = VCORE_EXITING;
                split_info.do_nap = 0;
        }
  
+       kvmppc_set_host_core(pcpu);
+       local_irq_enable();
        /* Let secondaries go back to the offline loop */
        for (i = 0; i < controlled_threads; ++i) {
                kvmppc_release_hwthread(pcpu + i);
                cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
        }
  
-       kvmppc_set_host_core(pcpu);
        spin_unlock(&vc->lock);
  
        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
-       guest_exit();
  
-       for (sub = 0; sub < core_info.n_subcores; ++sub)
-               list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
-                                        preempt_list)
-                       post_guest_process(pvc, pvc == vc);
+       for (sub = 0; sub < core_info.n_subcores; ++sub) {
+               pvc = core_info.vc[sub];
+               post_guest_process(pvc, pvc == vc);
+       }
  
        spin_lock(&vc->lock);
        preempt_enable();
@@@ -2666,6 -2917,30 +2917,30 @@@ static void shrink_halt_poll_ns(struct 
                vc->halt_poll_ns /= halt_poll_ns_shrink;
  }
  
+ #ifdef CONFIG_KVM_XICS
+ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+ {
+       if (!xive_enabled())
+               return false;
+       return vcpu->arch.xive_saved_state.pipr <
+               vcpu->arch.xive_saved_state.cppr;
+ }
+ #else
+ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+ {
+       return false;
+ }
+ #endif /* CONFIG_KVM_XICS */
+
+ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+ {
+       if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+           kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+               return true;
+       return false;
+ }
+
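As a worked example of the XIVE test above: smaller values are more favored, so a pending-interrupt priority (pipr) of 2 against a current priority threshold (cppr) of 5 satisfies pipr < cppr and the vcpu counts as woken, while pipr 7 against cppr 5 stays masked.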
  /*
   * Check to see if any of the runnable vcpus on the vcore have pending
   * exceptions or are no longer ceded
@@@ -2676,8 -2951,7 +2951,7 @@@ static int kvmppc_vcore_check_block(str
        int i;
  
        for_each_runnable_thread(i, vcpu, vc) {
-               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
-                   vcpu->arch.prodded)
+               if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
                        return 1;
        }
  
@@@ -2819,15 -3093,14 +3093,14 @@@ static int kvmppc_run_vcpu(struct kvm_r
         */
        if (!signal_pending(current)) {
                if (vc->vcore_state == VCORE_PIGGYBACK) {
-                       struct kvmppc_vcore *mvc = vc->master_vcore;
-                       if (spin_trylock(&mvc->lock)) {
-                               if (mvc->vcore_state == VCORE_RUNNING &&
-                                   !VCORE_IS_EXITING(mvc)) {
+                       if (spin_trylock(&vc->lock)) {
+                               if (vc->vcore_state == VCORE_RUNNING &&
+                                   !VCORE_IS_EXITING(vc)) {
                                        kvmppc_create_dtl_entry(vcpu, vc);
                                        kvmppc_start_thread(vcpu, vc);
                                        trace_kvm_guest_enter(vcpu);
                                }
-                               spin_unlock(&mvc->lock);
+                               spin_unlock(&vc->lock);
                        }
                } else if (vc->vcore_state == VCORE_RUNNING &&
                           !VCORE_IS_EXITING(vc)) {
                        break;
                n_ceded = 0;
                for_each_runnable_thread(i, v, vc) {
-                       if (!v->arch.pending_exceptions && !v->arch.prodded)
+                       if (!kvmppc_vcpu_woken(v))
                                n_ceded += v->arch.ceded;
                        else
                                v->arch.ceded = 0;
@@@ -3368,7 -3641,7 +3641,7 @@@ void kvmppc_alloc_host_rm_ops(void
                return;
        }
  
 -      get_online_cpus();
 +      cpus_read_lock();
  
        for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
                if (!cpu_online(cpu))
        l_ops = (unsigned long) ops;
  
        if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
 -              put_online_cpus();
 +              cpus_read_unlock();
                kfree(ops->rm_core);
                kfree(ops);
                return;
        }
  
 -      cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
 -                                "ppc/kvm_book3s:prepare",
 -                                kvmppc_set_host_core,
 -                                kvmppc_clear_host_core);
 -      put_online_cpus();
 +      cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
 +                                           "ppc/kvm_book3s:prepare",
 +                                           kvmppc_set_host_core,
 +                                           kvmppc_clear_host_core);
 +      cpus_read_unlock();
  }
  
  void kvmppc_free_host_rm_ops(void)
@@@ -3518,6 -3791,19 +3791,19 @@@ static int kvmppc_core_init_vm_hv(struc
        if (!cpu_has_feature(CPU_FTR_ARCH_300))
                kvm_hv_vm_activated();
  
+       /*
+        * Initialize smt_mode depending on processor.
+        * POWER8 and earlier have to use "strict" threading, where
+        * all vCPUs in a vcore have to run on the same (sub)core,
+        * whereas on POWER9 the threads can each run a different
+        * guest.
+        */
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               kvm->arch.smt_mode = threads_per_subcore;
+       else
+               kvm->arch.smt_mode = 1;
+       kvm->arch.emul_smt_mode = 1;
+
        /*
         * Create a debugfs directory for the VM
         */
@@@ -3947,6 -4233,7 +4233,7 @@@ static struct kvmppc_ops kvm_ops_hv = 
  #endif
        .configure_mmu = kvmhv_configure_mmu,
        .get_rmmu_info = kvmhv_get_rmmu_info,
+       .set_smt_mode = kvmhv_set_smt_mode,
  };
  
  static int kvm_init_subcore_bitmap(void)
index 6baae236f461d97b02c7d00b2dd9c57dafe75e30,495aedbaf44757018b7bb4d217b6d94f563c3910..a409d59919344a277fac0858c2bd12ece1bd83c9
  #define KVM_HALT_POLL_NS_DEFAULT 80000
  
  /* s390-specific vcpu->requests bit members */
- #define KVM_REQ_ENABLE_IBS         8
- #define KVM_REQ_DISABLE_IBS        9
- #define KVM_REQ_ICPT_OPEREXC       10
+ #define KVM_REQ_ENABLE_IBS    KVM_ARCH_REQ(0)
+ #define KVM_REQ_DISABLE_IBS   KVM_ARCH_REQ(1)
+ #define KVM_REQ_ICPT_OPEREXC  KVM_ARCH_REQ(2)
+ #define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+ #define KVM_REQ_STOP_MIGRATION  KVM_ARCH_REQ(4)
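
These now go through the generic vcpu-request machinery documented in this merge; a minimal sketch of the consuming side, with a hypothetical handler name:

if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu))
	vcpu_start_cmma_logging(vcpu);	/* hypothetical handler */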
  
  #define SIGP_CTRL_C           0x80
  #define SIGP_CTRL_SCN_MASK    0x3f
@@@ -56,7 -58,7 +58,7 @@@ union bsca_sigp_ctrl 
                __u8 r : 1;
                __u8 scn : 6;
        };
- } __packed;
+ };
  
  union esca_sigp_ctrl {
        __u16 value;
                __u8 reserved: 7;
                __u8 scn;
        };
- } __packed;
+ };
  
  struct esca_entry {
        union esca_sigp_ctrl sigp_ctrl;
        __u16   reserved1[3];
        __u64   sda;
        __u64   reserved2[6];
- } __packed;
+ };
  
  struct bsca_entry {
        __u8    reserved0;
@@@ -80,7 -82,7 +82,7 @@@
        __u16   reserved[3];
        __u64   sda;
        __u64   reserved2[2];
- } __attribute__((packed));
+ };
  
  union ipte_control {
        unsigned long val;
@@@ -97,7 -99,7 +99,7 @@@ struct bsca_block 
        __u64   mcn;
        __u64   reserved2;
        struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
- } __attribute__((packed));
+ };
  
  struct esca_block {
        union ipte_control ipte_control;
        __u64   mcn[4];
        __u64   reserved2[20];
        struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
- } __packed;
+ };
  
  /*
   * This struct is used to store some machine check info from lowcore
@@@ -274,7 -276,7 +276,7 @@@ struct kvm_s390_sie_block 
  
  struct kvm_s390_itdb {
        __u8    data[256];
- } __packed;
+ };
  
  struct sie_page {
        struct kvm_s390_sie_block sie_block;
        __u8 reserved218[1000];         /* 0x0218 */
        struct kvm_s390_itdb itdb;      /* 0x0600 */
        __u8 reserved700[2304];         /* 0x0700 */
- } __packed;
+ };
  
  struct kvm_vcpu_stat {
        u64 exit_userspace;
@@@ -556,6 -558,7 +558,6 @@@ struct kvm_s390_float_interrupt 
        struct mutex ais_lock;
        u8 simm;
        u8 nimm;
 -      int ais_enabled;
  };
  
  struct kvm_hw_wp_info_arch {
@@@ -695,7 -698,7 +697,7 @@@ struct sie_page2 
        __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64];    /* 0x0000 */
        struct kvm_s390_crypto_cb crycb;                /* 0x0800 */
        u8 reserved900[0x1000 - 0x900];                 /* 0x0900 */
- } __packed;
+ };
  
  struct kvm_s390_vsie {
        struct mutex mutex;
        struct page *pages[KVM_MAX_VCPUS];
  };
  
+ struct kvm_s390_migration_state {
+       unsigned long bitmap_size;      /* in bits (number of guest pages) */
+       atomic64_t dirty_pages;         /* number of dirty pages */
+       unsigned long *pgste_bitmap;
+ }
+
  struct kvm_arch{
        void *sca;
        int use_esca;
        struct kvm_s390_crypto crypto;
        struct kvm_s390_vsie vsie;
        u64 epoch;
+       struct kvm_s390_migration_state *migration_state;
        /* subset of available cpu features enabled by user space */
        DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
  };
diff --combined arch/s390/kvm/gaccess.c
index 875f8bea8c670dc83f626349d6a35cc17bd0cf8a,17e3a4e71bc90e74c3624c3f0370df56b6bbff53..653cae5e1ee1f97b6de07f665cf2b90ba8619d8e
@@@ -89,7 -89,7 +89,7 @@@ struct region3_table_entry_fc1 
        unsigned long f  : 1; /* Fetch-Protection Bit */
        unsigned long fc : 1; /* Format-Control */
        unsigned long p  : 1; /* DAT-Protection Bit */
-       unsigned long co : 1; /* Change-Recording Override */
+       unsigned long iep: 1; /* Instruction-Execution-Protection */
        unsigned long    : 2;
        unsigned long i  : 1; /* Region-Invalid Bit */
        unsigned long cr : 1; /* Common-Region Bit */
@@@ -131,7 -131,7 +131,7 @@@ struct segment_entry_fc1 
        unsigned long f  : 1; /* Fetch-Protection Bit */
        unsigned long fc : 1; /* Format-Control */
        unsigned long p  : 1; /* DAT-Protection Bit */
-       unsigned long co : 1; /* Change-Recording Override */
+       unsigned long iep: 1; /* Instruction-Execution-Protection */
        unsigned long    : 2;
        unsigned long i  : 1; /* Segment-Invalid Bit */
        unsigned long cs : 1; /* Common-Segment Bit */
@@@ -168,7 -168,8 +168,8 @@@ union page_table_entry 
                unsigned long z  : 1; /* Zero Bit */
                unsigned long i  : 1; /* Page-Invalid Bit */
                unsigned long p  : 1; /* DAT-Protection Bit */
-               unsigned long    : 9;
+               unsigned long iep: 1; /* Instruction-Execution-Protection */
+               unsigned long    : 8;
        };
  };
  
@@@ -241,7 -242,7 +242,7 @@@ struct ale 
        unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
        unsigned long        : 6;
        unsigned long astesn : 32; /* ASTE Sequence Number */
- } __packed;
+ };
  
  struct aste {
        unsigned long i      : 1; /* ASX-Invalid Bit */
        unsigned long ald    : 32;
        unsigned long astesn : 32;
        /* .. more fields there */
- } __packed;
+ };
  
  int ipte_lock_held(struct kvm_vcpu *vcpu)
  {
@@@ -485,6 -486,7 +486,7 @@@ enum prot_type 
        PROT_TYPE_KEYC = 1,
        PROT_TYPE_ALC  = 2,
        PROT_TYPE_DAT  = 3,
+       PROT_TYPE_IEP  = 4,
  };
  
  static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
        switch (code) {
        case PGM_PROTECTION:
                switch (prot) {
+               case PROT_TYPE_IEP:
+                       tec->b61 = 1;
+                       /* FALL THROUGH */
                case PROT_TYPE_LA:
                        tec->b56 = 1;
                        break;
@@@ -551,26 -556,26 +556,26 @@@ static int get_vcpu_asce(struct kvm_vcp
        int rc;
        struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
  
 -      if (!psw.t) {
 +      if (!psw.dat) {
                asce->val = 0;
                asce->r = 1;
                return 0;
        }
  
 -      if (mode == GACC_IFETCH)
 -              psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
 +      if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
 +              psw.as = PSW_BITS_AS_PRIMARY;
  
        switch (psw.as) {
 -      case PSW_AS_PRIMARY:
 +      case PSW_BITS_AS_PRIMARY:
                asce->val = vcpu->arch.sie_block->gcr[1];
                return 0;
 -      case PSW_AS_SECONDARY:
 +      case PSW_BITS_AS_SECONDARY:
                asce->val = vcpu->arch.sie_block->gcr[7];
                return 0;
 -      case PSW_AS_HOME:
 +      case PSW_BITS_AS_HOME:
                asce->val = vcpu->arch.sie_block->gcr[13];
                return 0;
 -      case PSW_AS_ACCREG:
 +      case PSW_BITS_AS_ACCREG:
                rc = ar_translation(vcpu, asce, ar, mode);
                if (rc > 0)
                        return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
@@@ -591,6 -596,7 +596,7 @@@ static int deref_table(struct kvm *kvm
   * @gpa: points to where guest physical (absolute) address should be stored
   * @asce: effective asce
   * @mode: indicates the access mode to be used
+  * @prot: returns the type for protection exceptions
   *
   * Translate a guest virtual address into a guest absolute address by means
   * of dynamic address translation as specified by the architecture.
   */
  static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
                                     unsigned long *gpa, const union asce asce,
-                                    enum gacc_mode mode)
+                                    enum gacc_mode mode, enum prot_type *prot)
  {
        union vaddress vaddr = {.addr = gva};
        union raddress raddr = {.addr = gva};
        union page_table_entry pte;
        int dat_protection = 0;
+       int iep_protection = 0;
        union ctlreg0 ctlreg0;
        unsigned long ptr;
-       int edat1, edat2;
+       int edat1, edat2, iep;
  
        ctlreg0.val = vcpu->arch.sie_block->gcr[0];
        edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+       iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
        if (asce.r)
                goto real_address;
        ptr = asce.origin * 4096;
                        return PGM_TRANSLATION_SPEC;
                if (rtte.fc && edat2) {
                        dat_protection |= rtte.fc1.p;
+                       iep_protection = rtte.fc1.iep;
                        raddr.rfaa = rtte.fc1.rfaa;
                        goto absolute_address;
                }
                        return PGM_TRANSLATION_SPEC;
                if (ste.fc && edat1) {
                        dat_protection |= ste.fc1.p;
+                       iep_protection = ste.fc1.iep;
                        raddr.sfaa = ste.fc1.sfaa;
                        goto absolute_address;
                }
        if (pte.z)
                return PGM_TRANSLATION_SPEC;
        dat_protection |= pte.p;
+       iep_protection = pte.iep;
        raddr.pfra = pte.pfra;
  real_address:
        raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
  absolute_address:
-       if (mode == GACC_STORE && dat_protection)
+       if (mode == GACC_STORE && dat_protection) {
+               *prot = PROT_TYPE_DAT;
                return PGM_PROTECTION;
+       }
+       if (mode == GACC_IFETCH && iep_protection && iep) {
+               *prot = PROT_TYPE_IEP;
+               return PGM_PROTECTION;
+       }
        if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
                return PGM_ADDRESSING;
        *gpa = raddr.addr;
@@@ -771,7 -788,7 +788,7 @@@ static int low_address_protection_enabl
  
        if (!ctlreg0.lap)
                return 0;
 -      if (psw_bits(*psw).t && asce.p)
 +      if (psw_bits(*psw).dat && asce.p)
                return 0;
        return 1;
  }
@@@ -782,6 -799,7 +799,7 @@@ static int guest_page_range(struct kvm_
  {
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
        int lap_enabled, rc = 0;
+       enum prot_type prot;
  
        lap_enabled = low_address_protection_enabled(vcpu, asce);
        while (nr_pages) {
                        return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
                                         PROT_TYPE_LA);
                ga &= PAGE_MASK;
 -              if (psw_bits(*psw).t) {
 +              if (psw_bits(*psw).dat) {
-                       rc = guest_translate(vcpu, ga, pages, asce, mode);
+                       rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
                        if (rc < 0)
                                return rc;
                } else {
                                rc = PGM_ADDRESSING;
                }
                if (rc)
-                       return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
+                       return trans_exc(vcpu, rc, ga, ar, mode, prot);
                ga += PAGE_SIZE;
                pages++;
                nr_pages--;
@@@ -831,7 -849,7 +849,7 @@@ int access_guest(struct kvm_vcpu *vcpu
                pages = vmalloc(nr_pages * sizeof(unsigned long));
        if (!pages)
                return -ENOMEM;
 -      need_ipte_lock = psw_bits(*psw).t && !asce.r;
 +      need_ipte_lock = psw_bits(*psw).dat && !asce.r;
        if (need_ipte_lock)
                ipte_lock(vcpu);
        rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
@@@ -886,6 -904,7 +904,7 @@@ int guest_translate_address(struct kvm_
                            unsigned long *gpa, enum gacc_mode mode)
  {
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       enum prot_type prot;
        union asce asce;
        int rc;
  
                                         mode, PROT_TYPE_LA);
        }
  
 -      if (psw_bits(*psw).t && !asce.r) {      /* Use DAT? */
 +      if (psw_bits(*psw).dat && !asce.r) {    /* Use DAT? */
-               rc = guest_translate(vcpu, gva, gpa, asce, mode);
+               rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
                if (rc > 0)
-                       return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
+                       return trans_exc(vcpu, rc, gva, 0, mode, prot);
        } else {
                *gpa = kvm_s390_real_to_abs(vcpu, gva);
                if (kvm_is_error_gpa(vcpu->kvm, *gpa))
@@@ -977,12 -996,11 +996,12 @@@ static int kvm_s390_shadow_tables(struc
        ptr = asce.origin * 4096;
        if (asce.r) {
                *fake = 1;
 +              ptr = 0;
                asce.dt = ASCE_TYPE_REGION1;
        }
        switch (asce.dt) {
        case ASCE_TYPE_REGION1:
 -              if (vaddr.rfx01 > asce.tl && !asce.r)
 +              if (vaddr.rfx01 > asce.tl && !*fake)
                        return PGM_REGION_FIRST_TRANS;
                break;
        case ASCE_TYPE_REGION2:
                union region1_table_entry rfte;
  
                if (*fake) {
 -                      /* offset in 16EB guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
 +                      ptr += (unsigned long) vaddr.rfx << 53;
                        rfte.val = ptr;
                        goto shadow_r2t;
                }
@@@ -1036,7 -1055,8 +1055,7 @@@ shadow_r2t
                union region2_table_entry rste;
  
                if (*fake) {
 -                      /* offset in 8PB guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
 +                      ptr += (unsigned long) vaddr.rsx << 42;
                        rste.val = ptr;
                        goto shadow_r3t;
                }
@@@ -1063,7 -1083,8 +1082,7 @@@ shadow_r3t
                union region3_table_entry rtte;
  
                if (*fake) {
 -                      /* offset in 4TB guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
 +                      ptr += (unsigned long) vaddr.rtx << 31;
                        rtte.val = ptr;
                        goto shadow_sgt;
                }
@@@ -1099,7 -1120,8 +1118,7 @@@ shadow_sgt
                union segment_table_entry ste;
  
                if (*fake) {
 -                      /* offset in 2G guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
 +                      ptr += (unsigned long) vaddr.sx << 20;
                        ste.val = ptr;
                        goto shadow_pgt;
                }
index 2d120fef7d90d915e33d7f19cb0ba39e9a6a264e,f2c78fc1852d38a1955f573c1e0b1da687ddc307..a619ddae610da2c0fabaf66a9a123d1572b51f9c
@@@ -251,8 -251,13 +251,13 @@@ static unsigned long deliverable_irqs(s
                __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
        if (psw_mchk_disabled(vcpu))
                active_mask &= ~IRQ_PEND_MCHK_MASK;
+       /*
+        * Check the cr14 bits of both the floating and the local
+        * interrupts, because bit IRQ_PEND_MCHK_REP could be set in
+        * either case.
+        */
        if (!(vcpu->arch.sie_block->gcr[14] &
-             vcpu->kvm->arch.float_int.mchk.cr14))
+          (vcpu->kvm->arch.float_int.mchk.cr14 |
+          vcpu->arch.local_int.irq.mchk.cr14)))
                __clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
  
        /*
        return ret < 0 ? ret : n;
  }
  
+ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       struct kvm_s390_ais_all ais;
+
+       if (attr->attr < sizeof(ais))
+               return -EINVAL;
+       if (!test_kvm_facility(kvm, 72))
+               return -ENOTSUPP;
+       mutex_lock(&fi->ais_lock);
+       ais.simm = fi->simm;
+       ais.nimm = fi->nimm;
+       mutex_unlock(&fi->ais_lock);
+       if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+               return -EFAULT;
+       return 0;
+ }
+
  static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
  {
        int r;
                r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
                                          attr->attr);
                break;
+       case KVM_DEV_FLIC_AISM_ALL:
+               r = flic_ais_mode_get_all(dev->kvm, attr);
+               break;
        default:
                r = -EINVAL;
        }
@@@ -2160,7 -2190,7 +2190,7 @@@ static int modify_ais_mode(struct kvm *
        struct kvm_s390_ais_req req;
        int ret = 0;
  
 -      if (!fi->ais_enabled)
 +      if (!test_kvm_facility(kvm, 72))
                return -ENOTSUPP;
  
        if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
@@@ -2204,7 -2234,7 +2234,7 @@@ static int kvm_s390_inject_airq(struct 
        };
        int ret = 0;
  
 -      if (!fi->ais_enabled || !adapter->suppressible)
 +      if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
                return kvm_s390_inject_vm(kvm, &s390int);
  
        mutex_lock(&fi->ais_lock);
@@@ -2235,6 -2265,25 +2265,25 @@@ static int flic_inject_airq(struct kvm 
        return kvm_s390_inject_airq(kvm, adapter);
  }
  
+ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       struct kvm_s390_ais_all ais;
+
+       if (!test_kvm_facility(kvm, 72))
+               return -ENOTSUPP;
+       if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+               return -EFAULT;
+       mutex_lock(&fi->ais_lock);
+       fi->simm = ais.simm;
+       fi->nimm = ais.nimm;
+       mutex_unlock(&fi->ais_lock);
+       return 0;
+ }
+
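A hedged sketch of driving both directions from userspace; flic_fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_FLIC, and the FLIC encodes the attribute in the group field with attr carrying the buffer size.

struct kvm_s390_ais_all ais;
struct kvm_device_attr attr = {
	.group = KVM_DEV_FLIC_AISM_ALL,
	.attr  = sizeof(ais),	/* buffer size, checked on get */
	.addr  = (__u64)(unsigned long)&ais,
};

ioctl(flic_fd, KVM_GET_DEVICE_ATTR, &attr);	/* save simm/nimm */
ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);	/* restore them */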
  static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
  {
        int r = 0;
        case KVM_DEV_FLIC_AIRQ_INJECT:
                r = flic_inject_airq(dev->kvm, attr);
                break;
+       case KVM_DEV_FLIC_AISM_ALL:
+               r = flic_ais_mode_set_all(dev->kvm, attr);
+               break;
        default:
                r = -EINVAL;
        }
@@@ -2298,6 -2350,7 +2350,7 @@@ static int flic_has_attr(struct kvm_dev
        case KVM_DEV_FLIC_CLEAR_IO_IRQ:
        case KVM_DEV_FLIC_AISM:
        case KVM_DEV_FLIC_AIRQ_INJECT:
+       case KVM_DEV_FLIC_AISM_ALL:
                return 0;
        }
        return -ENXIO;
@@@ -2415,6 -2468,42 +2468,42 @@@ static int set_adapter_int(struct kvm_k
        return ret;
  }
  
+ /*
+  * Inject the machine check to the guest.
+  */
+ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+                                    struct mcck_volatile_info *mcck_info)
+ {
+       struct kvm_s390_interrupt_info inti;
+       struct kvm_s390_irq irq;
+       struct kvm_s390_mchk_info *mchk;
+       union mci mci;
+       __u64 cr14 = 0;         /* upper bits are not used */
+
+       mci.val = mcck_info->mcic;
+       if (mci.sr)
+               cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
+       if (mci.dg)
+               cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
+       if (mci.w)
+               cr14 |= MCCK_CR14_WARN_SUB_MASK;
+       mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+       mchk->cr14 = cr14;
+       mchk->mcic = mcck_info->mcic;
+       mchk->ext_damage_code = mcck_info->ext_damage_code;
+       mchk->failing_storage_address = mcck_info->failing_storage_address;
+       if (mci.ck) {
+               /* Inject the floating machine check */
+               inti.type = KVM_S390_MCHK;
+               WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+       } else {
+               /* Inject the machine check to specified vcpu */
+               irq.type = KVM_S390_MCHK;
+               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+       }
+ }
+
  int kvm_set_routing_entry(struct kvm *kvm,
                          struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
diff --combined arch/s390/kvm/kvm-s390.c
index b0d7de5a533dcc50249575ff80a1a67ddec077db,ef6419654c162277dc761890424f27612a1d7187..3f2884e99ed4ce461cdb6f08148968880e90747b
@@@ -30,6 -30,7 +30,7 @@@
  #include <linux/vmalloc.h>
  #include <linux/bitmap.h>
  #include <linux/sched/signal.h>
+ #include <linux/string.h>
  
  #include <asm/asm-offsets.h>
  #include <asm/lowcore.h>
@@@ -386,6 -387,7 +387,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
+       case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
                r = 1;
                break;
@@@ -558,6 -560,7 +560,6 @@@ static int kvm_vm_ioctl_enable_cap(stru
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 -                      kvm->arch.float_int.ais_enabled = 1;
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
@@@ -749,6 -752,129 +751,129 @@@ static int kvm_s390_vm_set_crypto(struc
        return 0;
  }
  
+ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+ {
+       int cx;
+       struct kvm_vcpu *vcpu;
+
+       kvm_for_each_vcpu(cx, vcpu, kvm)
+               kvm_s390_sync_request(req, vcpu);
+ }
+
+ /*
+  * Must be called with kvm->srcu held to avoid races on memslots, and with
+  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+  */
+ static int kvm_s390_vm_start_migration(struct kvm *kvm)
+ {
+       struct kvm_s390_migration_state *mgs;
+       struct kvm_memory_slot *ms;
+       /* should be the only one */
+       struct kvm_memslots *slots;
+       unsigned long ram_pages;
+       int slotnr;
+
+       /* migration mode already enabled */
+       if (kvm->arch.migration_state)
+               return 0;
+       slots = kvm_memslots(kvm);
+       if (!slots || !slots->used_slots)
+               return -EINVAL;
+       mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
+       if (!mgs)
+               return -ENOMEM;
+       kvm->arch.migration_state = mgs;
+       if (kvm->arch.use_cmma) {
+               /*
+                * Get the last slot. They should be sorted by base_gfn, so the
+                * last slot is also the one at the end of the address space.
+                * We have verified above that at least one slot is present.
+                */
+               ms = slots->memslots + slots->used_slots - 1;
+               /* round up so we only use full longs */
+               ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
+               /* allocate enough bytes to store all the bits */
+               mgs->pgste_bitmap = vmalloc(ram_pages / 8);
+               if (!mgs->pgste_bitmap) {
+                       kfree(mgs);
+                       kvm->arch.migration_state = NULL;
+                       return -ENOMEM;
+               }
+               mgs->bitmap_size = ram_pages;
+               atomic64_set(&mgs->dirty_pages, ram_pages);
+               /* mark all the pages in active slots as dirty */
+               for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+                       ms = slots->memslots + slotnr;
+                       bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
+               }
+               kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+       }
+       return 0;
+ }
+
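A worked sizing example for the bitmap above: a guest whose last memslot ends at gfn 0x100000 (4 GiB with 4 KiB pages) gives ram_pages = roundup(0x100000, BITS_PER_LONG) = 0x100000 bits, so the vmalloc'd pgste_bitmap is 0x100000 / 8 bytes = 128 KiB, and dirty_pages starts at 1048576 with every page initially marked dirty.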
+ /*
+  * Must be called with kvm->lock to avoid races with ourselves and
+  * kvm_s390_vm_start_migration.
+  */
+ static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+ {
+       struct kvm_s390_migration_state *mgs;
+
+       /* migration mode already disabled */
+       if (!kvm->arch.migration_state)
+               return 0;
+       mgs = kvm->arch.migration_state;
+       kvm->arch.migration_state = NULL;
+       if (kvm->arch.use_cmma) {
+               kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+               vfree(mgs->pgste_bitmap);
+       }
+       kfree(mgs);
+       return 0;
+ }
+
+ static int kvm_s390_vm_set_migration(struct kvm *kvm,
+                                    struct kvm_device_attr *attr)
+ {
+       int idx, res = -ENXIO;
+
+       mutex_lock(&kvm->lock);
+       switch (attr->attr) {
+       case KVM_S390_VM_MIGRATION_START:
+               idx = srcu_read_lock(&kvm->srcu);
+               res = kvm_s390_vm_start_migration(kvm);
+               srcu_read_unlock(&kvm->srcu, idx);
+               break;
+       case KVM_S390_VM_MIGRATION_STOP:
+               res = kvm_s390_vm_stop_migration(kvm);
+               break;
+       default:
+               break;
+       }
+       mutex_unlock(&kvm->lock);
+       return res;
+ }
+
+ static int kvm_s390_vm_get_migration(struct kvm *kvm,
+                                    struct kvm_device_attr *attr)
+ {
+       u64 mig = (kvm->arch.migration_state != NULL);
+
+       if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+               return -ENXIO;
+       if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+               return -EFAULT;
+       return 0;
+ }
+
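A sketch of the userspace side; vm_fd is the VM file descriptor, and on s390 these per-VM attributes are reached through the device-attr ioctls on it.

struct kvm_device_attr attr = {
	.group = KVM_S390_VM_MIGRATION,
	.attr  = KVM_S390_VM_MIGRATION_START,
};
__u64 status;

ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	/* enter migration mode */

attr.attr = KVM_S390_VM_MIGRATION_STATUS;
attr.addr = (__u64)(unsigned long)&status;
ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);	/* status is now 1 */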
  static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
  {
        u8 gtod_high;
@@@ -1089,6 -1215,9 +1214,9 @@@ static int kvm_s390_vm_set_attr(struct 
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
+       case KVM_S390_VM_MIGRATION:
+               ret = kvm_s390_vm_set_migration(kvm, attr);
+               break;
        default:
                ret = -ENXIO;
                break;
@@@ -1111,6 -1240,9 +1239,9 @@@ static int kvm_s390_vm_get_attr(struct 
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
+       case KVM_S390_VM_MIGRATION:
+               ret = kvm_s390_vm_get_migration(kvm, attr);
+               break;
        default:
                ret = -ENXIO;
                break;
@@@ -1178,6 -1310,9 +1309,9 @@@ static int kvm_s390_vm_has_attr(struct 
                        break;
                }
                break;
+       case KVM_S390_VM_MIGRATION:
+               ret = 0;
+               break;
        default:
                ret = -ENXIO;
                break;
        return r;
  }
  
+ /*
+  * Base address and length must be sent at the start of each block, therefore
+  * it's cheaper to send some clean data, as long as it's less than the size of
+  * two longs.
+  */
+ #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+ /* for consistency */
+ #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
+ /*
+  * This function searches for the next page with dirty CMMA attributes, and
+  * saves the attributes in the buffer up to either the end of the buffer or
+  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+  * no trailing clean bytes are saved.
+  * In case no dirty bits were found, or if CMMA was not enabled or used, the
+  * output buffer will indicate 0 as length.
+  */
+ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+                                 struct kvm_s390_cmma_log *args)
+ {
+       struct kvm_s390_migration_state *s = kvm->arch.migration_state;
+       unsigned long bufsize, hva, pgstev, i, next, cur;
+       int srcu_idx, peek, r = 0, rr;
+       u8 *res;
+       cur = args->start_gfn;
+       i = next = pgstev = 0;
+       if (unlikely(!kvm->arch.use_cmma))
+               return -ENXIO;
+       /* Invalid/unsupported flags were specified */
+       if (args->flags & ~KVM_S390_CMMA_PEEK)
+               return -EINVAL;
+       /* A non-peek query is only valid while migration mode is on */
+       peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+       if (!peek && !s)
+               return -EINVAL;
+       /* CMMA is disabled or was not used, or the buffer has length zero */
+       bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+       if (!bufsize || !kvm->mm->context.use_cmma) {
+               memset(args, 0, sizeof(*args));
+               return 0;
+       }
+       if (!peek) {
+               /* We are not peeking, and there are no dirty pages */
+               if (!atomic64_read(&s->dirty_pages)) {
+                       memset(args, 0, sizeof(*args));
+                       return 0;
+               }
+               cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
+                                   args->start_gfn);
+               if (cur >= s->bitmap_size)      /* nothing found, loop back */
+                       cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
+               if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
+                       memset(args, 0, sizeof(*args));
+                       return 0;
+               }
+               next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+       }
+       res = vmalloc(bufsize);
+       if (!res)
+               return -ENOMEM;
+       args->start_gfn = cur;
+       down_read(&kvm->mm->mmap_sem);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       while (i < bufsize) {
+               hva = gfn_to_hva(kvm, cur);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       break;
+               }
+               /* decrement only if we actually flipped the bit to 0 */
+               if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
+                       atomic64_dec(&s->dirty_pages);
+               r = get_pgste(kvm->mm, hva, &pgstev);
+               if (r < 0)
+                       pgstev = 0;
+               /* save the value */
+               res[i++] = (pgstev >> 24) & 0x3;
+               /*
+                * if the next bit is too far away, stop.
+                * if we reached the previous "next", find the next one
+                */
+               if (!peek) {
+                       if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
+                               break;
+                       if (cur == next)
+                               next = find_next_bit(s->pgste_bitmap,
+                                                    s->bitmap_size, cur + 1);
+               /* reached the end of the bitmap or of the buffer, stop */
+                       if ((next >= s->bitmap_size) ||
+                           (next >= args->start_gfn + bufsize))
+                               break;
+               }
+               cur++;
+       }
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       up_read(&kvm->mm->mmap_sem);
+       args->count = i;
+       args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
+       rr = copy_to_user((void __user *)args->values, res, args->count);
+       if (rr)
+               r = -EFAULT;
+       vfree(res);
+       return r;
+ }
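
On the source side of a migration, userspace would typically drain the dirty
values with a loop along these lines (a sketch only; vm_fd, buf and bufsize
are assumptions, and error handling is omitted):

        struct kvm_s390_cmma_log log = {
                .start_gfn = 0,
                .count     = bufsize,
                .flags     = 0,         /* consume; KVM_S390_CMMA_PEEK to peek */
                .values    = (__u64)(unsigned long)buf,
        };

        do {
                ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
                /* send log.count values for gfns starting at log.start_gfn */
                log.start_gfn += log.count;
                log.count = bufsize;
        } while (log.remaining);
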
+ /*
+  * This function sets the CMMA attributes for the given pages. If the input
+  * buffer has zero length, no action is taken, otherwise the attributes are
+  * set and the mm->context.use_cmma flag is set.
+  */
+ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+                                 const struct kvm_s390_cmma_log *args)
+ {
+       unsigned long hva, mask, pgstev, i;
+       uint8_t *bits;
+       int srcu_idx, r = 0;
+       mask = args->mask;
+       if (!kvm->arch.use_cmma)
+               return -ENXIO;
+       /* invalid/unsupported flags */
+       if (args->flags != 0)
+               return -EINVAL;
+       /* Enforce sane limit on memory allocation */
+       if (args->count > KVM_S390_CMMA_SIZE_MAX)
+               return -EINVAL;
+       /* Nothing to do */
+       if (args->count == 0)
+               return 0;
+       bits = vmalloc(sizeof(*bits) * args->count);
+       if (!bits)
+               return -ENOMEM;
+       r = copy_from_user(bits, (void __user *)args->values, args->count);
+       if (r) {
+               r = -EFAULT;
+               goto out;
+       }
+       down_read(&kvm->mm->mmap_sem);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       for (i = 0; i < args->count; i++) {
+               hva = gfn_to_hva(kvm, args->start_gfn + i);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       break;
+               }
+               pgstev = bits[i];
+               pgstev = pgstev << 24;
+               mask &= _PGSTE_GPS_USAGE_MASK;
+               set_pgste_bits(kvm->mm, hva, mask, pgstev);
+       }
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       up_read(&kvm->mm->mmap_sem);
+       if (!kvm->mm->context.use_cmma) {
+               down_write(&kvm->mm->mmap_sem);
+               kvm->mm->context.use_cmma = 1;
+               up_write(&kvm->mm->mmap_sem);
+       }
+ out:
+       vfree(bits);
+       return r;
+ }
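
On the destination, the same structure flows the other way; as the function
above shows, the kernel restricts mask to the usage-state bits on its own, so
userspace can simply pass all ones. A sketch under the same assumptions:

        struct kvm_s390_cmma_log log = {
                .start_gfn = gfn,       /* first gfn of the received run */
                .count     = n,         /* number of values in buf */
                .mask      = ~0ULL,
                .values    = (__u64)(unsigned long)buf,
        };

        ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
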
  long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
  {
                r = kvm_s390_set_skeys(kvm, &args);
                break;
        }
+       case KVM_S390_GET_CMMA_BITS: {
+               struct kvm_s390_cmma_log args;
+               r = -EFAULT;
+               if (copy_from_user(&args, argp, sizeof(args)))
+                       break;
+               r = kvm_s390_get_cmma_bits(kvm, &args);
+               if (!r) {
+                       r = copy_to_user(argp, &args, sizeof(args));
+                       if (r)
+                               r = -EFAULT;
+               }
+               break;
+       }
+       case KVM_S390_SET_CMMA_BITS: {
+               struct kvm_s390_cmma_log args;
+               r = -EFAULT;
+               if (copy_from_user(&args, argp, sizeof(args)))
+                       break;
+               r = kvm_s390_set_cmma_bits(kvm, &args);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@@ -1532,6 -1866,7 +1865,6 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
        mutex_init(&kvm->arch.float_int.ais_lock);
        kvm->arch.float_int.simm = 0;
        kvm->arch.float_int.nimm = 0;
 -      kvm->arch.float_int.ais_enabled = 0;
        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@@ -1631,6 -1966,10 +1964,10 @@@ void kvm_arch_destroy_vm(struct kvm *kv
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
+       if (kvm->arch.migration_state) {
+               vfree(kvm->arch.migration_state->pgste_bitmap);
+               kfree(kvm->arch.migration_state);
+       }
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
  }
  
@@@ -1975,7 -2314,6 +2312,6 @@@ int kvm_s390_vcpu_setup_cmma(struct kvm
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;
  
-       vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
        vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
        return 0;
  }
@@@ -2439,7 -2777,7 +2775,7 @@@ static int kvm_s390_handle_requests(str
  {
  retry:
        kvm_s390_vcpu_request_handled(vcpu);
-       if (!vcpu->requests)
+       if (!kvm_request_pending(vcpu))
                return 0;
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
                goto retry;
        }
  
+       if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+               /*
+                * Disable CMMA virtualization; we will emulate the ESSA
+                * instruction manually, in order to provide additional
+                * functionalities needed for live migration.
+                */
+               vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+               goto retry;
+       }
+       if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+               /*
+                * Re-enable CMMA virtualization if CMMA is available and
+                * was used.
+                */
+               if ((vcpu->kvm->arch.use_cmma) &&
+                   (vcpu->kvm->mm->context.use_cmma))
+                       vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+               goto retry;
+       }
        /* nothing to do, just clear the request */
        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
  
@@@ -2682,6 -3041,9 +3039,9 @@@ static int vcpu_post_run_fault_in_sie(s
  
  static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
  {
+       struct mcck_volatile_info *mcck_info;
+       struct sie_page *sie_page;
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
  
+       if (exit_reason == -EINTR) {
+               VCPU_EVENT(vcpu, 3, "%s", "machine check");
+               sie_page = container_of(vcpu->arch.sie_block,
+                                       struct sie_page, sie_block);
+               mcck_info = &sie_page->mcck_info;
+               kvm_s390_reinject_machine_check(vcpu, mcck_info);
+               return 0;
+       }
        if (vcpu->arch.sie_block->icptcode > 0) {
                int rc = kvm_handle_sie_intercept(vcpu);
  
diff --combined arch/s390/kvm/priv.c
index e53292a892575c40134d5cc35c999fea9a64b97c,a226c459809bf0d16657cec3b578c4fe43cd729b..8a1dac793d6b0ad0685ffd7a35743ca511274035
@@@ -24,6 -24,7 +24,7 @@@
  #include <asm/ebcdic.h>
  #include <asm/sysinfo.h>
  #include <asm/pgtable.h>
+ #include <asm/page-states.h>
  #include <asm/pgalloc.h>
  #include <asm/gmap.h>
  #include <asm/io.h>
@@@ -361,7 -362,7 +362,7 @@@ static int handle_sske(struct kvm_vcpu 
                }
        }
        if (m3 & SSKE_MB) {
 -              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
 +              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
                        vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
                else
                        vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
  static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
  {
        vcpu->stat.instruction_ipte_interlock++;
 -      if (psw_bits(vcpu->arch.sie_block->gpsw).p)
 +      if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
        wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
        kvm_s390_retry_instr(vcpu);
@@@ -901,7 -902,7 +902,7 @@@ static int handle_pfmf(struct kvm_vcpu 
                /* only support 2G frame size if EDAT2 is available and we are
                   not in 24-bit addressing mode */
                if (!test_kvm_facility(vcpu->kvm, 78) ||
 -                  psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
 +                  psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
                        return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
                end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
                break;
                start += PAGE_SIZE;
        }
        if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 -              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
 +              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
                        vcpu->run->s.regs.gprs[reg2] = end;
                } else {
                        vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
        return 0;
  }
  
+ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+ {
+       struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
+       int r1, r2, nappended, entries;
+       unsigned long gfn, hva, res, pgstev, ptev;
+       unsigned long *cbrlo;
+ 
+       /*
+        * We don't need to set SD.FPF.SK to 1 here, because a machine check
+        * at this point is either handled or fatal.
+        */
+       kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+       gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+       hva = gfn_to_hva(vcpu->kvm, gfn);
+       entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+       if (kvm_is_error_hva(hva))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+       if (nappended < 0) {
+               res = orc ? 0x10 : 0;
+               vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+               return 0;
+       }
+       res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+       /*
+        * Set the block-content state part of the result. 0 means resident, so
+        * nothing to do if the page is valid. 2 is for preserved pages
+        * (non-present and non-zero), and 3 for zero pages (non-present and
+        * zero).
+        */
+       if (ptev & _PAGE_INVALID) {
+               res |= 2;
+               if (pgstev & _PGSTE_GPS_ZERO)
+                       res |= 1;
+       }
+       vcpu->run->s.regs.gprs[r1] = res;
+       /*
+        * It is possible that all the normal 511 slots were full, in which case
+        * we will now write in the 512th slot, which is reserved for host use.
+        * In either case we let the normal ESSA handling code process all the
+        * slots, including the reserved one, if needed.
+        */
+       if (nappended > 0) {
+               cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+               cbrlo[entries] = gfn << PAGE_SHIFT;
+       }
+       if (orc) {
+               /* increment only if we are really flipping the bit to 1 */
+               if (!test_and_set_bit(gfn, ms->pgste_bitmap))
+                       atomic64_inc(&ms->dirty_pages);
+       }
+       return nappended;
+ }
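
The value written back to r1 packs two fields, both visible in the code
above: bits 3:2 carry the PGSTE usage state (PGSTE bits 25:24 shifted down
by 22, assuming the usual _PGSTE_GPS_USAGE_MASK layout) and bits 1:0 carry
the block-content state. A decoding sketch:

        unsigned int usage = (res >> 2) & 0x3;  /* PGSTE usage state */
        unsigned int bcs   = res & 0x3;         /* 0 resident, 2 preserved, 3 zero */
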
  static int handle_essa(struct kvm_vcpu *vcpu)
  {
        /* entries expected to be 1FF */
        int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
        unsigned long *cbrlo;
        struct gmap *gmap;
-       int i;
+       int i, orc;
  
        VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
        gmap = vcpu->arch.gmap;
  
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-       if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+       /* Check for invalid operation request code */
+       orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+       if (orc > ESSA_MAX)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
-       /* Retry the ESSA instruction */
-       kvm_s390_retry_instr(vcpu);
+       if (likely(!vcpu->kvm->arch.migration_state)) {
+               /*
+                * CMMA is enabled in the KVM settings, but is disabled in
+                * the SIE block and in the mm_context, and we are not doing
+                * a migration. Enable CMMA in the mm_context.
+                * Since we need to take a write lock on the context to
+                * avoid races with storage key handling, we first check
+                * whether the value actually needs to change; if it is
+                * already correct, we do nothing and avoid the lock.
+                */
+               if (vcpu->kvm->mm->context.use_cmma == 0) {
+                       down_write(&vcpu->kvm->mm->mmap_sem);
+                       vcpu->kvm->mm->context.use_cmma = 1;
+                       up_write(&vcpu->kvm->mm->mmap_sem);
+               }
+               /*
+                * If we are here, we are supposed to have CMMA enabled in
+                * the SIE block. Enabling CMMA works on a per-CPU basis,
+                * while the context use_cmma flag is per process.
+                * It's possible that the context flag is enabled and the
+                * SIE flag is not, so we always set the flag here; if it
+                * was already set, nothing changes, otherwise we enable
+                * it on this CPU too.
+                */
+               vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+               /* Retry the ESSA instruction */
+               kvm_s390_retry_instr(vcpu);
+       } else {
+               /* Account for the possible extra cbrl entry */
+               i = do_essa(vcpu, orc);
+               if (i < 0)
+                       return i;
+               entries += i;
+       }
        vcpu->arch.sie_block->cbrlo &= PAGE_MASK;       /* reset nceo */
        cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
        down_read(&gmap->mm->mmap_sem);
index d406894cd9a2f5b5c36234e1ceb2b5876feeb0cb,dbf266b0d14a5c0ce56c5cbd50a8d7890a9ffc2c..5573c75f8e4ced276c8585b71f0df9b786ea9e90
  #define DEBUGCTLMSR_BTS_OFF_OS                (1UL <<  9)
  #define DEBUGCTLMSR_BTS_OFF_USR               (1UL << 10)
  #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI        (1UL << 11)
 +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
 +#define DEBUGCTLMSR_FREEZE_IN_SMM     (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
  
  #define MSR_PEBS_FRONTEND             0x000003f7
  
  #define HWP_MIN_PERF(x)               (x & 0xff)
  #define HWP_MAX_PERF(x)               ((x & 0xff) << 8)
  #define HWP_DESIRED_PERF(x)           ((x & 0xff) << 16)
 -#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24)
 -#define HWP_ACTIVITY_WINDOW(x)                ((x & 0xff3) << 32)
 -#define HWP_PACKAGE_CONTROL(x)                ((x & 0x1) << 42)
 +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
 +#define HWP_EPP_PERFORMANCE           0x00
 +#define HWP_EPP_BALANCE_PERFORMANCE   0x80
 +#define HWP_EPP_BALANCE_POWERSAVE     0xC0
 +#define HWP_EPP_POWERSAVE             0xFF
 +#define HWP_ACTIVITY_WINDOW(x)                ((unsigned long long)(x & 0xff3) << 32)
 +#define HWP_PACKAGE_CONTROL(x)                ((unsigned long long)(x & 0x1) << 42)
  
  /* IA32_HWP_STATUS */
  #define HWP_GUARANTEED_CHANGE(x)      (x & 0x1)
  #define MSR_IA32_TSC_ADJUST             0x0000003b
  #define MSR_IA32_BNDCFGS              0x00000d90
  
+ #define MSR_IA32_BNDCFGS_RSVD         0x00000ffc
  #define MSR_IA32_XSS                  0x00000da0
  
  #define FEATURE_CONTROL_LOCKED                                (1<<0)
  #define MSR_MISC_PWR_MGMT             0x000001aa
  
  #define MSR_IA32_ENERGY_PERF_BIAS     0x000001b0
 -#define ENERGY_PERF_BIAS_PERFORMANCE  0
 -#define ENERGY_PERF_BIAS_NORMAL               6
 -#define ENERGY_PERF_BIAS_POWERSAVE    15
 +#define ENERGY_PERF_BIAS_PERFORMANCE          0
 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE  4
 +#define ENERGY_PERF_BIAS_NORMAL                       6
 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE    8
 +#define ENERGY_PERF_BIAS_POWERSAVE            15
  
  #define MSR_IA32_PACKAGE_THERM_STATUS         0x000001b1
  
diff --combined arch/x86/kvm/emulate.c
index 80890dee66cebf370a3815e28f7bd7c34025b0d4,4a38b96563917dca0f7d5871c5fadf785b0c9ca7..fb0055953fbc9d6e6a3263e1d0bc4fc3ba900a8e
@@@ -900,7 -900,7 +900,7 @@@ static __always_inline int do_insn_fetc
        if (rc != X86EMUL_CONTINUE)                                     \
                goto done;                                              \
        ctxt->_eip += sizeof(_type);                                    \
-       _x = *(_type __aligned(1) *) ctxt->fetch.ptr;                   \
+       memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));                    \
        ctxt->fetch.ptr += sizeof(_type);                               \
        _x;                                                             \
  })
@@@ -2742,7 -2742,6 +2742,7 @@@ static int em_syscall(struct x86_emulat
                ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
        }
  
 +      ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
        return X86EMUL_CONTINUE;
  }
  
@@@ -3941,6 -3940,25 +3941,25 @@@ static int check_fxsr(struct x86_emulat
        return X86EMUL_CONTINUE;
  }
  
+ /*
+  * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
+  * and restore MXCSR.
+  */
+ static size_t __fxstate_size(int nregs)
+ {
+       return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
+ }
+ 
+ static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
+ {
+       bool cr4_osfxsr;
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               return __fxstate_size(16);
+       cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
+       return __fxstate_size(cr4_osfxsr ? 8 : 0);
+ }
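
Assuming the architectural FXSAVE image, where the x87/control area occupies
the first 160 bytes and the XMM registers follow at 16 bytes each, the helper
above yields three sizes:

        __fxstate_size(0)  == 160       /* CR4.OSFXSR clear: no XMM state */
        __fxstate_size(8)  == 288       /* 32-bit modes with OSFXSR set */
        __fxstate_size(16) == 416       /* 64-bit mode: XMM0-XMM15 */
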
  /*
   * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
   *  1) 16 bit mode
  static int em_fxsave(struct x86_emulate_ctxt *ctxt)
  {
        struct fxregs_state fx_state;
-       size_t size;
        int rc;
  
        rc = check_fxsr(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
-       if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
-               size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
-       else
-               size = offsetof(struct fxregs_state, xmm_space[0]);
-       return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
- }
- static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
-               struct fxregs_state *new)
- {
-       int rc = X86EMUL_CONTINUE;
-       struct fxregs_state old;
-       rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
-       if (rc != X86EMUL_CONTINUE)
-               return rc;
-       /*
-        * 64 bit host will restore XMM 8-15, which is not correct on non-64
-        * bit guests.  Load the current values in order to preserve 64 bit
-        * XMMs after fxrstor.
-        */
- #ifdef CONFIG_X86_64
-       /* XXX: accessing XMM 8-15 very awkwardly */
-       memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
- #endif
-       /*
-        * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
-        * does save and restore MXCSR.
-        */
-       if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
-               memcpy(new->xmm_space, old.xmm_space, 8 * 16);
-       return rc;
+       return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
+                                  fxstate_size(ctxt));
  }
  
  static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
  {
        struct fxregs_state fx_state;
        int rc;
+       size_t size;
  
        rc = check_fxsr(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
-       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
-       if (rc != X86EMUL_CONTINUE)
-               return rc;
+       ctxt->ops->get_fpu(ctxt);
  
-       if (fx_state.mxcsr >> 16)
-               return emulate_gp(ctxt, 0);
+       size = fxstate_size(ctxt);
+       if (size < __fxstate_size(16)) {
+               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+               if (rc != X86EMUL_CONTINUE)
+                       goto out;
+       }
  
-       ctxt->ops->get_fpu(ctxt);
+       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+       if (rc != X86EMUL_CONTINUE)
+               goto out;
  
-       if (ctxt->mode < X86EMUL_MODE_PROT64)
-               rc = fxrstor_fixup(ctxt, &fx_state);
+       if (fx_state.mxcsr >> 16) {
+               rc = emulate_gp(ctxt, 0);
+               goto out;
+       }
  
        if (rc == X86EMUL_CONTINUE)
                rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
  
+ out:
        ctxt->ops->put_fpu(ctxt);
  
        return rc;
diff --combined arch/x86/kvm/mmu.c
index cb8225969255ec006fbff5eab76471f341d92b77,3ba600d09deae751645184c9ea9141b3807ebc6c..aafd399cf8c6f3d3e219ec636a73b19ee1e9d20d
@@@ -183,13 -183,13 +183,13 @@@ static u64 __read_mostly shadow_user_ma
  static u64 __read_mostly shadow_accessed_mask;
  static u64 __read_mostly shadow_dirty_mask;
  static u64 __read_mostly shadow_mmio_mask;
+ static u64 __read_mostly shadow_mmio_value;
  static u64 __read_mostly shadow_present_mask;
  
  /*
-  * The mask/value to distinguish a PTE that has been marked not-present for
-  * access tracking purposes.
-  * The mask would be either 0 if access tracking is disabled, or
-  * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled.
+  * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
+  * Non-present SPTEs with shadow_acc_track_value set are in place for access
+  * tracking.
   */
  static u64 __read_mostly shadow_acc_track_mask;
  static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
@@@ -207,16 -207,40 +207,40 @@@ static const u64 shadow_acc_track_saved
  static void mmu_spte_set(u64 *sptep, u64 spte);
  static void mmu_free_roots(struct kvm_vcpu *vcpu);
  
- void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
  {
+       BUG_ON((mmio_mask & mmio_value) != mmio_value);
+       shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
        shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
  
+ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
+ {
+       return sp->role.ad_disabled;
+ }
+ 
+ static inline bool spte_ad_enabled(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       return !(spte & shadow_acc_track_value);
+ }
+ 
+ static inline u64 spte_shadow_accessed_mask(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
+ }
+ 
+ static inline u64 spte_shadow_dirty_mask(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
+ }
+ 
  static inline bool is_access_track_spte(u64 spte)
  {
-       /* Always false if shadow_acc_track_mask is zero.  */
-       return (spte & shadow_acc_track_mask) == shadow_acc_track_value;
+       return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
  }
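
Taken together, these helpers split SPTEs into three flavors keyed off
shadow_acc_track_value (SPTE_SPECIAL_MASK). A rough classification sketch,
assuming the EPT setup later in this series where the access-track mask is
the RWX bits:

        bool ad_disabled = spte & SPTE_SPECIAL_MASK;    /* shadow_acc_track_value */
        bool acc_track   = ad_disabled && !(spte & VMX_EPT_RWX_MASK);
        /* everything else is an A/D-enabled SPTE */
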
  
  /*
@@@ -270,7 -294,7 +294,7 @@@ static void mark_mmio_spte(struct kvm_v
        u64 mask = generation_mmio_spte_mask(gen);
  
        access &= ACC_WRITE_MASK | ACC_USER_MASK;
-       mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
+       mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
  
        trace_mark_mmio_spte(sptep, gfn, access, gen);
        mmu_spte_set(sptep, mask);
  
  static bool is_mmio_spte(u64 spte)
  {
-       return (spte & shadow_mmio_mask) == shadow_mmio_mask;
+       return (spte & shadow_mmio_mask) == shadow_mmio_value;
  }
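
Splitting the old single mask into a (mask, value) pair is what lets EPT keep
recognizing deliberately misconfigured MMIO entries: later in this diff the
VMX code passes VMX_EPT_RWX_MASK as the mask and VMX_EPT_MISCONFIG_WX_VALUE
(110b, write/execute without read) as the value, so is_mmio_spte() matches
exactly the entries that raise an EPT misconfiguration exit:

        kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
                                   VMX_EPT_MISCONFIG_WX_VALUE);
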
  
  static gfn_t get_mmio_spte_gfn(u64 spte)
@@@ -315,12 -339,20 +339,20 @@@ static bool check_mmio_spte(struct kvm_
        return likely(kvm_gen == spte_gen);
  }
  
+ /*
+  * Sets the shadow PTE masks used by the MMU.
+  *
+  * Assumptions:
+  *  - Setting either @accessed_mask or @dirty_mask requires setting both
+  *  - At least one of @accessed_mask or @acc_track_mask must be set
+  */
  void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
                u64 acc_track_mask)
  {
-       if (acc_track_mask != 0)
-               acc_track_mask |= SPTE_SPECIAL_MASK;
+       BUG_ON(!dirty_mask != !accessed_mask);
+       BUG_ON(!accessed_mask && !acc_track_mask);
+       BUG_ON(acc_track_mask & shadow_acc_track_value);
  
        shadow_user_mask = user_mask;
        shadow_accessed_mask = accessed_mask;
        shadow_x_mask = x_mask;
        shadow_present_mask = p_mask;
        shadow_acc_track_mask = acc_track_mask;
-       WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
  
@@@ -549,7 -580,7 +580,7 @@@ static bool spte_has_volatile_bits(u64 
            is_access_track_spte(spte))
                return true;
  
-       if (shadow_accessed_mask) {
+       if (spte_ad_enabled(spte)) {
                if ((spte & shadow_accessed_mask) == 0 ||
                    (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
                        return true;
  
  static bool is_accessed_spte(u64 spte)
  {
-       return shadow_accessed_mask ? spte & shadow_accessed_mask
-                                   : !is_access_track_spte(spte);
+       u64 accessed_mask = spte_shadow_accessed_mask(spte);
+       return accessed_mask ? spte & accessed_mask
+                            : !is_access_track_spte(spte);
  }
  
  static bool is_dirty_spte(u64 spte)
  {
-       return shadow_dirty_mask ? spte & shadow_dirty_mask
-                                : spte & PT_WRITABLE_MASK;
+       u64 dirty_mask = spte_shadow_dirty_mask(spte);
+       return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
  }
  
  /* Rules for using mmu_spte_set:
@@@ -707,10 -741,10 +741,10 @@@ static u64 mmu_spte_get_lockless(u64 *s
  
  static u64 mark_spte_for_access_track(u64 spte)
  {
-       if (shadow_accessed_mask != 0)
+       if (spte_ad_enabled(spte))
                return spte & ~shadow_accessed_mask;
  
-       if (shadow_acc_track_mask == 0 || is_access_track_spte(spte))
+       if (is_access_track_spte(spte))
                return spte;
  
        /*
        spte |= (spte & shadow_acc_track_saved_bits_mask) <<
                shadow_acc_track_saved_bits_shift;
        spte &= ~shadow_acc_track_mask;
-       spte |= shadow_acc_track_value;
  
        return spte;
  }
@@@ -741,6 -774,7 +774,7 @@@ static u64 restore_acc_track_spte(u64 s
        u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift)
                         & shadow_acc_track_saved_bits_mask;
  
+       WARN_ON_ONCE(spte_ad_enabled(spte));
        WARN_ON_ONCE(!is_access_track_spte(spte));
  
        new_spte &= ~shadow_acc_track_mask;
@@@ -759,7 -793,7 +793,7 @@@ static bool mmu_spte_age(u64 *sptep
        if (!is_accessed_spte(spte))
                return false;
  
-       if (shadow_accessed_mask) {
+       if (spte_ad_enabled(spte)) {
                clear_bit((ffs(shadow_accessed_mask) - 1),
                          (unsigned long *)sptep);
        } else {
@@@ -1390,6 -1424,22 +1424,22 @@@ static bool spte_clear_dirty(u64 *sptep
        return mmu_spte_update(sptep, spte);
  }
  
+ static bool wrprot_ad_disabled_spte(u64 *sptep)
+ {
+       bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
+                                              (unsigned long *)sptep);
+       if (was_writable)
+               kvm_set_pfn_dirty(spte_to_pfn(*sptep));
+       return was_writable;
+ }
+ 
+ /*
+  * Gets the GFN ready for another round of dirty logging by clearing the
+  *    - D bit on ad-enabled SPTEs, and
+  *    - W bit on ad-disabled SPTEs.
+  * Returns true iff any D or W bits were cleared.
+  */
  static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
  {
        u64 *sptep;
        bool flush = false;
  
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_clear_dirty(sptep);
+               if (spte_ad_enabled(*sptep))
+                       flush |= spte_clear_dirty(sptep);
+               else
+                       flush |= wrprot_ad_disabled_spte(sptep);
  
        return flush;
  }
@@@ -1420,7 -1473,8 +1473,8 @@@ static bool __rmap_set_dirty(struct kv
        bool flush = false;
  
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_set_dirty(sptep);
+               if (spte_ad_enabled(*sptep))
+                       flush |= spte_set_dirty(sptep);
  
        return flush;
  }
@@@ -1452,7 -1506,8 +1506,8 @@@ static void kvm_mmu_write_protect_pt_ma
  }
  
  /**
-  * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages
+  * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write
+  * protect the page if the D-bit isn't supported.
   * @kvm: kvm instance
   * @slot: slot to clear D-bit
   * @gfn_offset: start of the BITS_PER_LONG pages we care about
@@@ -1766,18 -1821,9 +1821,9 @@@ static int kvm_test_age_rmapp(struct kv
        u64 *sptep;
        struct rmap_iterator iter;
  
-       /*
-        * If there's no access bit in the secondary pte set by the hardware and
-        * fast access tracking is also not enabled, it's up to gup-fast/gup to
-        * set the access bit in the primary pte or in the page structure.
-        */
-       if (!shadow_accessed_mask && !shadow_acc_track_mask)
-               goto out;
        for_each_rmap_spte(rmap_head, &iter, sptep)
                if (is_accessed_spte(*sptep))
                        return 1;
- out:
        return 0;
  }
  
@@@ -1798,18 -1844,6 +1844,6 @@@ static void rmap_recycle(struct kvm_vcp
  
  int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
  {
-       /*
-        * In case of absence of EPT Access and Dirty Bits supports,
-        * emulate the accessed bit for EPT, by checking if this page has
-        * an EPT mapping, and clearing it if it does. On the next access,
-        * a new EPT mapping will be established.
-        * This has some overhead, but not as much as the cost of swapping
-        * out actively used pages or breaking up actively used hugepages.
-        */
-       if (!shadow_accessed_mask && !shadow_acc_track_mask)
-               return kvm_handle_hva_range(kvm, start, end, 0,
-                                           kvm_unmap_rmapp);
        return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
  }
  
@@@ -2398,7 -2432,12 +2432,12 @@@ static void link_shadow_page(struct kvm
        BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
  
        spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
-              shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+              shadow_user_mask | shadow_x_mask;
+       if (sp_ad_disabled(sp))
+               spte |= shadow_acc_track_value;
+       else
+               spte |= shadow_accessed_mask;
  
        mmu_spte_set(sptep, spte);
  
@@@ -2666,10 -2705,15 +2705,15 @@@ static int set_spte(struct kvm_vcpu *vc
  {
        u64 spte = 0;
        int ret = 0;
+       struct kvm_mmu_page *sp;
  
        if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
                return 0;
  
+       sp = page_header(__pa(sptep));
+       if (sp_ad_disabled(sp))
+               spte |= shadow_acc_track_value;
        /*
         * For the EPT case, shadow_present_mask is 0 if hardware
         * supports exec-only page table entries.  In that case,
         */
        spte |= shadow_present_mask;
        if (!speculative)
-               spte |= shadow_accessed_mask;
+               spte |= spte_shadow_accessed_mask(spte);
  
        if (pte_access & ACC_EXEC_MASK)
                spte |= shadow_x_mask;
  
        if (pte_access & ACC_WRITE_MASK) {
                kvm_vcpu_mark_page_dirty(vcpu, gfn);
-               spte |= shadow_dirty_mask;
+               spte |= spte_shadow_dirty_mask(spte);
        }
  
        if (speculative)
@@@ -2877,16 -2921,16 +2921,16 @@@ static void direct_pte_prefetch(struct 
  {
        struct kvm_mmu_page *sp;
  
+       sp = page_header(__pa(sptep));
        /*
-        * Since it's no accessed bit on EPT, it's no way to
-        * distinguish between actually accessed translations
-        * and prefetched, so disable pte prefetch if EPT is
-        * enabled.
+        * Without accessed bits, there's no way to distinguish between
+        * actually accessed translations and prefetched, so disable pte
+        * prefetch if accessed bits aren't available.
         */
-       if (!shadow_accessed_mask)
+       if (sp_ad_disabled(sp))
                return;
  
-       sp = page_header(__pa(sptep));
        if (sp->role.level > PT_PAGE_TABLE_LEVEL)
                return;
  
@@@ -3698,15 -3742,12 +3742,15 @@@ static int kvm_arch_setup_async_pf(stru
        return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
  }
  
 -static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
  {
        if (unlikely(!lapic_in_kernel(vcpu) ||
                     kvm_event_needs_reinjection(vcpu)))
                return false;
  
 +      if (is_guest_mode(vcpu))
 +              return false;
 +
        return kvm_x86_ops->interrupt_allowed(vcpu);
  }
  
@@@ -3722,7 -3763,7 +3766,7 @@@ static bool try_async_pf(struct kvm_vcp
        if (!async)
                return false; /* *pfn has correct page already */
  
 -      if (!prefault && can_do_async_pf(vcpu)) {
 +      if (!prefault && kvm_can_do_async_pf(vcpu)) {
                trace_kvm_try_async_get_page(gva, gfn);
                if (kvm_find_async_pf_gfn(vcpu, gfn)) {
                        trace_kvm_async_pf_doublefault(gva, gfn);
@@@ -4290,6 -4331,7 +4334,7 @@@ static void init_kvm_tdp_mmu(struct kvm
  
        context->base_role.word = 0;
        context->base_role.smm = is_smm(vcpu);
+       context->base_role.ad_disabled = (shadow_accessed_mask == 0);
        context->page_fault = tdp_page_fault;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
@@@ -4377,6 -4419,7 +4422,7 @@@ void kvm_init_shadow_ept_mmu(struct kvm
        context->root_level = context->shadow_root_level;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
+       context->base_role.ad_disabled = !accessed_dirty;
  
        update_permission_bitmask(vcpu, context, true);
        update_pkru_bitmask(vcpu, context, true);
@@@ -4636,6 -4679,7 +4682,7 @@@ static void kvm_mmu_pte_write(struct kv
        mask.smep_andnot_wp = 1;
        mask.smap_andnot_wp = 1;
        mask.smm = 1;
+       mask.ad_disabled = 1;
  
        /*
         * If we don't have indirect shadow pages, it means no page is
diff --combined arch/x86/kvm/mmu.h
index 330bf3a811fb07271de382b598be402c871f6496,41d362e95681c5de61f89d80037114bfaa04537e..a276834950c14a15681c9d125ddde8e9b9dc6af8
@@@ -51,7 -51,7 +51,7 @@@ static inline u64 rsvd_bits(int s, int 
        return ((1ULL << (e - s + 1)) - 1) << s;
  }
  
- void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
  
  void
  reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
@@@ -76,7 -76,6 +76,7 @@@ int handle_mmio_page_fault(struct kvm_v
  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
  void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                             bool accessed_dirty);
 +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
  
  static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
  {
diff --combined arch/x86/kvm/svm.c
index 33460fcdeef9e7ae51b673704c242b4226eef4d1,03df7c1da581a7317c4e566aa0db5e28f86134c6..905ea6052517fef7a09bf82f396222ade76be2a5
@@@ -36,7 -36,6 +36,7 @@@
  #include <linux/slab.h>
  #include <linux/amd-iommu.h>
  #include <linux/hashtable.h>
 +#include <linux/frame.h>
  
  #include <asm/apic.h>
  #include <asm/perf_event.h>
@@@ -190,6 -189,7 +190,7 @@@ struct vcpu_svm 
        struct nested_state nested;
  
        bool nmi_singlestep;
+       u64 nmi_singlestep_guest_rflags;
  
        unsigned int3_injected;
        unsigned long int3_rip;
@@@ -964,6 -964,18 +965,18 @@@ static void svm_disable_lbrv(struct vcp
        set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
  }
  
+ static void disable_nmi_singlestep(struct vcpu_svm *svm)
+ {
+       svm->nmi_singlestep = false;
+       if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+               /* Clear our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+       }
+ }
+ 
  /* Note:
   * This hash table is used to map VM_ID to a struct kvm_arch,
   * when handling AMD IOMMU GALOG notification to schedule in
@@@ -1713,11 -1725,24 +1726,24 @@@ static void svm_vcpu_unblocking(struct 
  
  static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
  {
-       return to_svm(vcpu)->vmcb->save.rflags;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long rflags = svm->vmcb->save.rflags;
+       if (svm->nmi_singlestep) {
+               /* Hide our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       rflags &= ~X86_EFLAGS_RF;
+       }
+       return rflags;
  }
  
  static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
  {
+       if (to_svm(vcpu)->nmi_singlestep)
+               rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
         /*
          * Any change of EFLAGS.VM is accompanied by a reload of SS
          * (caused by either a task switch or an inter-privilege IRET),
@@@ -2112,10 -2137,7 +2138,7 @@@ static int db_interception(struct vcpu_
        }
  
        if (svm->nmi_singlestep) {
-               svm->nmi_singlestep = false;
-               if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
-                       svm->vmcb->save.rflags &=
-                               ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+               disable_nmi_singlestep(svm);
        }
  
        if (svm->vcpu.guest_debug &
@@@ -2370,8 -2392,8 +2393,8 @@@ static void nested_svm_uninit_mmu_conte
  
  static int nested_svm_check_permissions(struct vcpu_svm *svm)
  {
-       if (!(svm->vcpu.arch.efer & EFER_SVME)
-           || !is_paging(&svm->vcpu)) {
+       if (!(svm->vcpu.arch.efer & EFER_SVME) ||
+           !is_paging(&svm->vcpu)) {
                kvm_queue_exception(&svm->vcpu, UD_VECTOR);
                return 1;
        }
                return 1;
        }
  
-        return 0;
+       return 0;
  }
  
  static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
@@@ -2534,6 -2556,31 +2557,31 @@@ static int nested_svm_exit_handled_msr(
        return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
  }
  
+ /* DB exceptions for our internal use must not cause vmexit */
+ static int nested_svm_intercept_db(struct vcpu_svm *svm)
+ {
+       unsigned long dr6;
+       /* if we're not singlestepping, it's not ours */
+       if (!svm->nmi_singlestep)
+               return NESTED_EXIT_DONE;
+       /* if it's not a singlestep exception, it's not ours */
+       if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+               return NESTED_EXIT_DONE;
+       if (!(dr6 & DR6_BS))
+               return NESTED_EXIT_DONE;
+       /* if the guest is singlestepping, it should get the vmexit */
+       if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+               disable_nmi_singlestep(svm);
+               return NESTED_EXIT_DONE;
+       }
+       /* it's ours, the nested hypervisor must not see this one */
+       return NESTED_EXIT_HOST;
+ }
+ 
  static int nested_svm_exit_special(struct vcpu_svm *svm)
  {
        u32 exit_code = svm->vmcb->control.exit_code;
@@@ -2589,8 -2636,12 +2637,12 @@@ static int nested_svm_intercept(struct 
        }
        case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
                u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
-               if (svm->nested.intercept_exceptions & excp_bits)
-                       vmexit = NESTED_EXIT_DONE;
+               if (svm->nested.intercept_exceptions & excp_bits) {
+                       if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+                               vmexit = nested_svm_intercept_db(svm);
+                       else
+                               vmexit = NESTED_EXIT_DONE;
+               }
                /* async page fault always cause vmexit */
                else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
                         svm->apf_reason != 0)
@@@ -4627,10 -4678,17 +4679,17 @@@ static void enable_nmi_window(struct kv
            == HF_NMI_MASK)
                return; /* IRET will cause a vm exit */
  
+       if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+               return; /* STGI will cause a vm exit */
+       if (svm->nested.exit_required)
+               return; /* we're not going to run the guest yet */
        /*
         * Something prevents NMI from being injected. Single step over the
         * problem (IRET, exception injection or interrupt shadow).
         */
+       svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
        svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
  }
@@@ -4771,6 -4829,22 +4830,22 @@@ static void svm_vcpu_run(struct kvm_vcp
        if (unlikely(svm->nested.exit_required))
                return;
  
+       /*
+        * Disable singlestep if we're injecting an interrupt/exception.
+        * We don't want our modified rflags to be pushed on the stack where
+        * we might not be able to easily reset them when we disable NMI
+        * singlestep later.
+        */
+       if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+               /*
+                * Event injection happens before external interrupts cause a
+                * vmexit and interrupts are disabled here, so smp_send_reschedule
+                * is enough to force an immediate vmexit.
+                */
+               disable_nmi_singlestep(svm);
+               smp_send_reschedule(vcpu->cpu);
+       }
        pre_svm_run(svm);
  
        sync_lapic_to_cr8(vcpu);
  
        mark_all_clean(svm->vmcb);
  }
 +STACK_FRAME_NON_STANDARD(svm_vcpu_run);
  
  static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
  {
diff --combined arch/x86/kvm/vmx.c
index 6dcc4873e435c7357892e9dcd200c3c4c9b3688a,b4cfdcfdc1c1f9c965bac66b3cea384c2a6a3dfa..f76efad248aba0dc02bce77a4cd984343d181d79
@@@ -33,7 -33,6 +33,7 @@@
  #include <linux/slab.h>
  #include <linux/tboot.h>
  #include <linux/hrtimer.h>
 +#include <linux/frame.h>
  #include "kvm_cache_regs.h"
  #include "x86.h"
  
@@@ -49,7 -48,6 +49,7 @@@
  #include <asm/kexec.h>
  #include <asm/apic.h>
  #include <asm/irq_remapping.h>
 +#include <asm/mmu_context.h>
  
  #include "trace.h"
  #include "pmu.h"
@@@ -598,7 -596,6 +598,7 @@@ struct vcpu_vmx 
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
                u64           msr_host_bndcfgs;
 +              unsigned long vmcs_host_cr3;    /* May not match real cr3 */
                unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        } host_state;
        struct {
@@@ -913,8 -910,9 +913,9 @@@ static void nested_release_page_clean(s
        kvm_release_page_clean(page);
  }
  
+ static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
  static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
- static u64 construct_eptp(unsigned long root_hpa);
+ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
  static bool vmx_xsaves_supported(void);
  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
  static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@@ -2428,7 -2426,7 +2429,7 @@@ static int nested_vmx_check_exception(s
        if (!(vmcs12->exception_bitmap & (1u << nr)))
                return 0;
  
 -      nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
 +      nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                          vmcs_read32(VM_EXIT_INTR_INFO),
                          vmcs_readl(EXIT_QUALIFICATION));
        return 1;
@@@ -2772,7 -2770,7 +2773,7 @@@ static void nested_vmx_setup_ctls_msrs(
                if (enable_ept_ad_bits) {
                        vmx->nested.nested_vmx_secondary_ctls_high |=
                                SECONDARY_EXEC_ENABLE_PML;
-                      vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+                       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
                }
        } else
                vmx->nested.nested_vmx_ept_caps = 0;
@@@ -3198,7 -3196,8 +3199,8 @@@ static int vmx_get_msr(struct kvm_vcpu 
                msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!kvm_mpx_supported())
+               if (!kvm_mpx_supported() ||
+                   (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
                        return 1;
                msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
@@@ -3280,7 -3279,11 +3282,11 @@@ static int vmx_set_msr(struct kvm_vcpu 
                vmcs_writel(GUEST_SYSENTER_ESP, data);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!kvm_mpx_supported())
+               if (!kvm_mpx_supported() ||
+                   (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+                       return 1;
+               if (is_noncanonical_address(data & PAGE_MASK) ||
+                   (data & MSR_IA32_BNDCFGS_RSVD))
                        return 1;
                vmcs_write64(GUEST_BNDCFGS, data);
                break;
@@@ -4013,7 -4016,7 +4019,7 @@@ static inline void __vmx_flush_tlb(stru
        if (enable_ept) {
                if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                        return;
-               ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+               ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
        } else {
                vpid_sync_context(vpid);
        }
@@@ -4188,14 -4191,15 +4194,15 @@@ static void vmx_set_cr0(struct kvm_vcp
        vmx->emulation_required = emulation_required(vcpu);
  }
  
- static u64 construct_eptp(unsigned long root_hpa)
+ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
  {
        u64 eptp;
  
        /* TODO: read this value from the relevant MSR instead of hard-coding it */
        eptp = VMX_EPT_DEFAULT_MT |
                VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
-       if (enable_ept_ad_bits)
+       if (enable_ept_ad_bits &&
+           (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
                eptp |= VMX_EPT_AD_ENABLE_BIT;
        eptp |= (root_hpa & PAGE_MASK);
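
A worked example, assuming the usual EPTP encodings (memory type 6 =
write-back in bits 2:0, a 4-level walk encoded as 3 in bits 5:3, A/D enable
in bit 6): with A/D bits in use the function returns root_hpa | 0x5e, while
for an L2 guest whose L1 EPTP did not enable A/D bits, bit 6 stays clear and
the result is root_hpa | 0x1e.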
  
@@@ -4209,7 -4213,7 +4216,7 @@@ static void vmx_set_cr3(struct kvm_vcp
  
        guest_cr3 = cr3;
        if (enable_ept) {
-               eptp = construct_eptp(cr3);
+               eptp = construct_eptp(vcpu, cr3);
                vmcs_write64(EPT_POINTER, eptp);
                if (is_paging(vcpu) || is_guest_mode(vcpu))
                        guest_cr3 = kvm_read_cr3(vcpu);
@@@ -5015,19 -5019,12 +5022,19 @@@ static void vmx_set_constant_host_state
        u32 low32, high32;
        unsigned long tmpl;
        struct desc_ptr dt;
 -      unsigned long cr0, cr4;
 +      unsigned long cr0, cr3, cr4;
  
        cr0 = read_cr0();
        WARN_ON(cr0 & X86_CR0_TS);
        vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
 -      vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 +
 +      /*
 +       * Save the most likely value for this task's CR3 in the VMCS.
 +       * We can't use __get_current_cr3_fast() because we're not atomic.
 +       */
 +      cr3 = __read_cr3();
 +      vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
 +      vmx->host_state.vmcs_host_cr3 = cr3;
  
        /* Save the most likely value for this task's CR4 in the VMCS. */
        cr4 = cr4_read_shadow();
@@@ -5170,7 -5167,8 +5177,8 @@@ static void ept_set_mmio_spte_mask(void
         * EPT Misconfigurations can be generated if the value of bits 2:0
         * of an EPT paging-structure entry is 110b (write/execute).
         */
-       kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE);
+       kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
+                                  VMX_EPT_MISCONFIG_WX_VALUE);
  }
  
  #define VMX_XSS_EXIT_BITMAP 0
@@@ -6220,17 -6218,6 +6228,6 @@@ static int handle_ept_violation(struct 
  
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
  
-       if (is_guest_mode(vcpu)
-           && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
-               /*
-                * Fix up exit_qualification according to whether guest
-                * page table accesses are reads or writes.
-                */
-               u64 eptp = nested_ept_get_cr3(vcpu);
-               if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
-                       exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
-       }
        /*
         * EPT violation happened while executing iret from NMI,
         * "blocked by NMI" bit has to be set before next VM entry.
@@@ -6453,7 -6440,7 +6450,7 @@@ void vmx_enable_tdp(void
                enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
                0ull, VMX_EPT_EXECUTABLE_MASK,
                cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
-               enable_ept_ad_bits ? 0ull : VMX_EPT_RWX_MASK);
+               VMX_EPT_RWX_MASK);
  
        ept_set_mmio_spte_mask();
        kvm_enable_tdp();
@@@ -6557,7 -6544,6 +6554,6 @@@ static __init int hardware_setup(void
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
  
        memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
@@@ -7661,7 -7647,10 +7657,10 @@@ static int handle_invvpid(struct kvm_vc
        unsigned long type, types;
        gva_t gva;
        struct x86_exception e;
-       int vpid;
+       struct {
+               u64 vpid;
+               u64 gla;
+       } operand;
  
        if (!(vmx->nested.nested_vmx_secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_VPID) ||
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
-                               sizeof(u32), &e)) {
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+                               sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
+       if (operand.vpid >> 16) {
+               nested_vmx_failValid(vcpu,
+                       VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
  
        switch (type) {
        case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+               if (is_noncanonical_address(operand.gla)) {
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+                       return kvm_skip_emulated_instruction(vcpu);
+               }
+               /* fall through */
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
-               if (!vpid) {
+               if (!operand.vpid) {
                        nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
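
The 16-byte operand mirrors the INVVPID descriptor defined by the SDM: the
VPID sits in bits 15:0, bits 63:16 are reserved and must be zero (hence the
operand.vpid >> 16 check), and the second quadword holds the linear address,
which is consulted only for the individual-address type and must then be
canonical. A descriptor with, say, vpid = 0x1002a now correctly fails with
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID instead of being silently accepted.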
@@@ -8662,7 -8662,6 +8672,7 @@@ static void vmx_handle_external_intr(st
                        );
        }
  }
 +STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
  
  static bool vmx_has_high_real_mode_segbase(void)
  {
@@@ -8831,7 -8830,7 +8841,7 @@@ static void vmx_arm_hv_timer(struct kvm
  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 -      unsigned long debugctlmsr, cr4;
 +      unsigned long debugctlmsr, cr3, cr4;
  
        /* Don't enter VMX if guest state is invalid, let the exit handler
           start emulation until we arrive back to a valid state */
        if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
  
 +      cr3 = __get_current_cr3_fast();
 +      if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
 +              vmcs_writel(HOST_CR3, cr3);
 +              vmx->host_state.vmcs_host_cr3 = cr3;
 +      }
 +
        cr4 = cr4_read_shadow();
        if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
                vmcs_writel(HOST_CR4, cr4);
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
  }
 +STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
  
  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
  {
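HOST_CR3 is now cached the same way HOST_CR4 already was: __get_current_cr3_fast() supplies the current value and the relatively expensive VMWRITE is issued only when it differs from what the VMCS last saw. The pattern, with cached_write() as a hypothetical wrapper around the real vmcs_writel() accessor:

	static inline void cached_write(unsigned long field, unsigned long val,
					unsigned long *cache)
	{
		if (unlikely(val != *cache)) {	/* VMWRITE only on change */
			vmcs_writel(field, val);
			*cache = val;
		}
	}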
@@@ -9394,6 -9386,11 +9404,11 @@@ static void nested_ept_inject_page_faul
        vmcs12->guest_physical_address = fault->address;
  }
  
+ static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
+ {
+       return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT;
+ }
+
  /* Callbacks for nested_ept_init_mmu_context: */
  
  static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
  
  static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
  {
-       u64 eptp;
+       bool wants_ad;
  
        WARN_ON(mmu_is_nested(vcpu));
-       eptp = nested_ept_get_cr3(vcpu);
-       if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+       wants_ad = nested_ept_ad_enabled(vcpu);
+       if (wants_ad && !enable_ept_ad_bits)
                return 1;
  
        kvm_mmu_unload(vcpu);
        kvm_init_shadow_ept_mmu(vcpu,
                        to_vmx(vcpu)->nested.nested_vmx_ept_caps &
                        VMX_EPT_EXECUTE_ONLY_BIT,
-                       eptp & VMX_EPT_AD_ENABLE_BIT);
+                       wants_ad);
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
        vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@@ -10728,8 -10725,7 +10743,7 @@@ static void sync_vmcs12(struct kvm_vcp
                vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
        }
  
-       if (nested_cpu_has_ept(vmcs12))
-               vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+       vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
  
        if (nested_cpu_has_vid(vmcs12))
                vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
        if (kvm_mpx_supported())
                vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
-       if (nested_cpu_has_xsaves(vmcs12))
-               vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
  }
  
  /*
@@@ -11152,7 -11146,8 +11164,8 @@@ static int vmx_set_hv_timer(struct kvm_
        vmx->hv_deadline_tsc = tscl + delta_tsc;
        vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                        PIN_BASED_VMX_PREEMPTION_TIMER);
-       return 0;
+       return delta_tsc == 0;
  }
  
  static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
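vmx_set_hv_timer() previously reported success unconditionally; returning delta_tsc == 0 lets it signal that the deadline has already passed. A hedged sketch of how a caller can consume the tri-state result — the real handling lives in lapic.c's start_hv_timer(), apic_timer_expired() is the expiry path, and the surrounding shape is illustrative:

	int r = kvm_x86_ops->set_hv_timer(vcpu, deadline_tsc);
	if (r < 0)		/* preemption timer unusable, keep sw timer */
		return false;
	if (r)			/* deadline already reached: fire now */
		apic_timer_expired(apic);
	return true;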
diff --combined arch/x86/kvm/x86.c
index 0e846f0cb83bb214811d0a12d2f700cc96a455f9,3a12b879f5424078905030b6de6d087aacbaf76a..6c7266f7766dcb6ec02b13b9b1439c9f9d547071
@@@ -2841,10 -2841,10 +2841,10 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
                        kvm_vcpu_write_tsc_offset(vcpu, offset);
                        vcpu->arch.tsc_catchup = 1;
                }
-               if (kvm_lapic_hv_timer_in_use(vcpu) &&
-                               kvm_x86_ops->set_hv_timer(vcpu,
-                                       kvm_get_lapic_target_expiration_tsc(vcpu)))
-                       kvm_lapic_switch_to_sw_timer(vcpu);
+               if (kvm_lapic_hv_timer_in_use(vcpu))
+                       kvm_lapic_restart_hv_timer(vcpu);
                /*
                 * On a host with synchronized TSC, there is no need to update
                 * kvmclock on vcpu->cpu migration
@@@ -5313,8 -5313,6 +5313,8 @@@ static void init_emulate_ctxt(struct kv
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
  
        ctxt->eflags = kvm_get_rflags(vcpu);
 +      ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 +
        ctxt->eip = kvm_rip_read(vcpu);
        ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                     (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
@@@ -5530,25 -5528,36 +5530,25 @@@ static int kvm_vcpu_check_hw_bp(unsigne
        return dr6;
  }
  
 -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
 +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
  {
        struct kvm_run *kvm_run = vcpu->run;
  
 -      /*
 -       * rflags is the old, "raw" value of the flags.  The new value has
 -       * not been saved yet.
 -       *
 -       * This is correct even for TF set by the guest, because "the
 -       * processor will not generate this exception after the instruction
 -       * that sets the TF flag".
 -       */
 -      if (unlikely(rflags & X86_EFLAGS_TF)) {
 -              if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 -                      kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
 -                                                DR6_RTM;
 -                      kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 -                      kvm_run->debug.arch.exception = DB_VECTOR;
 -                      kvm_run->exit_reason = KVM_EXIT_DEBUG;
 -                      *r = EMULATE_USER_EXIT;
 -              } else {
 -                      /*
 -                       * "Certain debug exceptions may clear bit 0-3.  The
 -                       * remaining contents of the DR6 register are never
 -                       * cleared by the processor".
 -                       */
 -                      vcpu->arch.dr6 &= ~15;
 -                      vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
 -                      kvm_queue_exception(vcpu, DB_VECTOR);
 -              }
 +      if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 +              kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
 +              kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 +              kvm_run->debug.arch.exception = DB_VECTOR;
 +              kvm_run->exit_reason = KVM_EXIT_DEBUG;
 +              *r = EMULATE_USER_EXIT;
 +      } else {
 +              /*
 +               * "Certain debug exceptions may clear bit 0-3.  The
 +               * remaining contents of the DR6 register are never
 +               * cleared by the processor".
 +               */
 +              vcpu->arch.dr6 &= ~15;
 +              vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
 +              kvm_queue_exception(vcpu, DB_VECTOR);
        }
  }
  
@@@ -5558,17 -5567,7 +5558,17 @@@ int kvm_skip_emulated_instruction(struc
        int r = EMULATE_DONE;
  
        kvm_x86_ops->skip_emulated_instruction(vcpu);
 -      kvm_vcpu_check_singlestep(vcpu, rflags, &r);
 +
 +      /*
 +       * rflags is the old, "raw" value of the flags.  The new value has
 +       * not been saved yet.
 +       *
 +       * This is correct even for TF set by the guest, because "the
 +       * processor will not generate this exception after the instruction
 +       * that sets the TF flag".
 +       */
 +      if (unlikely(rflags & X86_EFLAGS_TF))
 +              kvm_vcpu_do_singlestep(vcpu, &r);
        return r == EMULATE_DONE;
  }
  EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
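Splitting kvm_vcpu_do_singlestep() out lets each call site supply the correct trigger: the skip path tests the pre-instruction RFLAGS.TF (per the SDM, the instruction that sets TF does not itself trap), while the emulation path below uses ctxt->tf, latched in init_emulate_ctxt() before the instruction ran. Schematically, with hypothetical helper names:

	bool tf_before = ctxt->eflags & X86_EFLAGS_TF;	/* latched up front */
	emulate_one_insn(ctxt);				/* may rewrite EFLAGS.TF */
	if (tf_before)					/* decide on the old TF */
		queue_singlestep_db(vcpu);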
@@@ -5727,9 -5726,8 +5727,9 @@@ restart
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
                kvm_rip_write(vcpu, ctxt->eip);
 -              if (r == EMULATE_DONE)
 -                      kvm_vcpu_check_singlestep(vcpu, rflags, &r);
 +              if (r == EMULATE_DONE &&
 +                  (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
 +                      kvm_vcpu_do_singlestep(vcpu, &r);
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
                        __kvm_set_rflags(vcpu, ctxt->eflags);
@@@ -6011,7 -6009,7 +6011,7 @@@ static void kvm_set_mmio_spte_mask(void
                mask &= ~1ull;
  #endif
  
-       kvm_mmu_set_mmio_spte_mask(mask);
+       kvm_mmu_set_mmio_spte_mask(mask, mask);
  }
  
  #ifdef CONFIG_X86_64
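kvm_mmu_set_mmio_spte_mask() now takes the mask and the expected value separately, making the MMIO-SPTE test explicit; this caller happens to pass the same quantity for both. The resulting predicate, sketched with names modelled on mmu.c:

	/* an SPTE denotes an MMIO mapping iff the masked bits
	 * equal the configured value */
	static bool sketch_is_mmio_spte(u64 spte, u64 mmio_mask, u64 mmio_value)
	{
		return (spte & mmio_mask) == mmio_value;
	}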
@@@ -6733,7 -6731,7 +6733,7 @@@ static int vcpu_enter_guest(struct kvm_
  
        bool req_immediate_exit = false;
  
-       if (vcpu->requests) {
+       if (kvm_request_pending(vcpu)) {
                if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
                        kvm_mmu_unload(vcpu);
                if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
                        kvm_x86_ops->sync_pir_to_irr(vcpu);
        }
  
-       if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
+       if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
            || need_resched() || signal_pending(current)) {
                vcpu->mode = OUTSIDE_GUEST_MODE;
                smp_wmb();
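Open-coded reads of vcpu->requests give way to kvm_request_pending(), part of the VCPU-request overhaul this merge documents. A sketch of the accessor, on the assumption that vcpu->requests remains the pending-request bitmap; READ_ONCE() keeps the entry loop from reusing a stale cached load:

	static __always_inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
	{
		return READ_ONCE(vcpu->requests);
	}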
@@@ -8609,7 -8607,8 +8609,7 @@@ bool kvm_arch_can_inject_async_page_pre
        if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
                return true;
        else
 -              return !kvm_event_needs_reinjection(vcpu) &&
 -                      kvm_x86_ops->interrupt_allowed(vcpu);
 +              return kvm_can_do_async_pf(vcpu);
  }
  
  void kvm_arch_start_assignment(struct kvm *kvm)
diff --combined virt/kvm/arm/mmu.c
index 1c44aa35f909dadbc8dd7cf0ab0c54fa81347e21,f2d5b6cf06ae24fc9cf8118432e61c92e120434f..0e1fc75f3585774b7b885cf3f52eda59a4fd299f
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/kvm_host.h>
  #include <linux/io.h>
  #include <linux/hugetlb.h>
+ #include <linux/sched/signal.h>
  #include <trace/events/kvm.h>
  #include <asm/pgalloc.h>
  #include <asm/cacheflush.h>
@@@ -29,7 -30,6 +30,7 @@@
  #include <asm/kvm_asm.h>
  #include <asm/kvm_emulate.h>
  #include <asm/virt.h>
 +#include <asm/system_misc.h>
  
  #include "trace.h"
  
@@@ -1262,6 -1262,24 +1263,24 @@@ static void coherent_cache_guest_page(s
        __coherent_cache_guest_page(vcpu, pfn, size);
  }
  
+ static void kvm_send_hwpoison_signal(unsigned long address,
+                                    struct vm_area_struct *vma)
+ {
+       siginfo_t info;
+       info.si_signo   = SIGBUS;
+       info.si_errno   = 0;
+       info.si_code    = BUS_MCEERR_AR;
+       info.si_addr    = (void __user *)address;
+       if (is_vm_hugetlb_page(vma))
+               info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
+       else
+               info.si_addr_lsb = PAGE_SHIFT;
+       send_sig_info(SIGBUS, &info, current);
+ }
+
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
        smp_rmb();
  
        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+       if (pfn == KVM_PFN_ERR_HWPOISON) {
+               kvm_send_hwpoison_signal(hva, vma);
+               return 0;
+       }
        if (is_error_noslot_pfn(pfn))
                return -EFAULT;
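kvm_send_hwpoison_signal() above reports the poisoned page to userspace the way the generic memory-failure code does: SIGBUS with BUS_MCEERR_AR, si_addr holding the faulting HVA and si_addr_lsb its granule size (base page or hugepage). A sketch of the receiving side, assuming a VMM that installs an SA_SIGINFO handler; the handler body is illustrative:

	static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
	{
		if (si->si_code == BUS_MCEERR_AR) {
			void *hva = si->si_addr;			/* poisoned mapping */
			size_t len = (size_t)1 << si->si_addr_lsb;	/* page/hugepage size */
			/* e.g. discard the range and inject a
			 * memory error into the guest */
		}
	}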
  
                kvm_set_pfn_accessed(pfn);
  }
  
 +static bool is_abort_sea(unsigned long fault_status)
 +{
 +      switch (fault_status) {
 +      case FSC_SEA:
 +      case FSC_SEA_TTW0:
 +      case FSC_SEA_TTW1:
 +      case FSC_SEA_TTW2:
 +      case FSC_SEA_TTW3:
 +      case FSC_SECC:
 +      case FSC_SECC_TTW0:
 +      case FSC_SECC_TTW1:
 +      case FSC_SECC_TTW2:
 +      case FSC_SECC_TTW3:
 +              return true;
 +      default:
 +              return false;
 +      }
 +}
 +
  /**
   * kvm_handle_guest_abort - handles all 2nd stage aborts
   * @vcpu:     the VCPU pointer
@@@ -1472,29 -1475,19 +1495,29 @@@ int kvm_handle_guest_abort(struct kvm_v
        gfn_t gfn;
        int ret, idx;
  
 +      fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 +
 +      fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 +
 +      /*
 +       * The host kernel will handle the synchronous external abort. There
 +       * is no need to pass the error into the guest.
 +       */
 +      if (is_abort_sea(fault_status)) {
 +              if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
 +                      return 1;
 +      }
 +
        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
        if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
                kvm_inject_vabt(vcpu);
                return 1;
        }
  
 -      fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 -
        trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
                              kvm_vcpu_get_hfar(vcpu), fault_ipa);
  
        /* Check the stage-2 fault is trans. fault or write fault */
 -      fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
        if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
            fault_status != FSC_ACCESS) {
                kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",