Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 7 Jul 2017 01:38:31 +0000 (18:38 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 7 Jul 2017 01:38:31 +0000 (18:38 -0700)
Pull KVM updates from Paolo Bonzini:
 "PPC:
   - Better machine check handling for HV KVM
   - Ability to support guests with threads=2, 4 or 8 on POWER9
   - Fix for a race that could cause delayed recognition of signals
   - Fix for a bug where POWER9 guests could sleep with interrupts pending.

  ARM:
   - VCPU request overhaul
   - allow timer and PMU to have their interrupt number selected from userspace
   - workaround for Cavium erratum 30115
   - handling of memory poisoning
   - the usual crop of fixes and cleanups

  s390:
   - initial machine check forwarding
   - migration support for the CMMA page hinting information
   - cleanups and fixes

  x86:
   - nested VMX bugfixes and improvements
   - more reliable NMI window detection on AMD
   - APIC timer optimizations

  Generic:
   - VCPU request overhaul + documentation of common code patterns
   - kvm_stat improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  Update my email address
  kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
  x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
  kvm: x86: mmu: allow A/D bits to be disabled in an mmu
  x86: kvm: mmu: make spte mmio mask more explicit
  x86: kvm: mmu: dead code thanks to access tracking
  KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
  KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
  KVM: x86: remove ignored type attribute
  KVM: LAPIC: Fix lapic timer injection delay
  KVM: lapic: reorganize restart_apic_timer
  KVM: lapic: reorganize start_hv_timer
  kvm: nVMX: Check memory operand to INVVPID
  KVM: s390: Inject machine check into the nested guest
  KVM: s390: Inject machine check into the guest
  tools/kvm_stat: add new interactive command 'b'
  tools/kvm_stat: add new command line switch '-i'
  tools/kvm_stat: fix error on interactive command 'g'
  KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
  ...

18 files changed:
Documentation/admin-guide/kernel-parameters.txt
MAINTAINERS
arch/arm64/Kconfig
arch/arm64/include/asm/esr.h
arch/powerpc/kvm/book3s_hv.c
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/gaccess.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/x86/include/asm/msr-index.h
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
virt/kvm/arm/mmu.c

index f24ee1c99412578650eec7bc83e42bcbb3601ed9,aa8341e73b352bb3d091e5f55a7f6612e976fd70..aa1d4409fe0aee0f7af9b7084e8f6e0fd7f697d0
                        /proc/<pid>/coredump_filter.
                        See also Documentation/filesystems/proc.txt.
  
 +      coresight_cpu_debug.enable
 +                      [ARM,ARM64]
 +                      Format: <bool>
 +                      Enable/disable the CPU sampling based debugging.
 +                      0: default value, disable debugging
 +                      1: enable debugging at boot time
 +
        cpuidle.off=1   [CPU_IDLE]
                        disable the cpuidle sub-system
  
                        See also Documentation/input/joystick-parport.txt
  
        ddebug_query=   [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
 -                      time. See Documentation/dynamic-debug-howto.txt for
 +                      time. See
 +                      Documentation/admin-guide/dynamic-debug-howto.rst for
                        details.  Deprecated, see dyndbg.
  
        debug           [KNL] Enable kernel debugging (events log level).
  
        dscc4.setup=    [NET]
  
 +      dt_cpu_ftrs=    [PPC]
 +                      Format: {"off" | "known"}
 +                      Control how the dt_cpu_ftrs device-tree binding is
 +                      used for CPU feature discovery and setup (if it
 +                      exists).
 +                      off: Do not use it, fall back to legacy cpu table.
 +                      known: Do not pass through unknown features to guests
 +                      or userspace, only those that the kernel is aware of.
 +
        dump_apple_properties   [X86]
                        Dump name and content of EFI device properties on
                        x86 Macs.  Useful for driver authors to determine
        dyndbg[="val"]          [KNL,DYNAMIC_DEBUG]
        module.dyndbg[="val"]
                        Enable debug messages at boot time.  See
 -                      Documentation/dynamic-debug-howto.txt for details.
 +                      Documentation/admin-guide/dynamic-debug-howto.rst
 +                      for details.
  
        nompx           [X86] Disables Intel Memory Protection Extensions.
                        See Documentation/x86/intel_mpx.txt for more
                        must already be setup and configured. Options are not
                        yet supported.
  
 +              owl,<addr>
 +                      Start an early, polled-mode console on a serial port
 +                      of an Actions Semi SoC, such as S500 or S900, at the
 +                      specified address. The serial port must already be
 +                      setup and configured. Options are not yet supported.
 +
                smh     Use ARM semihosting calls for early console.
  
                s3c2410,<addr>
                        in crypto/hash_info.h.
  
        ima_policy=     [IMA]
 -                      The builtin measurement policy to load during IMA
 -                      setup.  Specyfing "tcb" as the value, measures all
 -                      programs exec'd, files mmap'd for exec, and all files
 -                      opened with the read mode bit set by either the
 -                      effective uid (euid=0) or uid=0.
 -                      Format: "tcb"
 +                      The builtin policies to load during IMA setup.
 +                      Format: "tcb | appraise_tcb | secure_boot"
 +
 +                      The "tcb" policy measures all programs exec'd, files
 +                      mmap'd for exec, and all files opened with the read
 +                      mode bit set by either the effective uid (euid=0) or
 +                      uid=0.
 +
 +                      The "appraise_tcb" policy appraises the integrity of
 +                      all files owned by root. (This is the equivalent
 +                      of ima_appraise_tcb.)
 +
 +                      The "secure_boot" policy appraises the integrity
 +                      of files (eg. kexec kernel image, kernel modules,
 +                      firmware, policy, etc) based on file signatures.
  
        ima_tcb         [IMA] Deprecated.  Use ima_policy= instead.
                        Load a policy which meets the needs of the Trusted
                        for all guests.
                        Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
  
+       kvm-arm.vgic_v3_group0_trap=
+                       [KVM,ARM] Trap guest accesses to GICv3 group-0
+                       system registers
+       kvm-arm.vgic_v3_group1_trap=
+                       [KVM,ARM] Trap guest accesses to GICv3 group-1
+                       system registers
+       kvm-arm.vgic_v3_common_trap=
+                       [KVM,ARM] Trap guest accesses to GICv3 common
+                       system registers
        kvm-intel.ept=  [KVM,Intel] Disable extended page tables
                        (virtualized MMU) support on capable Intel chips.
                        Default is 1 (enabled)
        memmap=nn[KMG]@ss[KMG]
                        [KNL] Force usage of a specific region of memory.
                        Region of memory to be used is from ss to ss+nn.
 +                      If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
 +                      which limits max address to nn[KMG].
 +                      Multiple different regions can be specified,
 +                      comma delimited.
 +                      Example:
 +                              memmap=100M@2G,100M#3G,1G!1024G
  
        memmap=nn[KMG]#ss[KMG]
                        [KNL,ACPI] Mark specific memory as ACPI data.
                                 memmap=64K$0x18690000
                                 or
                                 memmap=0x10000$0x18690000
 +                      Some bootloaders may need an escape character before '$',
 +                      like Grub2, otherwise '$' and the following number
 +                      will be eaten.
  
        memmap=nn[KMG]!ss[KMG]
                        [KNL,X86] Mark specific memory as protected.
  
        rcutree.gp_cleanup_delay=       [KNL]
                        Set the number of jiffies to delay each step of
 -                      RCU grace-period cleanup.  This only has effect
 -                      when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
 +                      RCU grace-period cleanup.
  
        rcutree.gp_init_delay=  [KNL]
                        Set the number of jiffies to delay each step of
 -                      RCU grace-period initialization.  This only has
 -                      effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
 -                      is set.
 +                      RCU grace-period initialization.
  
        rcutree.gp_preinit_delay=       [KNL]
                        Set the number of jiffies to delay each step of
                        RCU grace-period pre-initialization, that is,
                        the propagation of recent CPU-hotplug changes up
 -                      the rcu_node combining tree.  This only has effect
 -                      when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
 +                      the rcu_node combining tree.
  
        rcutree.rcu_fanout_exact= [KNL]
                        Disable autobalancing of the rcu_node combining
                        This wake_up() will be accompanied by a
                        WARN_ONCE() splat and an ftrace_dump().
  
 +      rcuperf.gp_async= [KNL]
 +                      Measure performance of asynchronous
 +                      grace-period primitives such as call_rcu().
 +
 +      rcuperf.gp_async_max= [KNL]
 +                      Specify the maximum number of outstanding
 +                      callbacks per writer thread.  When a writer
 +                      thread exceeds this limit, it invokes the
 +                      corresponding flavor of rcu_barrier() to allow
 +                      previously posted callbacks to drain.
 +
        rcuperf.gp_exp= [KNL]
                        Measure performance of expedited synchronous
                        grace-period primitives.
        rcuperf.perf_runnable= [BOOT]
                        Start rcuperf running at boot time.
  
 +      rcuperf.perf_type= [KNL]
 +                      Specify the RCU implementation to test.
 +
        rcuperf.shutdown= [KNL]
                        Shut the system down after performance tests
                        complete.  This is useful for hands-off automated
                        testing.
  
 -      rcuperf.perf_type= [KNL]
 -                      Specify the RCU implementation to test.
 -
        rcuperf.verbose= [KNL]
                        Enable additional printk() statements.
  
 +      rcuperf.writer_holdoff= [KNL]
 +                      Write-side holdoff between grace periods,
 +                      in microseconds.  The default of zero says
 +                      no holdoff.
 +
        rcutorture.cbflood_inter_holdoff= [KNL]
                        Set holdoff time (jiffies) between successive
                        callback-flood tests.
        spia_pedr=
        spia_peddr=
  
 +      srcutree.counter_wrap_check [KNL]
 +                      Specifies how frequently to check for
 +                      grace-period sequence counter wrap for the
 +                      srcu_data structure's ->srcu_gp_seq_needed field.
 +                      The greater the number of bits set in this kernel
 +                      parameter, the less frequently counter wrap will
 +                      be checked for.  Note that the bottom two bits
 +                      are ignored.
 +
        srcutree.exp_holdoff [KNL]
                        Specifies how many nanoseconds must elapse
                        since the end of the last SRCU grace period for
                        expediting.  Set to zero to disable automatic
                        expediting.
  
 +      stack_guard_gap=        [MM]
 +                      override the default stack gap protection. The value
 +                      is in page units and it defines how many pages prior
 +                      to (for stacks growing down) resp. after (for stacks
 +                      growing up) the main stack are reserved for no other
 +                      mapping. Default value is 256 pages.
 +
        stacktrace      [FTRACE]
                        Enabled the stack tracer on boot up.
  
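(Editor's illustration, not part of the commit: several entries above, e.g. memmap= and mem=, take sizes in the nn[KMG] notation. Below is a minimal standalone sketch of that suffix handling, loosely modeled on the kernel's memparse() helper; the parse_size() name is hypothetical, and the real helper also accepts T, P and E suffixes.)

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse "100M", "2G", ... into bytes; loosely modeled on memparse(). */
    static unsigned long long parse_size(const char *s, char **end)
    {
            unsigned long long val = strtoull(s, end, 0);

            switch (**end) {
            case 'G': case 'g': val <<= 10; /* fall through */
            case 'M': case 'm': val <<= 10; /* fall through */
            case 'K': case 'k': val <<= 10; (*end)++; break;
            default: break;
            }
            return val;
    }

    int main(void)
    {
            char *end;
            unsigned long long size, start = 0;

            /* e.g. the first region of the documented memmap=100M@2G example */
            size = parse_size("100M@2G", &end);
            if (*end == '@')
                    start = parse_size(end + 1, &end);
            printf("size=%llu bytes, start=%llu bytes\n", size, start);
            return 0;
    }
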
diff --combined MAINTAINERS
index 75ac9dc85804993fa345bd447856ff5b5369cbd4,cc65b44b1226c5ff92a793ca6b9e00f8d0220be2..1c1d106a3347fbf2847db7f84536e6a1879d8114
@@@ -155,7 -155,7 +155,7 @@@ S: Maintaine
  F:    drivers/scsi/53c700*
  
  6LOWPAN GENERIC (BTLE/IEEE 802.15.4)
 -M:    Alexander Aring <aar@pengutronix.de>
 +M:    Alexander Aring <alex.aring@gmail.com>
  M:    Jukka Rissanen <jukka.rissanen@linux.intel.com>
  L:    linux-bluetooth@vger.kernel.org
  L:    linux-wpan@vger.kernel.org
@@@ -478,7 -478,7 +478,7 @@@ L: linux-hwmon@vger.kernel.or
  S:    Maintained
  F:    Documentation/hwmon/ads1015
  F:    drivers/hwmon/ads1015.c
 -F:    include/linux/i2c/ads1015.h
 +F:    include/linux/platform_data/ads1015.h
  
  ADT746X FAN DRIVER
  M:    Colin Leroy <colin@colino.net>
@@@ -1036,22 -1036,6 +1036,22 @@@ S:    Maintaine
  F:    drivers/amba/
  F:    include/linux/amba/bus.h
  
 +ARM/ACTIONS SEMI ARCHITECTURE
 +M:    Andreas Färber <afaerber@suse.de>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +N:    owl
 +F:    arch/arm/mach-actions/
 +F:    arch/arm/boot/dts/owl-*
 +F:    arch/arm64/boot/dts/actions/
 +F:    drivers/clocksource/owl-*
 +F:    drivers/soc/actions/
 +F:    include/dt-bindings/power/owl-*
 +F:    include/linux/soc/actions/
 +F:    Documentation/devicetree/bindings/arm/actions.txt
 +F:    Documentation/devicetree/bindings/power/actions,owl-sps.txt
 +F:    Documentation/devicetree/bindings/timer/actions,owl-timer.txt
 +
  ARM/ADS SPHERE MACHINE SUPPORT
  M:    Lennert Buytenhek <kernel@wantstofly.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1188,7 -1172,7 +1188,7 @@@ N:      clps711
  
  ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
  M:    Hartley Sweeten <hsweeten@visionengravers.com>
 -M:    Ryan Mallon <rmallon@gmail.com>
 +M:    Alexander Sverdlin <alexander.sverdlin@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  F:    arch/arm/mach-ep93xx/
@@@ -1223,9 -1207,7 +1223,9 @@@ L:      linux-arm-kernel@lists.infradead.or
  S:    Maintained
  F:    drivers/hwtracing/coresight/*
  F:    Documentation/trace/coresight.txt
 +F:    Documentation/trace/coresight-cpu-debug.txt
  F:    Documentation/devicetree/bindings/arm/coresight.txt
 +F:    Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
  F:    Documentation/ABI/testing/sysfs-bus-coresight-devices-*
  F:    tools/perf/arch/arm/util/pmu.c
  F:    tools/perf/arch/arm/util/auxtrace.c
@@@ -1507,16 -1489,13 +1507,16 @@@ M:   Gregory Clement <gregory.clement@fre
  M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    arch/arm/mach-mvebu/
 -F:    drivers/rtc/rtc-armada38x.c
  F:    arch/arm/boot/dts/armada*
  F:    arch/arm/boot/dts/kirkwood*
 +F:    arch/arm/configs/mvebu_*_defconfig
 +F:    arch/arm/mach-mvebu/
  F:    arch/arm64/boot/dts/marvell/armada*
  F:    drivers/cpufreq/mvebu-cpufreq.c
 -F:    arch/arm/configs/mvebu_*_defconfig
 +F:    drivers/irqchip/irq-armada-370-xp.c
 +F:    drivers/irqchip/irq-mvebu-*
 +F:    drivers/pinctrl/mvebu/
 +F:    drivers/rtc/rtc-armada38x.c
  
  ARM/Marvell Berlin SoC support
  M:    Jisheng Zhang <jszhang@marvell.com>
@@@ -1682,6 -1661,7 +1682,6 @@@ F:      arch/arm/mach-qcom
  F:    arch/arm64/boot/dts/qcom/*
  F:    drivers/i2c/busses/i2c-qup.c
  F:    drivers/clk/qcom/
 -F:    drivers/pinctrl/qcom/
  F:    drivers/dma/qcom/
  F:    drivers/soc/qcom/
  F:    drivers/spi/spi-qup.c
@@@ -1697,13 -1677,6 +1697,13 @@@ M:    Lennert Buytenhek <kernel@wantstofly
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  
 +ARM/REALTEK ARCHITECTURE
 +M:    Andreas Färber <afaerber@suse.de>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +F:    arch/arm64/boot/dts/realtek/
 +F:    Documentation/devicetree/bindings/arm/realtek.txt
 +
  ARM/RENESAS ARM64 ARCHITECTURE
  M:    Simon Horman <horms@verge.net.au>
  M:    Magnus Damm <magnus.damm@gmail.com>
@@@ -1737,7 -1710,6 +1737,7 @@@ L:      linux-rockchip@lists.infradead.or
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git
  S:    Maintained
  F:    arch/arm/boot/dts/rk3*
 +F:    arch/arm/boot/dts/rv1108*
  F:    arch/arm/mach-rockchip/
  F:    drivers/clk/rockchip/
  F:    drivers/i2c/busses/i2c-rk3x.c
@@@ -1749,6 -1721,7 +1749,6 @@@ N:      rockchi
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <kgene@kernel.org>
  M:    Krzysztof Kozlowski <krzk@kernel.org>
 -R:    Javier Martinez Canillas <javier@osg.samsung.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  Q:    https://patchwork.kernel.org/project/linux-samsung-soc/list/
@@@ -1802,12 -1775,11 +1802,12 @@@ F:   arch/arm/plat-samsung/s5p-dev-mfc.
  F:    drivers/media/platform/s5p-mfc/
  
  ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
 -M:    Kyungmin Park <kyungmin.park@samsung.com>
 -L:    linux-arm-kernel@lists.infradead.org
 +M:    Marek Szyprowski <m.szyprowski@samsung.com>
 +L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  L:    linux-media@vger.kernel.org
  S:    Maintained
 -F:    drivers/staging/media/platform/s5p-cec/
 +F:    drivers/media/platform/s5p-cec/
 +F:    Documentation/devicetree/bindings/media/s5p-cec.txt
  
  ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT
  M:    Andrzej Pietrasiewicz <andrzej.p@samsung.com>
@@@ -1857,6 -1829,7 +1857,6 @@@ F:      drivers/edac/altera_edac
  ARM/STI ARCHITECTURE
  M:    Patrice Chotard <patrice.chotard@st.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 -L:    kernel@stlinux.com
  W:    http://www.stlinux.com
  S:    Maintained
  F:    arch/arm/mach-sti/
@@@ -1870,8 -1843,8 +1870,8 @@@ F:      drivers/i2c/busses/i2c-st.
  F:    drivers/media/rc/st_rc.c
  F:    drivers/media/platform/sti/c8sectpfe/
  F:    drivers/mmc/host/sdhci-st.c
 -F:    drivers/phy/phy-miphy28lp.c
 -F:    drivers/phy/phy-stih407-usb.c
 +F:    drivers/phy/st/phy-miphy28lp.c
 +F:    drivers/phy/st/phy-stih407-usb.c
  F:    drivers/pinctrl/pinctrl-st.c
  F:    drivers/remoteproc/st_remoteproc.c
  F:    drivers/remoteproc/st_slim_rproc.c
@@@ -2349,15 -2322,6 +2349,15 @@@ F:    Documentation/devicetree/bindings/in
  F:    drivers/input/touchscreen/atmel_mxt_ts.c
  F:    include/linux/platform_data/atmel_mxt_ts.h
  
 +ATOMIC INFRASTRUCTURE
 +M:    Will Deacon <will.deacon@arm.com>
 +M:    Peter Zijlstra <peterz@infradead.org>
 +R:    Boqun Feng <boqun.feng@gmail.com>
 +L:    linux-kernel@vger.kernel.org
 +S:    Maintained
 +F:    arch/*/include/asm/atomic*.h
 +F:    include/*/atomic*.h
 +
  ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER
  M:    Bradley Grove <linuxdrivers@attotech.com>
  L:    linux-scsi@vger.kernel.org
@@@ -2721,6 -2685,7 +2721,6 @@@ N:      kon
  F:    arch/arm/mach-bcm/
  
  BROADCOM BCM2835 ARM ARCHITECTURE
 -M:    Lee Jones <lee@kernel.org>
  M:    Eric Anholt <eric@anholt.net>
  M:    Stefan Wahren <stefan.wahren@i2se.com>
  L:    linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -2820,11 -2785,8 +2820,11 @@@ BROADCOM BRCM80211 IEEE802.11n WIRELES
  M:    Arend van Spriel <arend.vanspriel@broadcom.com>
  M:    Franky Lin <franky.lin@broadcom.com>
  M:    Hante Meuleman <hante.meuleman@broadcom.com>
 +M:    Chi-Hsien Lin <chi-hsien.lin@cypress.com>
 +M:    Wright Feng <wright.feng@cypress.com>
  L:    linux-wireless@vger.kernel.org
  L:    brcm80211-dev-list.pdl@broadcom.com
 +L:    brcm80211-dev-list@cypress.com
  S:    Supported
  F:    drivers/net/wireless/broadcom/brcm80211/
  
@@@ -3002,7 -2964,7 +3002,7 @@@ F:      sound/pci/oxygen
  
  C6X ARCHITECTURE
  M:    Mark Salter <msalter@redhat.com>
 -M:    Aurelien Jacquiot <a-jacquiot@ti.com>
 +M:    Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
  L:    linux-c6x-dev@linux-c6x.org
  W:    http://www.linux-c6x.org/wiki/index.php/Main_Page
  S:    Maintained
@@@ -3175,7 -3137,6 +3175,7 @@@ F:      include/media/cec.
  F:    include/media/cec-notifier.h
  F:    include/uapi/linux/cec.h
  F:    include/uapi/linux/cec-funcs.h
 +F:    Documentation/devicetree/bindings/media/cec.txt
  
  CELL BROADBAND ENGINE ARCHITECTURE
  M:    Arnd Bergmann <arnd@arndb.de>
@@@ -3625,6 -3586,7 +3625,6 @@@ T:      git git://git.kernel.org/pub/scm/lin
  S:    Maintained
  F:    Documentation/crypto/
  F:    Documentation/devicetree/bindings/crypto/
 -F:    Documentation/DocBook/crypto-API.tmpl
  F:    arch/*/crypto/
  F:    crypto/
  F:    drivers/crypto/
@@@ -3751,13 -3713,6 +3751,13 @@@ S:    Supporte
  F:    drivers/infiniband/hw/cxgb4/
  F:    include/uapi/rdma/cxgb4-abi.h
  
 +CXGB4 CRYPTO DRIVER (chcr)
 +M:    Harsh Jain <harsh@chelsio.com>
 +L:    linux-crypto@vger.kernel.org
 +W:    http://www.chelsio.com
 +S:    Supported
 +F:    drivers/crypto/chelsio
 +
  CXGB4VF ETHERNET DRIVER (CXGB4VF)
  M:    Casey Leedom <leedom@chelsio.com>
  L:    netdev@vger.kernel.org
@@@ -4188,7 -4143,8 +4188,7 @@@ M:      Jonathan Corbet <corbet@lwn.net
  L:    linux-doc@vger.kernel.org
  S:    Maintained
  F:    Documentation/
 -F:    scripts/docproc.c
 -F:    scripts/kernel-doc*
 +F:    scripts/kernel-doc
  X:    Documentation/ABI/
  X:    Documentation/devicetree/
  X:    Documentation/acpi
@@@ -4736,13 -4692,6 +4736,13 @@@ S:    Maintaine
  F:    drivers/media/usb/dvb-usb-v2/dvb_usb*
  F:    drivers/media/usb/dvb-usb-v2/usb_urb.c
  
 +DONGWOON DW9714 LENS VOICE COIL DRIVER
 +M:    Sakari Ailus <sakari.ailus@linux.intel.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/i2c/dw9714.c
 +
  DYNAMIC DEBUG
  M:    Jason Baron <jbaron@akamai.com>
  S:    Maintained
@@@ -5673,7 -5622,7 +5673,7 @@@ F:      scripts/get_maintainer.p
  
  GENWQE (IBM Generic Workqueue Card)
  M:    Frank Haverkamp <haver@linux.vnet.ibm.com>
 -M:    Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
 +M:    Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
  S:    Supported
  F:    drivers/misc/genwqe/
  
@@@ -5718,6 -5667,7 +5718,6 @@@ F:      tools/testing/selftests/gpio
  
  GPIO SUBSYSTEM
  M:    Linus Walleij <linus.walleij@linaro.org>
 -M:    Alexandre Courbot <gnurou@gmail.com>
  L:    linux-gpio@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
  S:    Maintained
@@@ -6477,7 -6427,7 +6477,7 @@@ F:      Documentation/cdrom/ide-c
  F:    drivers/ide/ide-cd*
  
  IEEE 802.15.4 SUBSYSTEM
 -M:    Alexander Aring <aar@pengutronix.de>
 +M:    Alexander Aring <alex.aring@gmail.com>
  M:    Stefan Schmidt <stefan@osg.samsung.com>
  L:    linux-wpan@vger.kernel.org
  W:    http://wpan.cakelab.org/
@@@ -6531,13 -6481,6 +6531,13 @@@ F:    Documentation/ABI/testing/sysfs-bus-
  F:    Documentation/devicetree/bindings/iio/adc/envelope-detector.txt
  F:    drivers/iio/adc/envelope-detector.c
  
 +IIO MULTIPLEXER
 +M:    Peter Rosin <peda@axentia.se>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/iio/multiplexer/iio-mux.txt
 +F:    drivers/iio/multiplexer/iio-mux.c
 +
  IIO SUBSYSTEM AND DRIVERS
  M:    Jonathan Cameron <jic23@kernel.org>
  R:    Hartmut Knaack <knaack.h@gmx.de>
@@@ -6666,12 -6609,6 +6666,12 @@@ F:    Documentation/input/multi-touch-prot
  F:    drivers/input/input-mt.c
  K:    \b(ABS|SYN)_MT_
  
 +INSIDE SECURE CRYPTO DRIVER
 +M:    Antoine Tenart <antoine.tenart@free-electrons.com>
 +F:    drivers/crypto/inside-secure/
 +S:    Maintained
 +L:    linux-crypto@vger.kernel.org
 +
  INTEL ASoC BDW/HSW DRIVERS
  M:    Jie Yang <yang.jie@linux.intel.com>
  L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
@@@ -6801,7 -6738,6 +6801,7 @@@ F:      Documentation/networking/i40e.tx
  F:    Documentation/networking/i40evf.txt
  F:    drivers/net/ethernet/intel/
  F:    drivers/net/ethernet/intel/*/
 +F:    include/linux/avf/virtchnl.h
  
  INTEL RDMA RNIC DRIVER
  M:     Faisal Latif <faisal.latif@intel.com>
@@@ -7207,7 -7143,7 +7207,7 @@@ S:      Maintaine
  F:    drivers/media/platform/rcar_jpu.c
  
  JSM Neo PCI based serial card
 -M:    Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
 +M:    Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
  L:    linux-serial@vger.kernel.org
  S:    Maintained
  F:    drivers/tty/serial/jsm/
@@@ -7350,7 -7286,7 +7350,7 @@@ F:      arch/powerpc/kvm
  
  KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
  M:    Christian Borntraeger <borntraeger@de.ibm.com>
- M:    Cornelia Huck <cornelia.huck@de.ibm.com>
+ M:    Cornelia Huck <cohuck@redhat.com>
  L:    linux-s390@vger.kernel.org
  W:    http://www.ibm.com/developerworks/linux/linux390/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
@@@ -7413,7 -7349,7 +7413,7 @@@ KEYS/KEYRINGS
  M:    David Howells <dhowells@redhat.com>
  L:    keyrings@vger.kernel.org
  S:    Maintained
 -F:    Documentation/security/keys.txt
 +F:    Documentation/security/keys/core.rst
  F:    include/linux/key.h
  F:    include/linux/key-type.h
  F:    include/linux/keyctl.h
@@@ -7427,7 -7363,7 +7427,7 @@@ M:      Mimi Zohar <zohar@linux.vnet.ibm.com
  L:    linux-security-module@vger.kernel.org
  L:    keyrings@vger.kernel.org
  S:    Supported
 -F:    Documentation/security/keys-trusted-encrypted.txt
 +F:    Documentation/security/keys/trusted-encrypted.rst
  F:    include/keys/trusted-type.h
  F:    security/keys/trusted.c
  F:    security/keys/trusted.h
@@@ -7438,7 -7374,7 +7438,7 @@@ M:      David Safford <safford@us.ibm.com
  L:    linux-security-module@vger.kernel.org
  L:    keyrings@vger.kernel.org
  S:    Supported
 -F:    Documentation/security/keys-trusted-encrypted.txt
 +F:    Documentation/security/keys/trusted-encrypted.rst
  F:    include/keys/encrypted-type.h
  F:    security/keys/encrypted-keys/
  
@@@ -7448,7 -7384,7 +7448,7 @@@ W:      http://kgdb.wiki.kernel.org
  L:    kgdb-bugreport@lists.sourceforge.net
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git
  S:    Maintained
 -F:    Documentation/DocBook/kgdb.tmpl
 +F:    Documentation/dev-tools/kgdb.rst
  F:    drivers/misc/kgdbts.c
  F:    drivers/tty/serial/kgdboc.c
  F:    include/linux/kdb.h
@@@ -7602,15 -7538,6 +7602,15 @@@ S:    Maintaine
  F:    drivers/ata/pata_*.c
  F:    drivers/ata/ata_generic.c
  
 +LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +L:    linux-ide@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
 +S:    Maintained
 +F:    drivers/ata/pata_ftide010.c
 +F:    drivers/ata/sata_gemini.c
 +F:    drivers/ata/sata_gemini.h
 +
  LIBATA SATA AHCI PLATFORM devices support
  M:    Hans de Goede <hdegoede@redhat.com>
  M:    Tejun Heo <tj@kernel.org>
@@@ -7629,7 -7556,7 +7629,7 @@@ S:      Maintaine
  F:    drivers/ata/sata_promise.*
  
  LIBLOCKDEP
 -M:    Sasha Levin <sasha.levin@oracle.com>
 +M:    Sasha Levin <alexander.levin@verizon.com>
  S:    Maintained
  F:    tools/lib/lockdep/
  
@@@ -7780,7 -7707,7 +7780,7 @@@ F:      drivers/platform/x86/hp_accel.
  
  LIVE PATCHING
  M:    Josh Poimboeuf <jpoimboe@redhat.com>
 -M:    Jessica Yu <jeyu@redhat.com>
 +M:    Jessica Yu <jeyu@kernel.org>
  M:    Jiri Kosina <jikos@kernel.org>
  M:    Miroslav Benes <mbenes@suse.cz>
  R:    Petr Mladek <pmladek@suse.com>
@@@ -8051,12 -7978,6 +8051,12 @@@ S:    Maintaine
  F:    drivers/net/ethernet/marvell/mv643xx_eth.*
  F:    include/linux/mv643xx.h
  
 +MARVELL MV88X3310 PHY DRIVER
 +M:    Russell King <rmk@armlinux.org.uk>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    drivers/net/phy/marvell10g.c
 +
  MARVELL MVNETA ETHERNET DRIVER
  M:    Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
  L:    netdev@vger.kernel.org
@@@ -8110,16 -8031,6 +8110,16 @@@ S:    Maintaine
  F:    Documentation/hwmon/max20751
  F:    drivers/hwmon/max20751.c
  
 +MAX2175 SDR TUNER DRIVER
 +M:    Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/media/i2c/max2175.txt
 +F:    Documentation/media/v4l-drivers/max2175.rst
 +F:    drivers/media/i2c/max2175*
 +F:    include/uapi/linux/max2175.h
 +
  MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
  L:    linux-hwmon@vger.kernel.org
  S:    Orphan
@@@ -8150,11 -8061,11 +8150,11 @@@ S:   Supporte
  F:    drivers/power/supply/max14577_charger.c
  F:    drivers/power/supply/max77693_charger.c
  
 -MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
 -M:    Javier Martinez Canillas <javier@osg.samsung.com>
 +MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
 +M:    Javier Martinez Canillas <javier@dowhile0.org>
  L:    linux-kernel@vger.kernel.org
  S:    Supported
 -F:    drivers/*/*max77802*.c
 +F:    drivers/regulator/max77802-regulator.c
  F:    Documentation/devicetree/bindings/*/*max77802.txt
  F:    include/dt-bindings/*/*max77802.h
  
@@@ -8200,27 -8111,6 +8200,27 @@@ L:    linux-iio@vger.kernel.or
  S:    Maintained
  F:    drivers/iio/dac/cio-dac.c
  
 +MEDIA DRIVERS FOR RENESAS - DRIF
 +M:    Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
 +L:    linux-media@vger.kernel.org
 +L:    linux-renesas-soc@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Supported
 +F:    Documentation/devicetree/bindings/media/renesas,drif.txt
 +F:    drivers/media/platform/rcar_drif.c
 +
 +MEDIA DRIVERS FOR FREESCALE IMX
 +M:    Steve Longerbeam <slongerbeam@gmail.com>
 +M:    Philipp Zabel <p.zabel@pengutronix.de>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/media/imx.txt
 +F:    Documentation/media/v4l-drivers/imx.rst
 +F:    drivers/staging/media/imx/
 +F:    include/linux/imx-media.h
 +F:    include/media/imx.h
 +
  MEDIA DRIVERS FOR RENESAS - FCP
  M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  L:    linux-media@vger.kernel.org
@@@ -8378,11 -8268,6 +8378,11 @@@ L:    linux-wireless@vger.kernel.or
  S:    Maintained
  F:    drivers/net/wireless/mediatek/mt7601u/
  
 +MEDIATEK RANDOM NUMBER GENERATOR SUPPORT
 +M:      Sean Wang <sean.wang@mediatek.com>
 +S:      Maintained
 +F:      drivers/char/hw_random/mtk-rng.c
 +
  MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
  M:    Peter Senna Tschudin <peter.senna@collabora.com>
  M:    Martin Donnelly <martin.donnelly@ge.com>
@@@ -8426,26 -8311,6 +8426,26 @@@ W:    http://www.mellanox.co
  Q:    http://patchwork.ozlabs.org/project/netdev/list/
  F:    drivers/net/ethernet/mellanox/mlx5/core/en_*
  
 +MELLANOX ETHERNET INNOVA DRIVER
 +M:    Ilan Tayari <ilant@mellanox.com>
 +R:    Boris Pismenny <borisp@mellanox.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +W:    http://www.mellanox.com
 +Q:    http://patchwork.ozlabs.org/project/netdev/list/
 +F:    drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 +F:    include/linux/mlx5/mlx5_ifc_fpga.h
 +
 +MELLANOX ETHERNET INNOVA IPSEC DRIVER
 +M:    Ilan Tayari <ilant@mellanox.com>
 +R:    Boris Pismenny <borisp@mellanox.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +W:    http://www.mellanox.com
 +Q:    http://patchwork.ozlabs.org/project/netdev/list/
 +F:    drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
 +F:    drivers/net/ethernet/mellanox/mlx5/core/ipsec*
 +
  MELLANOX ETHERNET SWITCH DRIVERS
  M:    Jiri Pirko <jiri@mellanox.com>
  M:    Ido Schimmel <idosch@mellanox.com>
@@@ -8455,14 -8320,6 +8455,14 @@@ W:    http://www.mellanox.co
  Q:    http://patchwork.ozlabs.org/project/netdev/list/
  F:    drivers/net/ethernet/mellanox/mlxsw/
  
 +MELLANOX FIRMWARE FLASH LIBRARY (mlxfw)
 +M:    Yotam Gigi <yotamg@mellanox.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +W:    http://www.mellanox.com
 +Q:    http://patchwork.ozlabs.org/project/netdev/list/
 +F:    drivers/net/ethernet/mellanox/mlxfw/
 +
  MELLANOX MLXCPLD I2C AND MUX DRIVER
  M:    Vadim Pasternak <vadimp@mellanox.com>
  M:    Michael Shych <michaelsh@mellanox.com>
@@@ -8581,7 -8438,7 +8581,7 @@@ T:      git git://git.monstr.eu/linux-2.6-mi
  S:    Supported
  F:    arch/microblaze/
  
 -MICROCHIP / ATMEL AT91 / AT32 SERIAL DRIVER
 +MICROCHIP / ATMEL AT91 SERIAL DRIVER
  M:    Richard Genoud <richard.genoud@gmail.com>
  S:    Maintained
  F:    drivers/tty/serial/atmel_serial.c
@@@ -8604,16 -8461,6 +8604,16 @@@ F:    drivers/media/platform/atmel/atmel-i
  F:    drivers/media/platform/atmel/atmel-isc-regs.h
  F:    devicetree/bindings/media/atmel-isc.txt
  
 +MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
 +M:    Woojung Huh <Woojung.Huh@microchip.com>
 +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    net/dsa/tag_ksz.c
 +F:    drivers/net/dsa/microchip/*
 +F:    include/linux/platform_data/microchip-ksz.h
 +F:    Documentation/devicetree/bindings/net/dsa/ksz.txt
 +
  MICROCHIP USB251XB DRIVER
  M:    Richard Leitner <richard.leitner@skidata.com>
  L:    linux-usb@vger.kernel.org
@@@ -8661,7 -8508,7 +8661,7 @@@ S:      Odd Fixe
  F:    drivers/media/radio/radio-miropcm20*
  
  MELLANOX MLX4 core VPI driver
 -M:    Yishai Hadas <yishaih@mellanox.com>
 +M:    Tariq Toukan <tariqt@mellanox.com>
  L:    netdev@vger.kernel.org
  L:    linux-rdma@vger.kernel.org
  W:    http://www.mellanox.com
@@@ -8669,6 -8516,7 +8669,6 @@@ Q:      http://patchwork.ozlabs.org/project/
  S:    Supported
  F:    drivers/net/ethernet/mellanox/mlx4/
  F:    include/linux/mlx4/
 -F:    include/uapi/rdma/mlx4-abi.h
  
  MELLANOX MLX4 IB driver
  M:    Yishai Hadas <yishaih@mellanox.com>
@@@ -8678,7 -8526,6 +8678,7 @@@ Q:      http://patchwork.kernel.org/project/
  S:    Supported
  F:    drivers/infiniband/hw/mlx4/
  F:    include/linux/mlx4/
 +F:    include/uapi/rdma/mlx4-abi.h
  
  MELLANOX MLX5 core VPI driver
  M:    Saeed Mahameed <saeedm@mellanox.com>
@@@ -8691,6 -8538,7 +8691,6 @@@ Q:      http://patchwork.ozlabs.org/project/
  S:    Supported
  F:    drivers/net/ethernet/mellanox/mlx5/core/
  F:    include/linux/mlx5/
 -F:    include/uapi/rdma/mlx5-abi.h
  
  MELLANOX MLX5 IB driver
  M:    Matan Barak <matanb@mellanox.com>
@@@ -8701,7 -8549,6 +8701,7 @@@ Q:      http://patchwork.kernel.org/project/
  S:    Supported
  F:    drivers/infiniband/hw/mlx5/
  F:    include/linux/mlx5/
 +F:    include/uapi/rdma/mlx5-abi.h
  
  MELEXIS MLX90614 DRIVER
  M:    Crt Mori <cmo@melexis.com>
@@@ -8741,7 -8588,7 +8741,7 @@@ S:      Maintaine
  F:    drivers/media/dvb-frontends/mn88473*
  
  MODULE SUPPORT
 -M:    Jessica Yu <jeyu@redhat.com>
 +M:    Jessica Yu <jeyu@kernel.org>
  M:    Rusty Russell <rusty@rustcorp.com.au>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
  S:    Maintained
@@@ -8869,15 -8716,6 +8869,15 @@@ S:    Orpha
  F:    drivers/mmc/host/mmc_spi.c
  F:    include/linux/spi/mmc_spi.h
  
 +MULTIPLEXER SUBSYSTEM
 +M:    Peter Rosin <peda@axentia.se>
 +S:    Maintained
 +F:    Documentation/ABI/testing/mux/sysfs-class-mux*
 +F:    Documentation/devicetree/bindings/mux/
 +F:    include/linux/dt-bindings/mux/
 +F:    include/linux/mux/
 +F:    drivers/mux/
 +
  MULTISOUND SOUND DRIVER
  M:    Andrew Veliath <andrewtv@usa.net>
  S:    Maintained
@@@ -9106,16 -8944,6 +9106,16 @@@ F:    net/ipv6
  F:    include/net/ip*
  F:    arch/x86/net/*
  
 +NETWORKING [TLS]
 +M:    Ilya Lesokhin <ilyal@mellanox.com>
 +M:    Aviad Yehezkel <aviadye@mellanox.com>
 +M:    Dave Watson <davejwatson@fb.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    net/tls/*
 +F:    include/uapi/linux/tls.h
 +F:    include/net/tls.h
 +
  NETWORKING [IPSEC]
  M:    Steffen Klassert <steffen.klassert@secunet.com>
  M:    Herbert Xu <herbert@gondor.apana.org.au>
@@@ -9197,6 -9025,9 +9197,6 @@@ F:      include/uapi/linux/nfc.
  F:    drivers/nfc/
  F:    include/linux/platform_data/nfcmrvl.h
  F:    include/linux/platform_data/nxp-nci.h
 -F:    include/linux/platform_data/pn544.h
 -F:    include/linux/platform_data/st21nfca.h
 -F:    include/linux/platform_data/st-nci.h
  F:    Documentation/devicetree/bindings/net/nfc/
  
  NFS, SUNRPC, AND LOCKD CLIENTS
@@@ -9588,13 -9419,6 +9588,13 @@@ M:    Harald Welte <laforge@gnumonks.org
  S:    Maintained
  F:    drivers/char/pcmcia/cm4040_cs.*
  
 +OMNIVISION OV5640 SENSOR DRIVER
 +M:    Steve Longerbeam <slongerbeam@gmail.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/i2c/ov5640.c
 +
  OMNIVISION OV5647 SENSOR DRIVER
  M:    Ramiro Oliveira <roliveir@synopsys.com>
  L:    linux-media@vger.kernel.org
@@@ -9610,13 -9434,6 +9610,13 @@@ S:    Maintaine
  F:    drivers/media/i2c/ov7670.c
  F:    Documentation/devicetree/bindings/media/i2c/ov7670.txt
  
 +OMNIVISION OV13858 SENSOR DRIVER
 +M:    Sakari Ailus <sakari.ailus@linux.intel.com>
 +L:    linux-media@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/i2c/ov13858.c
 +
  ONENAND FLASH DRIVER
  M:    Kyungmin Park <kyungmin.park@samsung.com>
  L:    linux-mtd@lists.infradead.org
@@@ -10267,13 -10084,6 +10267,13 @@@ M: Heikki Krogerus <heikki.krogerus@lin
  S:    Maintained
  F:    drivers/pinctrl/intel/
  
 +PIN CONTROLLER - QUALCOMM
 +M:    Bjorn Andersson <bjorn.andersson@linaro.org>
 +S:    Maintained
 +L:    linux-arm-msm@vger.kernel.org
 +F:    Documentation/devicetree/bindings/pinctrl/qcom,*.txt
 +F:    drivers/pinctrl/qcom/
 +
  PIN CONTROLLER - RENESAS
  M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  M:    Geert Uytterhoeven <geert+renesas@glider.be>
@@@ -10345,7 -10155,7 +10345,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
  S:    Maintained
  F:    Documentation/hwmon/pmbus
  F:    drivers/hwmon/pmbus/
 -F:    include/linux/i2c/pmbus.h
 +F:    include/linux/pmbus.h
  
  PMC SIERRA MaxRAID DRIVER
  L:    linux-scsi@vger.kernel.org
@@@ -10640,7 -10450,7 +10640,7 @@@ S:   Orpha
  
  PXA RTC DRIVER
  M:    Robert Jarzmik <robert.jarzmik@free.fr>
 -L:    rtc-linux@googlegroups.com
 +L:    linux-rtc@vger.kernel.org
  S:    Maintained
  
  QAT DRIVER
@@@ -10735,7 -10545,6 +10735,7 @@@ M:   Laurentiu Tudor <laurentiu.tudor@nxp
  L:    linux-kernel@vger.kernel.org
  S:    Maintained
  F:    drivers/staging/fsl-mc/
 +F:    Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
  
  QT1010 MEDIA DRIVER
  M:    Antti Palosaari <crope@iki.fi>
@@@ -10775,14 -10584,6 +10775,14 @@@ T: git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    arch/hexagon/
  
 +QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
 +M:    Stanimir Varbanov <stanimir.varbanov@linaro.org>
 +L:    linux-media@vger.kernel.org
 +L:    linux-arm-msm@vger.kernel.org
 +T:    git git://linuxtv.org/media_tree.git
 +S:    Maintained
 +F:    drivers/media/platform/qcom/venus/
 +
  QUALCOMM WCN36XX WIRELESS DRIVER
  M:    Eugene Krasnikov <k.eugene.e@gmail.com>
  L:    wcn36xx@lists.infradead.org
@@@ -10798,14 -10599,6 +10798,14 @@@ L: qemu-devel@nongnu.or
  S:    Maintained
  F:    drivers/firmware/qemu_fw_cfg.c
  
 +QUANTENNA QTNFMAC WIRELESS DRIVER
 +M:   Igor Mitsyanko <imitsyanko@quantenna.com>
 +M:   Avinash Patil <avinashp@quantenna.com>
 +M:   Sergey Matyukevich <smatyukevich@quantenna.com>
 +L:   linux-wireless@vger.kernel.org
 +S:   Maintained
 +F:   drivers/net/wireless/quantenna
 +
  RADOS BLOCK DEVICE (RBD)
  M:    Ilya Dryomov <idryomov@gmail.com>
  M:    Sage Weil <sage@redhat.com>
@@@ -10964,7 -10757,7 +10964,7 @@@ X:   kernel/torture.
  REAL TIME CLOCK (RTC) SUBSYSTEM
  M:    Alessandro Zummo <a.zummo@towertech.it>
  M:    Alexandre Belloni <alexandre.belloni@free-electrons.com>
 -L:    rtc-linux@googlegroups.com
 +L:    linux-rtc@vger.kernel.org
  Q:    http://patchwork.ozlabs.org/project/rtc-linux/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
  S:    Maintained
@@@ -11039,11 -10832,11 +11039,11 @@@ L:        linux-iio@vger.kernel.or
  S:    Supported
  F:    drivers/iio/adc/rcar_gyro_adc.c
  
 -RENESAS USB2 PHY DRIVER
 +RENESAS USB PHY DRIVER
  M:    Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
  L:    linux-renesas-soc@vger.kernel.org
  S:    Maintained
 -F:    drivers/phy/phy-rcar-gen3-usb2.c
 +F:    drivers/phy/renesas/phy-rcar-gen3-usb*.c
  
  RESET CONTROLLER FRAMEWORK
  M:    Philipp Zabel <p.zabel@pengutronix.de>
@@@ -11200,7 -10993,7 +11200,7 @@@ S:   Supporte
  F:    arch/s390/
  F:    drivers/s390/
  F:    Documentation/s390/
 -F:    Documentation/DocBook/s390*
 +F:    Documentation/driver-api/s390-drivers.rst
  
  S390 COMMON I/O LAYER
  M:    Sebastian Ott <sebott@linux.vnet.ibm.com>
@@@ -11268,7 -11061,7 +11268,7 @@@ S:   Supporte
  F:    drivers/iommu/s390-iommu.c
  
  S390 VFIO-CCW DRIVER
- M:    Cornelia Huck <cornelia.huck@de.ibm.com>
+ M:    Cornelia Huck <cohuck@redhat.com>
  M:    Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  L:    linux-s390@vger.kernel.org
  L:    kvm@vger.kernel.org
@@@ -11445,12 -11238,12 +11445,12 @@@ L:        linux-kernel@vger.kernel.or
  S:    Supported
  F:    Documentation/devicetree/bindings/phy/samsung-phy.txt
  F:    Documentation/phy/samsung-usb2.txt
 -F:    drivers/phy/phy-exynos4210-usb2.c
 -F:    drivers/phy/phy-exynos4x12-usb2.c
 -F:    drivers/phy/phy-exynos5250-usb2.c
 -F:    drivers/phy/phy-s5pv210-usb2.c
 -F:    drivers/phy/phy-samsung-usb2.c
 -F:    drivers/phy/phy-samsung-usb2.h
 +F:    drivers/phy/samsung/phy-exynos4210-usb2.c
 +F:    drivers/phy/samsung/phy-exynos4x12-usb2.c
 +F:    drivers/phy/samsung/phy-exynos5250-usb2.c
 +F:    drivers/phy/samsung/phy-s5pv210-usb2.c
 +F:    drivers/phy/samsung/phy-samsung-usb2.c
 +F:    drivers/phy/samsung/phy-samsung-usb2.h
  
  SERIAL DRIVERS
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@@ -11475,6 -11268,7 +11475,6 @@@ F:   drivers/media/rc/serial_ir.
  
  STI CEC DRIVER
  M:    Benjamin Gaignard <benjamin.gaignard@linaro.org>
 -L:    kernel@stlinux.com
  S:    Maintained
  F:    drivers/staging/media/st-cec/
  F:    Documentation/devicetree/bindings/media/stih-cec.txt
@@@ -11534,9 -11328,6 +11534,9 @@@ F:   Documentation/tee.tx
  
  THUNDERBOLT DRIVER
  M:    Andreas Noever <andreas.noever@gmail.com>
 +M:    Michael Jamet <michael.jamet@intel.com>
 +M:    Mika Westerberg <mika.westerberg@linux.intel.com>
 +M:    Yehezkel Bernat <yehezkel.bernat@intel.com>
  S:    Maintained
  F:    drivers/thunderbolt/
  
@@@ -11564,14 -11355,6 +11564,14 @@@ F: kernel/time/alarmtimer.
  F:    kernel/time/ntp.c
  F:    tools/testing/selftests/timers/
  
 +TI TRF7970A NFC DRIVER
 +M:    Mark Greer <mgreer@animalcreek.com>
 +L:    linux-wireless@vger.kernel.org
 +L:    linux-nfc@lists.01.org (moderated for non-subscribers)
 +S:    Supported
 +F:    drivers/nfc/trf7970a.c
 +F:    Documentation/devicetree/bindings/net/nfc/trf7970a.txt
 +
  SC1200 WDT DRIVER
  M:    Zwane Mwaikambo <zwanem@gmail.com>
  S:    Maintained
@@@ -11712,7 -11495,6 +11712,7 @@@ F:   kernel/seccomp.
  F:    include/uapi/linux/seccomp.h
  F:    include/linux/seccomp.h
  F:    tools/testing/selftests/seccomp/*
 +F:    Documentation/userspace-api/seccomp_filter.rst
  K:    \bsecure_computing
  K:    \bTIF_SECCOMP\b
  
@@@ -11771,7 -11553,6 +11771,7 @@@ S:   Supporte
  F:    include/linux/selinux*
  F:    security/selinux/
  F:    scripts/selinux/
 +F:    Documentation/admin-guide/LSM/SELinux.rst
  
  APPARMOR SECURITY MODULE
  M:    John Johansen <john.johansen@canonical.com>
@@@ -11780,21 -11561,18 +11780,21 @@@ W:        apparmor.wiki.kernel.or
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
  S:    Supported
  F:    security/apparmor/
 +F:    Documentation/admin-guide/LSM/apparmor.rst
  
  LOADPIN SECURITY MODULE
  M:    Kees Cook <keescook@chromium.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git lsm/loadpin
  S:    Supported
  F:    security/loadpin/
 +F:    Documentation/admin-guide/LSM/LoadPin.rst
  
  YAMA SECURITY MODULE
  M:    Kees Cook <keescook@chromium.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip
  S:    Supported
  F:    security/yama/
 +F:    Documentation/admin-guide/LSM/Yama.rst
  
  SENSABLE PHANTOM
  M:    Jiri Slaby <jirislaby@gmail.com>
@@@ -12000,7 -11778,6 +12000,7 @@@ T:   git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    arch/arm/mach-davinci/
  F:    drivers/i2c/busses/i2c-davinci.c
 +F:    arch/arm/boot/dts/da850*
  
  TI DAVINCI SERIES MEDIA DRIVER
  M:    "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
@@@ -12097,7 -11874,7 +12097,7 @@@ L:   linux-security-module@vger.kernel.or
  W:    http://schaufler-ca.com
  T:    git git://github.com/cschaufler/smack-next
  S:    Maintained
 -F:    Documentation/security/Smack.txt
 +F:    Documentation/admin-guide/LSM/Smack.rst
  F:    security/smack/
  
  DRIVERS FOR ADAPTIVE VOLTAGE SCALING (AVS)
@@@ -12187,9 -11964,8 +12187,9 @@@ F:   drivers/leds/leds-net48xx.
  
  SOFTLOGIC 6x10 MPEG CODEC
  M:    Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 +M:    Anton Sviridenko <anton@corp.bluecherry.net>
  M:    Andrey Utkin <andrey.utkin@corp.bluecherry.net>
 -M:    Andrey Utkin <andrey.krieger.utkin@gmail.com>
 +M:    Andrey Utkin <andrey_utkin@fastmail.com>
  M:    Ismael Luceno <ismael@iodev.co.uk>
  L:    linux-media@vger.kernel.org
  S:    Supported
@@@ -12857,8 -12633,6 +12857,8 @@@ F:   include/linux/soc/ti/ti_sci_protocol
  F:    Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
  F:    include/dt-bindings/genpd/k2g.h
  F:    drivers/soc/ti/ti_sci_pm_domains.c
 +F:    Documentation/devicetree/bindings/reset/ti,sci-reset.txt
 +F:    drivers/reset/reset-ti-sci.c
  
  THANKO'S RAREMONO AM/FM/SW RADIO RECEIVER USB DRIVER
  M:    Hans Verkuil <hverkuil@xs4all.nl>
@@@ -13108,7 -12882,7 +13108,7 @@@ M:   Wolfram Sang <wsa+renesas@sang-engin
  L:    linux-mmc@vger.kernel.org
  S:    Supported
  F:    drivers/mmc/host/tmio_mmc*
 -F:    drivers/mmc/host/sh_mobile_sdhi.c
 +F:    drivers/mmc/host/renesas_sdhi*
  F:    include/linux/mfd/tmio.h
  
  TMP401 HARDWARE MONITOR DRIVER
@@@ -13137,7 -12911,6 +13137,7 @@@ F:   Documentation/media/v4l-drivers/tm60
  
  TW5864 VIDEO4LINUX DRIVER
  M:    Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 +M:    Anton Sviridenko <anton@corp.bluecherry.net>
  M:    Andrey Utkin <andrey.utkin@corp.bluecherry.net>
  M:    Andrey Utkin <andrey_utkin@fastmail.com>
  L:    linux-media@vger.kernel.org
@@@ -13690,17 -13463,6 +13690,17 @@@ W: http://en.wikipedia.org/wiki/Util-li
  T:    git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
  S:    Maintained
  
 +UUID HELPERS
 +M:    Christoph Hellwig <hch@lst.de>
 +R:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 +L:    linux-kernel@vger.kernel.org
 +T:    git git://git.infradead.org/users/hch/uuid.git
 +F:    lib/uuid.c
 +F:    lib/test_uuid.c
 +F:    include/linux/uuid.h
 +F:    include/uapi/linux/uuid.h
 +S:    Maintained
 +
  UVESAFB DRIVER
  M:    Michal Januszewski <spock@gentoo.org>
  L:    linux-fbdev@vger.kernel.org
@@@ -13763,12 -13525,6 +13763,12 @@@ S: Maintaine
  F:    drivers/media/v4l2-core/videobuf2-*
  F:    include/media/videobuf2-*
  
 +VIDEO MULTIPLEXER DRIVER
 +M:    Philipp Zabel <p.zabel@pengutronix.de>
 +L:    linux-media@vger.kernel.org
 +S:    Maintained
 +F:    drivers/media/platform/video-mux.c
 +
  VIRTIO AND VHOST VSOCK DRIVER
  M:    Stefan Hajnoczi <stefanha@redhat.com>
  L:    kvm@vger.kernel.org
@@@ -13814,7 -13570,7 +13814,7 @@@ F:   include/uapi/linux/virtio_*.
  F:    drivers/crypto/virtio/
  
  VIRTIO DRIVERS FOR S390
- M:    Cornelia Huck <cornelia.huck@de.ibm.com>
+ M:    Cornelia Huck <cohuck@redhat.com>
  M:    Halil Pasic <pasic@linux.vnet.ibm.com>
  L:    linux-s390@vger.kernel.org
  L:    virtualization@lists.linux-foundation.org
@@@ -14013,7 -13769,6 +14013,7 @@@ M:   Evgeniy Polyakov <zbr@ioremap.net
  S:    Maintained
  F:    Documentation/w1/
  F:    drivers/w1/
 +F:    include/linux/w1.h
  
  W83791D HARDWARE MONITORING DRIVER
  M:    Marc Hulsman <m.hulsman@tudelft.nl>
@@@ -14106,7 -13861,7 +14106,7 @@@ S:   Odd fixe
  F:    drivers/net/wireless/wl3501*
  
  WOLFSON MICROELECTRONICS DRIVERS
 -L:    patches@opensource.wolfsonmicro.com
 +L:    patches@opensource.cirrus.com
  T:    git https://github.com/CirrusLogic/linux-drivers.git
  W:    https://github.com/CirrusLogic/linux-drivers/wiki
  S:    Supported
diff --combined arch/arm64/Kconfig
index 9f7a934ff707a00b8495ab0ca533fb86468d3aba,6252365b0c96d3bde1dbb3e54ed49b14d53e790f..192208ea284224dd73c5bd663d5b21e4c6caf347
@@@ -3,7 -3,6 +3,7 @@@ config ARM6
        select ACPI_CCA_REQUIRED if ACPI
        select ACPI_GENERIC_GSI if ACPI
        select ACPI_GTDT if ACPI
 +      select ACPI_IORT if ACPI
        select ACPI_REDUCED_HARDWARE_ONLY if ACPI
        select ACPI_MCFG if ACPI
        select ACPI_SPCR_TABLE if ACPI
@@@ -20,9 -19,7 +20,9 @@@
        select ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_HAS_STRICT_MODULE_RWX
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 +      select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
        select ARCH_USE_CMPXCHG_LOCKREF
 +      select ARCH_SUPPORTS_MEMORY_FAILURE
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_NUMA_BALANCING
        select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
@@@ -44,7 -41,6 +44,7 @@@
        select EDAC_SUPPORT
        select FRAME_POINTER
        select GENERIC_ALLOCATOR
 +      select GENERIC_ARCH_TOPOLOGY
        select GENERIC_CLOCKEVENTS
        select GENERIC_CLOCKEVENTS_BROADCAST
        select GENERIC_CPU_AUTOPROBE
@@@ -96,7 -92,6 +96,7 @@@
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP if NUMA
 +      select HAVE_NMI if ACPI_APEI_SEA
        select HAVE_PATA_PLATFORM
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
@@@ -210,7 -205,7 +210,7 @@@ config GENERIC_CALIBRATE_DELA
  config ZONE_DMA
        def_bool y
  
 -config HAVE_GENERIC_RCU_GUP
 +config HAVE_GENERIC_GUP
        def_bool y
  
  config ARCH_DMA_ADDR_T_64BIT
@@@ -249,9 -244,6 +249,9 @@@ config PGTABLE_LEVEL
  config ARCH_SUPPORTS_UPROBES
        def_bool y
  
 +config ARCH_PROC_KCORE_TEXT
 +      def_bool y
 +
  source "init/Kconfig"
  
  source "kernel/Kconfig.freezer"
@@@ -488,6 -480,17 +488,17 @@@ config CAVIUM_ERRATUM_2745
  
          If unsure, say Y.
  
+ config CAVIUM_ERRATUM_30115
+       bool "Cavium erratum 30115: Guest may disable interrupts in host"
+       default y
+       help
+         On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through
+         1.2, and T83 Pass 1.0, KVM guest execution may disable
+         interrupts in host. Trapping both GICv3 group-0 and group-1
+         accesses sidesteps the issue.
+ 
+         If unsure, say Y.
+ 
  config QCOM_FALKOR_ERRATUM_1003
        bool "Falkor E1003: Incorrect translation due to ASID change"
        default y
@@@ -990,7 -993,7 +1001,7 @@@ config RANDOMIZE_BAS
  
  config RANDOMIZE_MODULE_REGION_FULL
        bool "Randomize the module region independently from the core kernel"
 -      depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
 +      depends on RANDOMIZE_BASE
        default y
        help
          Randomizes the location of the module region without considering the
@@@ -1092,6 -1095,10 +1103,6 @@@ config SYSVIPC_COMPA
        def_bool y
        depends on COMPAT && SYSVIPC
  
 -config KEYS_COMPAT
 -      def_bool y
 -      depends on COMPAT && KEYS
 -
  endmenu
  
  menu "Power management options"
index 28bf02efce76d7adf8ae514ce10d87c0f093d659,e7d8e281ff62f7780bf2bae77788c7e237a9887b..8cabd57b634832ca7c4df7d8fee39549233167dd
@@@ -19,6 -19,7 +19,7 @@@
  #define __ASM_ESR_H
  
  #include <asm/memory.h>
+ #include <asm/sysreg.h>
  
  #define ESR_ELx_EC_UNKNOWN    (0x00)
  #define ESR_ELx_EC_WFx                (0x01)
@@@ -83,7 -84,6 +84,7 @@@
  #define ESR_ELx_WNR           (UL(1) << 6)
  
  /* Shared ISS field definitions for Data/Instruction aborts */
 +#define ESR_ELx_FnV           (UL(1) << 10)
  #define ESR_ELx_EA            (UL(1) << 9)
  #define ESR_ELx_S1PTW         (UL(1) << 7)
  
  #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ  (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
                                         ESR_ELx_SYS64_ISS_DIR_READ)
  
+ #define esr_sys64_to_sysreg(e)                                        \
+       sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP0_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP1_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRN_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRM_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP2_SHIFT))
+ 
+ #define esr_cp15_to_sysreg(e)                                 \
+       sys_reg(3,                                              \
+               (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP1_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRN_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >>          \
+                ESR_ELx_SYS64_ISS_CRM_SHIFT),                  \
+               (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >>          \
+                ESR_ELx_SYS64_ISS_OP2_SHIFT))
+ 
  #ifndef __ASSEMBLY__
  #include <asm/types.h>
  
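(Editor's illustration, not part of the commit: esr_sys64_to_sysreg() above repacks the Op0/Op1/CRn/CRm/Op2 fields of a trapped system-register access into the sys_reg() encoding. Below is a standalone sketch of that field extraction, assuming the ARMv8 ISS layout for trapped MSR/MRS instructions (Op0 in bits [21:20], Op2 in [19:17], Op1 in [16:14], CRn in [13:10], CRm in [4:1]); the ESR_ELx_SYS64_ISS_* mask and shift constants used by the macro are defined elsewhere in this header, so the values here are assumptions.)

    #include <stdio.h>

    /* Assumed ISS field layout for a trapped MSR/MRS (ESR_ELx.EC = 0x18). */
    #define ISS_OP0_SHIFT 20
    #define ISS_OP0_MASK  (0x3u << ISS_OP0_SHIFT)
    #define ISS_OP2_SHIFT 17
    #define ISS_OP2_MASK  (0x7u << ISS_OP2_SHIFT)
    #define ISS_OP1_SHIFT 14
    #define ISS_OP1_MASK  (0x7u << ISS_OP1_SHIFT)
    #define ISS_CRN_SHIFT 10
    #define ISS_CRN_MASK  (0xfu << ISS_CRN_SHIFT)
    #define ISS_CRM_SHIFT 1
    #define ISS_CRM_MASK  (0xfu << ISS_CRM_SHIFT)

    int main(void)
    {
            unsigned int esr = 0x30d41au; /* hypothetical ISS bits of a trapped access */

            printf("op0=%u op1=%u crn=%u crm=%u op2=%u\n",
                   (esr & ISS_OP0_MASK) >> ISS_OP0_SHIFT,
                   (esr & ISS_OP1_MASK) >> ISS_OP1_SHIFT,
                   (esr & ISS_CRN_MASK) >> ISS_CRN_SHIFT,
                   (esr & ISS_CRM_MASK) >> ISS_CRM_SHIFT,
                   (esr & ISS_OP2_MASK) >> ISS_OP2_SHIFT);
            return 0;
    }
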
index 773b35d16a0b61ddc3b13f02fd8a7eaca9d4976b,c6313c5d331c0a960f278b7fca34b7b1f1e0d1e9..0b436df746fcb094d8bd8b3c928c0862d6a90df5
@@@ -46,6 -46,8 +46,8 @@@
  #include <linux/of.h>
  
  #include <asm/reg.h>
+ #include <asm/ppc-opcode.h>
+ #include <asm/disassemble.h>
  #include <asm/cputable.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
@@@ -645,6 -647,7 +647,7 @@@ static void kvmppc_create_dtl_entry(str
        unsigned long stolen;
        unsigned long core_stolen;
        u64 now;
+       unsigned long flags;
  
        dt = vcpu->arch.dtl_ptr;
        vpa = vcpu->arch.vpa.pinned_addr;
        core_stolen = vcore_stolen_time(vc, now);
        stolen = core_stolen - vcpu->arch.stolen_logged;
        vcpu->arch.stolen_logged = core_stolen;
-       spin_lock_irq(&vcpu->arch.tbacct_lock);
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        stolen += vcpu->arch.busy_stolen;
        vcpu->arch.busy_stolen = 0;
-       spin_unlock_irq(&vcpu->arch.tbacct_lock);
+       spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
        if (!dt || !vpa)
                return;
        memset(dt, 0, sizeof(struct dtl_entry));
        vcpu->arch.dtl.dirty = true;
  }
  
+ /* See if there is a doorbell interrupt pending for a vcpu */
+ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+ {
+       int thr;
+       struct kvmppc_vcore *vc;
+ 
+       if (vcpu->arch.doorbell_request)
+               return true;
+       /*
+        * Ensure that the read of vcore->dpdes comes after the read
+        * of vcpu->doorbell_request.  This barrier matches the
+        * lwsync in book3s_hv_rmhandlers.S just before the
+        * fast_guest_return label.
+        */
+       smp_rmb();
+       vc = vcpu->arch.vcore;
+       thr = vcpu->vcpu_id - vc->first_vcpuid;
+       return !!(vc->dpdes & (1 << thr));
+ }
+
  static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
  {
        if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@@ -926,6 -949,101 +949,101 @@@ static int kvmppc_emulate_debug_inst(st
        }
  }
  
+ static void do_nothing(void *x)
+ {
+ }
+
+ static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+ {
+       int thr, cpu, pcpu, nthreads;
+       struct kvm_vcpu *v;
+       unsigned long dpdes;
+
+       nthreads = vcpu->kvm->arch.emul_smt_mode;
+       dpdes = 0;
+       cpu = vcpu->vcpu_id & ~(nthreads - 1);
+       for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+               v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+               if (!v)
+                       continue;
+               /*
+                * If the vcpu is currently running on a physical cpu thread,
+                * interrupt it in order to pull it out of the guest briefly,
+                * which will update its vcore->dpdes value.
+                */
+               pcpu = READ_ONCE(v->cpu);
+               if (pcpu >= 0)
+                       smp_call_function_single(pcpu, do_nothing, NULL, 1);
+               if (kvmppc_doorbell_pending(v))
+                       dpdes |= 1 << thr;
+       }
+       return dpdes;
+ }
+
+ /*
+  * On POWER9, emulate doorbell-related instructions in order to
+  * give the guest the illusion of running on a multi-threaded core.
+  * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+  * and mfspr DPDES.
+  */
+ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+ {
+       u32 inst, rb, thr;
+       unsigned long arg;
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_vcpu *tvcpu;
+
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               return EMULATE_FAIL;
+       if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
+               return RESUME_GUEST;
+       if (get_op(inst) != 31)
+               return EMULATE_FAIL;
+       rb = get_rb(inst);
+       thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+       switch (get_xop(inst)) {
+       case OP_31_XOP_MSGSNDP:
+               arg = kvmppc_get_gpr(vcpu, rb);
+               if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+                       break;
+               arg &= 0x3f;
+               if (arg >= kvm->arch.emul_smt_mode)
+                       break;
+               tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+               if (!tvcpu)
+                       break;
+               if (!tvcpu->arch.doorbell_request) {
+                       tvcpu->arch.doorbell_request = 1;
+                       kvmppc_fast_vcpu_kick_hv(tvcpu);
+               }
+               break;
+       case OP_31_XOP_MSGCLRP:
+               arg = kvmppc_get_gpr(vcpu, rb);
+               if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+                       break;
+               vcpu->arch.vcore->dpdes = 0;
+               vcpu->arch.doorbell_request = 0;
+               break;
+       case OP_31_XOP_MFSPR:
+               switch (get_sprn(inst)) {
+               case SPRN_TIR:
+                       arg = thr;
+                       break;
+               case SPRN_DPDES:
+                       arg = kvmppc_read_dpdes(vcpu);
+                       break;
+               default:
+                       return EMULATE_FAIL;
+               }
+               kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+               break;
+       default:
+               return EMULATE_FAIL;
+       }
+       kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+       return RESUME_GUEST;
+ }
+
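For orientation, a sketch of the field extraction the decoder above relies on; these are assumed to mirror the get_op()/get_xop()/get_rb() helpers from asm/disassemble.h (PowerPC numbers bits from the MSB, so the primary opcode sits in bits 0-5, the extended opcode in bits 21-30 and RB in bits 16-20).

static inline unsigned int sketch_get_op(u32 inst)  { return inst >> 26; }
static inline unsigned int sketch_get_xop(u32 inst) { return (inst >> 1) & 0x3ff; }
static inline unsigned int sketch_get_rb(u32 inst)  { return (inst >> 11) & 0x1f; }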
  static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                 struct task_struct *tsk)
  {
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_MACHINE_CHECK:
-               /*
-                * Deliver a machine check interrupt to the guest.
-                * We have to do this, even if the host has handled the
-                * machine check, because machine checks use SRR0/1 and
-                * the interrupt might have trashed guest state in them.
-                */
-               kvmppc_book3s_queue_irqprio(vcpu,
-                                           BOOK3S_INTERRUPT_MACHINE_CHECK);
-               r = RESUME_GUEST;
+               /* Exit to guest with KVM_EXIT_NMI as exit reason */
+               run->exit_reason = KVM_EXIT_NMI;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               /* Clear out the old NMI status from run->flags */
+               run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+               /* Now set the NMI status */
+               if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+                       run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+               else
+                       run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+               r = RESUME_HOST;
+               /* Print the MCE event to host console. */
+               machine_check_print_event_info(&vcpu->arch.mce_evt, false);
                break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                break;
        /*
         * This occurs if the guest (kernel or userspace) does something that
-        * is prohibited by HFSCR.  We just generate a program interrupt to
-        * the guest.
+        * is prohibited by HFSCR.
+        * On POWER9, this could be a doorbell instruction that we need
+        * to emulate.
+        * Otherwise, we just generate a program interrupt to the guest.
         */
        case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
-               kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-               r = RESUME_GUEST;
+               r = EMULATE_FAIL;
+               if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+                       r = kvmppc_emulate_doorbell_instr(vcpu);
+               if (r == EMULATE_FAIL) {
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       r = RESUME_GUEST;
+               }
                break;
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                r = RESUME_PASSTHROUGH;
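
A hedged sketch of the userspace side of the KVM_EXIT_NMI machine-check handling above; vcpu_fd and the mmap'd run structure come from the usual KVM vcpu setup, and error handling is omitted.

ioctl(vcpu_fd, KVM_RUN, 0);
if (run->exit_reason == KVM_EXIT_NMI) {
	int disp = run->flags & KVM_RUN_PPC_NMI_DISP_MASK;

	if (disp == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV)
		;	/* recovered machine check: log it and re-enter the guest */
	else
		;	/* not recovered: reset or stop the guest */
}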
@@@ -1143,6 -1273,12 +1273,12 @@@ static void kvmppc_set_lpcr(struct kvm_
        mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                mask |= LPCR_AIL;
+       /*
+        * On POWER9, allow userspace to enable large decrementer for the
+        * guest, whether or not the host has it enabled.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               mask |= LPCR_LD;
  
        /* Broken 32-bit version of LPCR must not clear top bits */
        if (preserve_top32)
@@@ -1611,7 -1747,7 +1747,7 @@@ static struct kvmppc_vcore *kvmppc_vcor
        init_swait_queue_head(&vcore->wq);
        vcore->preempt_tb = TB_NIL;
        vcore->lpcr = kvm->arch.lpcr;
-       vcore->first_vcpuid = core * threads_per_vcore();
+       vcore->first_vcpuid = core * kvm->arch.smt_mode;
        vcore->kvm = kvm;
        INIT_LIST_HEAD(&vcore->preempt_list);
  
@@@ -1770,14 -1906,10 +1906,10 @@@ static struct kvm_vcpu *kvmppc_core_vcp
                                                   unsigned int id)
  {
        struct kvm_vcpu *vcpu;
-       int err = -EINVAL;
+       int err;
        int core;
        struct kvmppc_vcore *vcore;
  
-       core = id / threads_per_vcore();
-       if (core >= KVM_MAX_VCORES)
-               goto out;
        err = -ENOMEM;
        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu)
        vcpu->arch.busy_preempt = TB_NIL;
        vcpu->arch.intr_msr = MSR_SF | MSR_ME;
  
+       /*
+        * Set the default HFSCR for the guest from the host value.
+        * This value is only used on POWER9.
+        * On POWER9 DD1, TM doesn't work, so we make sure to
+        * prevent the guest from using it.
+        * On POWER9, we want to virtualize the doorbell facility, so we
+        * turn off the HFSCR bit, which causes those instructions to trap.
+        */
+       vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+       if (!cpu_has_feature(CPU_FTR_TM))
+               vcpu->arch.hfscr &= ~HFSCR_TM;
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               vcpu->arch.hfscr &= ~HFSCR_MSGP;
+
        kvmppc_mmu_book3s_hv_init(vcpu);
  
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
        init_waitqueue_head(&vcpu->arch.cpu_run);
  
        mutex_lock(&kvm->lock);
-       vcore = kvm->arch.vcores[core];
-       if (!vcore) {
-               vcore = kvmppc_vcore_create(kvm, core);
-               kvm->arch.vcores[core] = vcore;
-               kvm->arch.online_vcores++;
+       vcore = NULL;
+       err = -EINVAL;
+       core = id / kvm->arch.smt_mode;
+       if (core < KVM_MAX_VCORES) {
+               vcore = kvm->arch.vcores[core];
+               if (!vcore) {
+                       err = -ENOMEM;
+                       vcore = kvmppc_vcore_create(kvm, core);
+                       kvm->arch.vcores[core] = vcore;
+                       kvm->arch.online_vcores++;
+               }
        }
        mutex_unlock(&kvm->lock);
  
        return ERR_PTR(err);
  }
  
+ static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+                             unsigned long flags)
+ {
+       int err;
+       int esmt = 0;
+
+       if (flags)
+               return -EINVAL;
+       if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+               return -EINVAL;
+       if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+               /*
+                * On POWER8 (or POWER7), the threading mode is "strict",
+                * so we pack smt_mode vcpus per vcore.
+                */
+               if (smt_mode > threads_per_subcore)
+                       return -EINVAL;
+       } else {
+               /*
+                * On POWER9, the threading mode is "loose",
+                * so each vcpu gets its own vcore.
+                */
+               esmt = smt_mode;
+               smt_mode = 1;
+       }
+       mutex_lock(&kvm->lock);
+       err = -EBUSY;
+       if (!kvm->arch.online_vcores) {
+               kvm->arch.smt_mode = smt_mode;
+               kvm->arch.emul_smt_mode = esmt;
+               err = 0;
+       }
+       mutex_unlock(&kvm->lock);
+       return err;
+ }
+
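A sketch of how userspace reaches kvmhv_set_smt_mode(), assuming, as elsewhere in this series, that KVM_CAP_PPC_SMT is enabled per VM with args[0] = mode and args[1] = flags.

struct kvm_enable_cap cap = {
	.cap  = KVM_CAP_PPC_SMT,
	.args = { 4, 0 },	/* emulate an SMT-4 core, no flags */
};

ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* -EBUSY once any vcore exists */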
  static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
  {
        if (vpa->pinned_addr)
@@@ -1897,7 -2086,7 +2086,7 @@@ static void kvmppc_end_cede(struct kvm_
        }
  }
  
- extern void __kvmppc_vcore_entry(void);
+ extern int __kvmppc_vcore_entry(void);
  
  static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
                                   struct kvm_vcpu *vcpu)
@@@ -1962,10 -2151,6 +2151,6 @@@ static void kvmppc_release_hwthread(in
        tpaca->kvm_hstate.kvm_split_mode = NULL;
  }
  
- static void do_nothing(void *x)
- {
- }
  static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
  {
        int i;
                        smp_call_function_single(cpu + i, do_nothing, NULL, 1);
  }
  
+ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+ {
+       struct kvm *kvm = vcpu->kvm;
+
+       /*
+        * With radix, the guest can do TLB invalidations itself,
+        * and it could choose to use the local form (tlbiel) if
+        * it is invalidating a translation that has only ever been
+        * used on one vcpu.  However, that doesn't mean it has
+        * only ever been used on one physical cpu, since vcpus
+        * can move around between pcpus.  To cope with this, when
+        * a vcpu moves from one pcpu to another, we need to tell
+        * any vcpus running on the same core as this vcpu previously
+        * ran to flush the TLB.  The TLB is shared between threads,
+        * so we use a single bit in .need_tlb_flush for all 4 threads.
+        */
+       if (vcpu->arch.prev_cpu != pcpu) {
+               if (vcpu->arch.prev_cpu >= 0 &&
+                   cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+                   cpu_first_thread_sibling(pcpu))
+                       radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+               vcpu->arch.prev_cpu = pcpu;
+       }
+ }
+
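Concretely (a made-up example): if vcpu 3 last ran on pcpu 8 and is now being scheduled on pcpu 16, and those two pcpus are on different cores, a tlbiel the guest executed while on pcpu 8 may have left stale translations on that core, so radix_flush_cpu() asks pcpu 8's core to flush before the vcpu runs on pcpu 16.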
  static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
  {
        int cpu;
        struct paca_struct *tpaca;
-       struct kvmppc_vcore *mvc = vc->master_vcore;
        struct kvm *kvm = vc->kvm;
  
        cpu = vc->pcpu;
                        vcpu->arch.timer_running = 0;
                }
                cpu += vcpu->arch.ptid;
-               vcpu->cpu = mvc->pcpu;
+               vcpu->cpu = vc->pcpu;
                vcpu->arch.thread_cpu = cpu;
-               /*
-                * With radix, the guest can do TLB invalidations itself,
-                * and it could choose to use the local form (tlbiel) if
-                * it is invalidating a translation that has only ever been
-                * used on one vcpu.  However, that doesn't mean it has
-                * only ever been used on one physical cpu, since vcpus
-                * can move around between pcpus.  To cope with this, when
-                * a vcpu moves from one pcpu to another, we need to tell
-                * any vcpus running on the same core as this vcpu previously
-                * ran to flush the TLB.  The TLB is shared between threads,
-                * so we use a single bit in .need_tlb_flush for all 4 threads.
-                */
-               if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
-                       if (vcpu->arch.prev_cpu >= 0 &&
-                           cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
-                           cpu_first_thread_sibling(cpu))
-                               radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-                       vcpu->arch.prev_cpu = cpu;
-               }
                cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
        }
        tpaca = &paca[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
-       tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+       tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
        /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
        smp_wmb();
-       tpaca->kvm_hstate.kvm_vcore = mvc;
+       tpaca->kvm_hstate.kvm_vcore = vc;
        if (cpu != smp_processor_id())
                kvmppc_ipi_thread(cpu);
  }
@@@ -2155,8 -2344,7 +2344,7 @@@ struct core_info 
        int             max_subcore_threads;
        int             total_threads;
        int             subcore_threads[MAX_SUBCORES];
-       struct kvm      *subcore_vm[MAX_SUBCORES];
-       struct list_head vcs[MAX_SUBCORES];
+       struct kvmppc_vcore *vc[MAX_SUBCORES];
  };
  
  /*
@@@ -2167,17 -2355,12 +2355,12 @@@ static int subcore_thread_map[MAX_SUBCO
  
  static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
  {
-       int sub;
        memset(cip, 0, sizeof(*cip));
        cip->n_subcores = 1;
        cip->max_subcore_threads = vc->num_threads;
        cip->total_threads = vc->num_threads;
        cip->subcore_threads[0] = vc->num_threads;
-       cip->subcore_vm[0] = vc->kvm;
-       for (sub = 0; sub < MAX_SUBCORES; ++sub)
-               INIT_LIST_HEAD(&cip->vcs[sub]);
-       list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+       cip->vc[0] = vc;
  }
  
  static bool subcore_config_ok(int n_subcores, int n_threads)
        return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
  }
  
- static void init_master_vcore(struct kvmppc_vcore *vc)
+ static void init_vcore_to_run(struct kvmppc_vcore *vc)
  {
-       vc->master_vcore = vc;
        vc->entry_exit_map = 0;
        vc->in_guest = 0;
        vc->napping_threads = 0;
@@@ -2224,9 -2406,9 +2406,9 @@@ static bool can_dynamic_split(struct kv
        ++cip->n_subcores;
        cip->total_threads += vc->num_threads;
        cip->subcore_threads[sub] = vc->num_threads;
-       cip->subcore_vm[sub] = vc->kvm;
-       init_master_vcore(vc);
-       list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+       cip->vc[sub] = vc;
+       init_vcore_to_run(vc);
+       list_del_init(&vc->preempt_list);
  
        return true;
  }
@@@ -2294,6 -2476,18 +2476,18 @@@ static void collect_piggybacks(struct c
        spin_unlock(&lp->lock);
  }
  
+ static bool recheck_signals(struct core_info *cip)
+ {
+       int sub, i;
+       struct kvm_vcpu *vcpu;
+
+       for (sub = 0; sub < cip->n_subcores; ++sub)
+               for_each_runnable_thread(i, vcpu, cip->vc[sub])
+                       if (signal_pending(vcpu->arch.run_task))
+                               return true;
+       return false;
+ }
+
  static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
  {
        int still_running = 0, i;
                        wake_up(&vcpu->arch.cpu_run);
                }
        }
-       list_del_init(&vc->preempt_list);
        if (!is_master) {
                if (still_running > 0) {
                        kvmppc_vcore_preempt(vc);
@@@ -2393,6 -2586,21 +2586,21 @@@ static inline int kvmppc_set_host_core(
        return 0;
  }
  
+ static void set_irq_happened(int trap)
+ {
+       switch (trap) {
+       case BOOK3S_INTERRUPT_EXTERNAL:
+               local_paca->irq_happened |= PACA_IRQ_EE;
+               break;
+       case BOOK3S_INTERRUPT_H_DOORBELL:
+               local_paca->irq_happened |= PACA_IRQ_DBELL;
+               break;
+       case BOOK3S_INTERRUPT_HMI:
+               local_paca->irq_happened |= PACA_IRQ_HMI;
+               break;
+       }
+ }
+
  /*
   * Run a set of guest threads on a physical core.
   * Called with vc->lock held.
@@@ -2403,7 -2611,7 +2611,7 @@@ static noinline void kvmppc_run_core(st
        int i;
        int srcu_idx;
        struct core_info core_info;
-       struct kvmppc_vcore *pvc, *vcnext;
+       struct kvmppc_vcore *pvc;
        struct kvm_split_mode split_info, *sip;
        int split, subcore_size, active;
        int sub;
        int pcpu, thr;
        int target_threads;
        int controlled_threads;
+       int trap;
  
        /*
         * Remove from the list any threads that have a signal pending
        /*
         * Initialize *vc.
         */
-       init_master_vcore(vc);
+       init_vcore_to_run(vc);
        vc->preempt_tb = TB_NIL;
  
        /*
        if (vc->num_threads < target_threads)
                collect_piggybacks(&core_info, target_threads);
  
+       /*
+        * On radix, arrange for TLB flushing if necessary.
+        * This has to be done before disabling interrupts since
+        * it uses smp_call_function().
+        */
+       pcpu = smp_processor_id();
+       if (kvm_is_radix(vc->kvm)) {
+               for (sub = 0; sub < core_info.n_subcores; ++sub)
+                       for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+                               kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+       }
+
+       /*
+        * Hard-disable interrupts, and check resched flag and signals.
+        * If we need to reschedule or deliver a signal, clean up
+        * and return without going into the guest(s).
+        */
+       local_irq_disable();
+       hard_irq_disable();
+       if (lazy_irq_pending() || need_resched() ||
+           recheck_signals(&core_info)) {
+               local_irq_enable();
+               vc->vcore_state = VCORE_INACTIVE;
+               /* Unlock all except the primary vcore */
+               for (sub = 1; sub < core_info.n_subcores; ++sub) {
+                       pvc = core_info.vc[sub];
+                       /* Put back on to the preempted vcores list */
+                       kvmppc_vcore_preempt(pvc);
+                       spin_unlock(&pvc->lock);
+               }
+               for (i = 0; i < controlled_threads; ++i)
+                       kvmppc_release_hwthread(pcpu + i);
+               return;
+       }
+
+       kvmppc_clear_host_core(pcpu);
+
        /* Decide on micro-threading (split-core) mode */
        subcore_size = threads_per_subcore;
        cmd_bit = stat_bit = 0;
                split_info.ldbar = mfspr(SPRN_LDBAR);
                split_info.subcore_size = subcore_size;
                for (sub = 0; sub < core_info.n_subcores; ++sub)
-                       split_info.master_vcs[sub] =
-                               list_first_entry(&core_info.vcs[sub],
-                                       struct kvmppc_vcore, preempt_list);
+                       split_info.vc[sub] = core_info.vc[sub];
                /* order writes to split_info before kvm_split_mode pointer */
                smp_wmb();
        }
-       pcpu = smp_processor_id();
        for (thr = 0; thr < controlled_threads; ++thr)
                paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
  
                }
        }
  
-       kvmppc_clear_host_core(pcpu);
        /* Start all the threads */
        active = 0;
        for (sub = 0; sub < core_info.n_subcores; ++sub) {
                thr = subcore_thread_map[sub];
                thr0_done = false;
                active |= 1 << thr;
-               list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
-                       pvc->pcpu = pcpu + thr;
-                       for_each_runnable_thread(i, vcpu, pvc) {
-                               kvmppc_start_thread(vcpu, pvc);
-                               kvmppc_create_dtl_entry(vcpu, pvc);
-                               trace_kvm_guest_enter(vcpu);
-                               if (!vcpu->arch.ptid)
-                                       thr0_done = true;
-                               active |= 1 << (thr + vcpu->arch.ptid);
-                       }
-                       /*
-                        * We need to start the first thread of each subcore
-                        * even if it doesn't have a vcpu.
-                        */
-                       if (pvc->master_vcore == pvc && !thr0_done)
-                               kvmppc_start_thread(NULL, pvc);
-                       thr += pvc->num_threads;
+               pvc = core_info.vc[sub];
+               pvc->pcpu = pcpu + thr;
+               for_each_runnable_thread(i, vcpu, pvc) {
+                       kvmppc_start_thread(vcpu, pvc);
+                       kvmppc_create_dtl_entry(vcpu, pvc);
+                       trace_kvm_guest_enter(vcpu);
+                       if (!vcpu->arch.ptid)
+                               thr0_done = true;
+                       active |= 1 << (thr + vcpu->arch.ptid);
                }
+               /*
+                * We need to start the first thread of each subcore
+                * even if it doesn't have a vcpu.
+                */
+               if (!thr0_done)
+                       kvmppc_start_thread(NULL, pvc);
+               thr += pvc->num_threads;
        }
  
        /*
        trace_kvmppc_run_core(vc, 0);
  
        for (sub = 0; sub < core_info.n_subcores; ++sub)
-               list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
-                       spin_unlock(&pvc->lock);
+               spin_unlock(&core_info.vc[sub]->lock);
+       /*
+        * Interrupts will be enabled once we get into the guest,
+        * so tell lockdep that we're about to enable interrupts.
+        */
+       trace_hardirqs_on();
  
        guest_enter();
  
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
  
-       __kvmppc_vcore_entry();
+       trap = __kvmppc_vcore_entry();
  
        srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
  
+       guest_exit();
+       trace_hardirqs_off();
+       set_irq_happened(trap);
        spin_lock(&vc->lock);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
        vc->vcore_state = VCORE_EXITING;
                split_info.do_nap = 0;
        }
  
+       kvmppc_set_host_core(pcpu);
+       local_irq_enable();
        /* Let secondaries go back to the offline loop */
        for (i = 0; i < controlled_threads; ++i) {
                kvmppc_release_hwthread(pcpu + i);
                cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
        }
  
-       kvmppc_set_host_core(pcpu);
        spin_unlock(&vc->lock);
  
        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
-       guest_exit();
  
-       for (sub = 0; sub < core_info.n_subcores; ++sub)
-               list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
-                                        preempt_list)
-                       post_guest_process(pvc, pvc == vc);
+       for (sub = 0; sub < core_info.n_subcores; ++sub) {
+               pvc = core_info.vc[sub];
+               post_guest_process(pvc, pvc == vc);
+       }
  
        spin_lock(&vc->lock);
        preempt_enable();
@@@ -2666,6 -2917,30 +2917,30 @@@ static void shrink_halt_poll_ns(struct 
                vc->halt_poll_ns /= halt_poll_ns_shrink;
  }
  
+ #ifdef CONFIG_KVM_XICS
+ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+ {
+       if (!xive_enabled())
+               return false;
+       return vcpu->arch.xive_saved_state.pipr <
+               vcpu->arch.xive_saved_state.cppr;
+ }
+ #else
+ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+ {
+       return false;
+ }
+ #endif /* CONFIG_KVM_XICS */
+
+ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+ {
+       if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+           kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+               return true;
+       return false;
+ }
+
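As a worked example of the XIVE test above: smaller values are more favored, so a pending-interrupt priority (pipr) of 2 against a current priority threshold (cppr) of 5 satisfies pipr < cppr and the vcpu counts as woken, while pipr 7 against cppr 5 stays masked.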
  /*
   * Check to see if any of the runnable vcpus on the vcore have pending
   * exceptions or are no longer ceded
@@@ -2676,8 -2951,7 +2951,7 @@@ static int kvmppc_vcore_check_block(str
        int i;
  
        for_each_runnable_thread(i, vcpu, vc) {
-               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
-                   vcpu->arch.prodded)
+               if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
                        return 1;
        }
  
@@@ -2819,15 -3093,14 +3093,14 @@@ static int kvmppc_run_vcpu(struct kvm_r
         */
        if (!signal_pending(current)) {
                if (vc->vcore_state == VCORE_PIGGYBACK) {
-                       struct kvmppc_vcore *mvc = vc->master_vcore;
-                       if (spin_trylock(&mvc->lock)) {
-                               if (mvc->vcore_state == VCORE_RUNNING &&
-                                   !VCORE_IS_EXITING(mvc)) {
+                       if (spin_trylock(&vc->lock)) {
+                               if (vc->vcore_state == VCORE_RUNNING &&
+                                   !VCORE_IS_EXITING(vc)) {
                                        kvmppc_create_dtl_entry(vcpu, vc);
                                        kvmppc_start_thread(vcpu, vc);
                                        trace_kvm_guest_enter(vcpu);
                                }
-                               spin_unlock(&mvc->lock);
+                               spin_unlock(&vc->lock);
                        }
                } else if (vc->vcore_state == VCORE_RUNNING &&
                           !VCORE_IS_EXITING(vc)) {
                        break;
                n_ceded = 0;
                for_each_runnable_thread(i, v, vc) {
-                       if (!v->arch.pending_exceptions && !v->arch.prodded)
+                       if (!kvmppc_vcpu_woken(v))
                                n_ceded += v->arch.ceded;
                        else
                                v->arch.ceded = 0;
@@@ -3368,7 -3641,7 +3641,7 @@@ void kvmppc_alloc_host_rm_ops(void
                return;
        }
  
 -      get_online_cpus();
 +      cpus_read_lock();
  
        for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
                if (!cpu_online(cpu))
        l_ops = (unsigned long) ops;
  
        if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
 -              put_online_cpus();
 +              cpus_read_unlock();
                kfree(ops->rm_core);
                kfree(ops);
                return;
        }
  
 -      cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
 -                                "ppc/kvm_book3s:prepare",
 -                                kvmppc_set_host_core,
 -                                kvmppc_clear_host_core);
 -      put_online_cpus();
 +      cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
 +                                           "ppc/kvm_book3s:prepare",
 +                                           kvmppc_set_host_core,
 +                                           kvmppc_clear_host_core);
 +      cpus_read_unlock();
  }
  
  void kvmppc_free_host_rm_ops(void)
@@@ -3518,6 -3791,19 +3791,19 @@@ static int kvmppc_core_init_vm_hv(struc
        if (!cpu_has_feature(CPU_FTR_ARCH_300))
                kvm_hv_vm_activated();
  
+       /*
+        * Initialize smt_mode depending on processor.
+        * POWER8 and earlier have to use "strict" threading, where
+        * all vCPUs in a vcore have to run on the same (sub)core,
+        * whereas on POWER9 the threads can each run a different
+        * guest.
+        */
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               kvm->arch.smt_mode = threads_per_subcore;
+       else
+               kvm->arch.smt_mode = 1;
+       kvm->arch.emul_smt_mode = 1;
+
        /*
         * Create a debugfs directory for the VM
         */
@@@ -3947,6 -4233,7 +4233,7 @@@ static struct kvmppc_ops kvm_ops_hv = 
  #endif
        .configure_mmu = kvmhv_configure_mmu,
        .get_rmmu_info = kvmhv_get_rmmu_info,
+       .set_smt_mode = kvmhv_set_smt_mode,
  };
  
  static int kvm_init_subcore_bitmap(void)
index 6baae236f461d97b02c7d00b2dd9c57dafe75e30,495aedbaf44757018b7bb4d217b6d94f563c3910..a409d59919344a277fac0858c2bd12ece1bd83c9
  #define KVM_HALT_POLL_NS_DEFAULT 80000
  
  /* s390-specific vcpu->requests bit members */
- #define KVM_REQ_ENABLE_IBS         8
- #define KVM_REQ_DISABLE_IBS        9
- #define KVM_REQ_ICPT_OPEREXC       10
+ #define KVM_REQ_ENABLE_IBS    KVM_ARCH_REQ(0)
+ #define KVM_REQ_DISABLE_IBS   KVM_ARCH_REQ(1)
+ #define KVM_REQ_ICPT_OPEREXC  KVM_ARCH_REQ(2)
+ #define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+ #define KVM_REQ_STOP_MIGRATION  KVM_ARCH_REQ(4)
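
These now go through the generic vcpu-request machinery documented in this merge; a minimal sketch of the consuming side, with a hypothetical handler name:

if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu))
	vcpu_start_cmma_logging(vcpu);	/* hypothetical handler */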
  
  #define SIGP_CTRL_C           0x80
  #define SIGP_CTRL_SCN_MASK    0x3f
@@@ -56,7 -58,7 +58,7 @@@ union bsca_sigp_ctrl 
                __u8 r : 1;
                __u8 scn : 6;
        };
- } __packed;
+ };
  
  union esca_sigp_ctrl {
        __u16 value;
                __u8 reserved: 7;
                __u8 scn;
        };
- } __packed;
+ };
  
  struct esca_entry {
        union esca_sigp_ctrl sigp_ctrl;
        __u16   reserved1[3];
        __u64   sda;
        __u64   reserved2[6];
- } __packed;
+ };
  
  struct bsca_entry {
        __u8    reserved0;
@@@ -80,7 -82,7 +82,7 @@@
        __u16   reserved[3];
        __u64   sda;
        __u64   reserved2[2];
- } __attribute__((packed));
+ };
  
  union ipte_control {
        unsigned long val;
@@@ -97,7 -99,7 +99,7 @@@ struct bsca_block 
        __u64   mcn;
        __u64   reserved2;
        struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
- } __attribute__((packed));
+ };
  
  struct esca_block {
        union ipte_control ipte_control;
        __u64   mcn[4];
        __u64   reserved2[20];
        struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
- } __packed;
+ };
  
  /*
   * This struct is used to store some machine check info from lowcore
@@@ -274,7 -276,7 +276,7 @@@ struct kvm_s390_sie_block 
  
  struct kvm_s390_itdb {
        __u8    data[256];
- } __packed;
+ };
  
  struct sie_page {
        struct kvm_s390_sie_block sie_block;
        __u8 reserved218[1000];         /* 0x0218 */
        struct kvm_s390_itdb itdb;      /* 0x0600 */
        __u8 reserved700[2304];         /* 0x0700 */
- } __packed;
+ };
  
  struct kvm_vcpu_stat {
        u64 exit_userspace;
@@@ -556,6 -558,7 +558,6 @@@ struct kvm_s390_float_interrupt 
        struct mutex ais_lock;
        u8 simm;
        u8 nimm;
 -      int ais_enabled;
  };
  
  struct kvm_hw_wp_info_arch {
@@@ -695,7 -698,7 +697,7 @@@ struct sie_page2 
        __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64];    /* 0x0000 */
        struct kvm_s390_crypto_cb crycb;                /* 0x0800 */
        u8 reserved900[0x1000 - 0x900];                 /* 0x0900 */
- } __packed;
+ };
  
  struct kvm_s390_vsie {
        struct mutex mutex;
        struct page *pages[KVM_MAX_VCPUS];
  };
  
+ struct kvm_s390_migration_state {
+       unsigned long bitmap_size;      /* in bits (number of guest pages) */
+       atomic64_t dirty_pages;         /* number of dirty pages */
+       unsigned long *pgste_bitmap;
+ }
+
  struct kvm_arch{
        void *sca;
        int use_esca;
        struct kvm_s390_crypto crypto;
        struct kvm_s390_vsie vsie;
        u64 epoch;
+       struct kvm_s390_migration_state *migration_state;
        /* subset of available cpu features enabled by user space */
        DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
  };
diff --combined arch/s390/kvm/gaccess.c
index 875f8bea8c670dc83f626349d6a35cc17bd0cf8a,17e3a4e71bc90e74c3624c3f0370df56b6bbff53..653cae5e1ee1f97b6de07f665cf2b90ba8619d8e
@@@ -89,7 -89,7 +89,7 @@@ struct region3_table_entry_fc1 
        unsigned long f  : 1; /* Fetch-Protection Bit */
        unsigned long fc : 1; /* Format-Control */
        unsigned long p  : 1; /* DAT-Protection Bit */
-       unsigned long co : 1; /* Change-Recording Override */
+       unsigned long iep: 1; /* Instruction-Execution-Protection */
        unsigned long    : 2;
        unsigned long i  : 1; /* Region-Invalid Bit */
        unsigned long cr : 1; /* Common-Region Bit */
@@@ -131,7 -131,7 +131,7 @@@ struct segment_entry_fc1 
        unsigned long f  : 1; /* Fetch-Protection Bit */
        unsigned long fc : 1; /* Format-Control */
        unsigned long p  : 1; /* DAT-Protection Bit */
-       unsigned long co : 1; /* Change-Recording Override */
+       unsigned long iep: 1; /* Instruction-Execution-Protection */
        unsigned long    : 2;
        unsigned long i  : 1; /* Segment-Invalid Bit */
        unsigned long cs : 1; /* Common-Segment Bit */
@@@ -168,7 -168,8 +168,8 @@@ union page_table_entry 
                unsigned long z  : 1; /* Zero Bit */
                unsigned long i  : 1; /* Page-Invalid Bit */
                unsigned long p  : 1; /* DAT-Protection Bit */
-               unsigned long    : 9;
+               unsigned long iep: 1; /* Instruction-Execution-Protection */
+               unsigned long    : 8;
        };
  };
  
@@@ -241,7 -242,7 +242,7 @@@ struct ale 
        unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
        unsigned long        : 6;
        unsigned long astesn : 32; /* ASTE Sequence Number */
- } __packed;
+ };
  
  struct aste {
        unsigned long i      : 1; /* ASX-Invalid Bit */
        unsigned long ald    : 32;
        unsigned long astesn : 32;
        /* .. more fields there */
- } __packed;
+ };
  
  int ipte_lock_held(struct kvm_vcpu *vcpu)
  {
@@@ -485,6 -486,7 +486,7 @@@ enum prot_type 
        PROT_TYPE_KEYC = 1,
        PROT_TYPE_ALC  = 2,
        PROT_TYPE_DAT  = 3,
+       PROT_TYPE_IEP  = 4,
  };
  
  static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
        switch (code) {
        case PGM_PROTECTION:
                switch (prot) {
+               case PROT_TYPE_IEP:
+                       tec->b61 = 1;
+                       /* FALL THROUGH */
                case PROT_TYPE_LA:
                        tec->b56 = 1;
                        break;
@@@ -551,26 -556,26 +556,26 @@@ static int get_vcpu_asce(struct kvm_vcp
        int rc;
        struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
  
 -      if (!psw.t) {
 +      if (!psw.dat) {
                asce->val = 0;
                asce->r = 1;
                return 0;
        }
  
 -      if (mode == GACC_IFETCH)
 -              psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
 +      if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
 +              psw.as = PSW_BITS_AS_PRIMARY;
  
        switch (psw.as) {
 -      case PSW_AS_PRIMARY:
 +      case PSW_BITS_AS_PRIMARY:
                asce->val = vcpu->arch.sie_block->gcr[1];
                return 0;
 -      case PSW_AS_SECONDARY:
 +      case PSW_BITS_AS_SECONDARY:
                asce->val = vcpu->arch.sie_block->gcr[7];
                return 0;
 -      case PSW_AS_HOME:
 +      case PSW_BITS_AS_HOME:
                asce->val = vcpu->arch.sie_block->gcr[13];
                return 0;
 -      case PSW_AS_ACCREG:
 +      case PSW_BITS_AS_ACCREG:
                rc = ar_translation(vcpu, asce, ar, mode);
                if (rc > 0)
                        return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
@@@ -591,6 -596,7 +596,7 @@@ static int deref_table(struct kvm *kvm
   * @gpa: points to where guest physical (absolute) address should be stored
   * @asce: effective asce
   * @mode: indicates the access mode to be used
+  * @prot: returns the type for protection exceptions
   *
   * Translate a guest virtual address into a guest absolute address by means
   * of dynamic address translation as specified by the architecture.
   */
  static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
                                     unsigned long *gpa, const union asce asce,
-                                    enum gacc_mode mode)
+                                    enum gacc_mode mode, enum prot_type *prot)
  {
        union vaddress vaddr = {.addr = gva};
        union raddress raddr = {.addr = gva};
        union page_table_entry pte;
        int dat_protection = 0;
+       int iep_protection = 0;
        union ctlreg0 ctlreg0;
        unsigned long ptr;
-       int edat1, edat2;
+       int edat1, edat2, iep;
  
        ctlreg0.val = vcpu->arch.sie_block->gcr[0];
        edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+       iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
        if (asce.r)
                goto real_address;
        ptr = asce.origin * 4096;
                        return PGM_TRANSLATION_SPEC;
                if (rtte.fc && edat2) {
                        dat_protection |= rtte.fc1.p;
+                       iep_protection = rtte.fc1.iep;
                        raddr.rfaa = rtte.fc1.rfaa;
                        goto absolute_address;
                }
                        return PGM_TRANSLATION_SPEC;
                if (ste.fc && edat1) {
                        dat_protection |= ste.fc1.p;
+                       iep_protection = ste.fc1.iep;
                        raddr.sfaa = ste.fc1.sfaa;
                        goto absolute_address;
                }
        if (pte.z)
                return PGM_TRANSLATION_SPEC;
        dat_protection |= pte.p;
+       iep_protection = pte.iep;
        raddr.pfra = pte.pfra;
  real_address:
        raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
  absolute_address:
-       if (mode == GACC_STORE && dat_protection)
+       if (mode == GACC_STORE && dat_protection) {
+               *prot = PROT_TYPE_DAT;
                return PGM_PROTECTION;
+       }
+       if (mode == GACC_IFETCH && iep_protection && iep) {
+               *prot = PROT_TYPE_IEP;
+               return PGM_PROTECTION;
+       }
        if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
                return PGM_ADDRESSING;
        *gpa = raddr.addr;
@@@ -771,7 -788,7 +788,7 @@@ static int low_address_protection_enabl
  
        if (!ctlreg0.lap)
                return 0;
 -      if (psw_bits(*psw).t && asce.p)
 +      if (psw_bits(*psw).dat && asce.p)
                return 0;
        return 1;
  }
@@@ -782,6 -799,7 +799,7 @@@ static int guest_page_range(struct kvm_
  {
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
        int lap_enabled, rc = 0;
+       enum prot_type prot;
  
        lap_enabled = low_address_protection_enabled(vcpu, asce);
        while (nr_pages) {
                        return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
                                         PROT_TYPE_LA);
                ga &= PAGE_MASK;
 -              if (psw_bits(*psw).t) {
 +              if (psw_bits(*psw).dat) {
-                       rc = guest_translate(vcpu, ga, pages, asce, mode);
+                       rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
                        if (rc < 0)
                                return rc;
                } else {
                                rc = PGM_ADDRESSING;
                }
                if (rc)
-                       return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
+                       return trans_exc(vcpu, rc, ga, ar, mode, prot);
                ga += PAGE_SIZE;
                pages++;
                nr_pages--;
@@@ -831,7 -849,7 +849,7 @@@ int access_guest(struct kvm_vcpu *vcpu
                pages = vmalloc(nr_pages * sizeof(unsigned long));
        if (!pages)
                return -ENOMEM;
 -      need_ipte_lock = psw_bits(*psw).t && !asce.r;
 +      need_ipte_lock = psw_bits(*psw).dat && !asce.r;
        if (need_ipte_lock)
                ipte_lock(vcpu);
        rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
@@@ -886,6 -904,7 +904,7 @@@ int guest_translate_address(struct kvm_
                            unsigned long *gpa, enum gacc_mode mode)
  {
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       enum prot_type prot;
        union asce asce;
        int rc;
  
                                         mode, PROT_TYPE_LA);
        }
  
 -      if (psw_bits(*psw).t && !asce.r) {      /* Use DAT? */
 +      if (psw_bits(*psw).dat && !asce.r) {    /* Use DAT? */
-               rc = guest_translate(vcpu, gva, gpa, asce, mode);
+               rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
                if (rc > 0)
-                       return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
+                       return trans_exc(vcpu, rc, gva, 0, mode, prot);
        } else {
                *gpa = kvm_s390_real_to_abs(vcpu, gva);
                if (kvm_is_error_gpa(vcpu->kvm, *gpa))
@@@ -977,12 -996,11 +996,12 @@@ static int kvm_s390_shadow_tables(struc
        ptr = asce.origin * 4096;
        if (asce.r) {
                *fake = 1;
 +              ptr = 0;
                asce.dt = ASCE_TYPE_REGION1;
        }
        switch (asce.dt) {
        case ASCE_TYPE_REGION1:
 -              if (vaddr.rfx01 > asce.tl && !asce.r)
 +              if (vaddr.rfx01 > asce.tl && !*fake)
                        return PGM_REGION_FIRST_TRANS;
                break;
        case ASCE_TYPE_REGION2:
                union region1_table_entry rfte;
  
                if (*fake) {
 -                      /* offset in 16EB guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
 +                      ptr += (unsigned long) vaddr.rfx << 53;
                        rfte.val = ptr;
                        goto shadow_r2t;
                }
@@@ -1036,7 -1055,8 +1055,7 @@@ shadow_r2t
                union region2_table_entry rste;
  
                if (*fake) {
 -                      /* offset in 8PB guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
 +                      ptr += (unsigned long) vaddr.rsx << 42;
                        rste.val = ptr;
                        goto shadow_r3t;
                }
@@@ -1063,7 -1083,8 +1082,7 @@@ shadow_r3t
                union region3_table_entry rtte;
  
                if (*fake) {
 -                      /* offset in 4TB guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
 +                      ptr += (unsigned long) vaddr.rtx << 31;
                        rtte.val = ptr;
                        goto shadow_sgt;
                }
@@@ -1099,7 -1120,8 +1118,7 @@@ shadow_sgt
                union segment_table_entry ste;
  
                if (*fake) {
 -                      /* offset in 2G guest memory block */
 -                      ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
 +                      ptr += (unsigned long) vaddr.sx << 20;
                        ste.val = ptr;
                        goto shadow_pgt;
                }
index 2d120fef7d90d915e33d7f19cb0ba39e9a6a264e,f2c78fc1852d38a1955f573c1e0b1da687ddc307..a619ddae610da2c0fabaf66a9a123d1572b51f9c
@@@ -251,8 -251,13 +251,13 @@@ static unsigned long deliverable_irqs(s
                __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
        if (psw_mchk_disabled(vcpu))
                active_mask &= ~IRQ_PEND_MCHK_MASK;
+       /*
+        * Check the cr14 bits of both the floating and the local
+        * interrupts, because bit IRQ_PEND_MCHK_REP could be set in
+        * either case.
+        */
        if (!(vcpu->arch.sie_block->gcr[14] &
-             vcpu->kvm->arch.float_int.mchk.cr14))
+          (vcpu->kvm->arch.float_int.mchk.cr14 |
+          vcpu->arch.local_int.irq.mchk.cr14)))
                __clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
  
        /*
        return ret < 0 ? ret : n;
  }
  
+ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       struct kvm_s390_ais_all ais;
+
+       if (attr->attr < sizeof(ais))
+               return -EINVAL;
+       if (!test_kvm_facility(kvm, 72))
+               return -ENOTSUPP;
+       mutex_lock(&fi->ais_lock);
+       ais.simm = fi->simm;
+       ais.nimm = fi->nimm;
+       mutex_unlock(&fi->ais_lock);
+       if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+               return -EFAULT;
+       return 0;
+ }
+
  static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
  {
        int r;
                r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
                                          attr->attr);
                break;
+       case KVM_DEV_FLIC_AISM_ALL:
+               r = flic_ais_mode_get_all(dev->kvm, attr);
+               break;
        default:
                r = -EINVAL;
        }
@@@ -2160,7 -2190,7 +2190,7 @@@ static int modify_ais_mode(struct kvm *
        struct kvm_s390_ais_req req;
        int ret = 0;
  
 -      if (!fi->ais_enabled)
 +      if (!test_kvm_facility(kvm, 72))
                return -ENOTSUPP;
  
        if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
@@@ -2204,7 -2234,7 +2234,7 @@@ static int kvm_s390_inject_airq(struct 
        };
        int ret = 0;
  
 -      if (!fi->ais_enabled || !adapter->suppressible)
 +      if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
                return kvm_s390_inject_vm(kvm, &s390int);
  
        mutex_lock(&fi->ais_lock);
@@@ -2235,6 -2265,25 +2265,25 @@@ static int flic_inject_airq(struct kvm 
        return kvm_s390_inject_airq(kvm, adapter);
  }
  
+ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       struct kvm_s390_ais_all ais;
+
+       if (!test_kvm_facility(kvm, 72))
+               return -ENOTSUPP;
+       if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+               return -EFAULT;
+       mutex_lock(&fi->ais_lock);
+       fi->simm = ais.simm;
+       fi->nimm = ais.nimm;
+       mutex_unlock(&fi->ais_lock);
+       return 0;
+ }
+
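A hedged sketch of driving both directions from userspace; flic_fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_FLIC, and the FLIC encodes the attribute in the group field with attr carrying the buffer size.

struct kvm_s390_ais_all ais;
struct kvm_device_attr attr = {
	.group = KVM_DEV_FLIC_AISM_ALL,
	.attr  = sizeof(ais),	/* buffer size, checked on get */
	.addr  = (__u64)(unsigned long)&ais,
};

ioctl(flic_fd, KVM_GET_DEVICE_ATTR, &attr);	/* save simm/nimm */
ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);	/* restore them */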
  static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
  {
        int r = 0;
        case KVM_DEV_FLIC_AIRQ_INJECT:
                r = flic_inject_airq(dev->kvm, attr);
                break;
+       case KVM_DEV_FLIC_AISM_ALL:
+               r = flic_ais_mode_set_all(dev->kvm, attr);
+               break;
        default:
                r = -EINVAL;
        }
@@@ -2298,6 -2350,7 +2350,7 @@@ static int flic_has_attr(struct kvm_dev
        case KVM_DEV_FLIC_CLEAR_IO_IRQ:
        case KVM_DEV_FLIC_AISM:
        case KVM_DEV_FLIC_AIRQ_INJECT:
+       case KVM_DEV_FLIC_AISM_ALL:
                return 0;
        }
        return -ENXIO;
@@@ -2415,6 -2468,42 +2468,42 @@@ static int set_adapter_int(struct kvm_k
        return ret;
  }
  
+ /*
+  * Inject the machine check to the guest.
+  */
+ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+                                    struct mcck_volatile_info *mcck_info)
+ {
+       struct kvm_s390_interrupt_info inti;
+       struct kvm_s390_irq irq;
+       struct kvm_s390_mchk_info *mchk;
+       union mci mci;
+       __u64 cr14 = 0;         /* upper bits are not used */
+
+       mci.val = mcck_info->mcic;
+       if (mci.sr)
+               cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
+       if (mci.dg)
+               cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
+       if (mci.w)
+               cr14 |= MCCK_CR14_WARN_SUB_MASK;
+       mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+       mchk->cr14 = cr14;
+       mchk->mcic = mcck_info->mcic;
+       mchk->ext_damage_code = mcck_info->ext_damage_code;
+       mchk->failing_storage_address = mcck_info->failing_storage_address;
+       if (mci.ck) {
+               /* Inject the floating machine check */
+               inti.type = KVM_S390_MCHK;
+               WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+       } else {
+               /* Inject the machine check to specified vcpu */
+               irq.type = KVM_S390_MCHK;
+               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+       }
+ }
+
  int kvm_set_routing_entry(struct kvm *kvm,
                          struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
diff --combined arch/s390/kvm/kvm-s390.c
index b0d7de5a533dcc50249575ff80a1a67ddec077db,ef6419654c162277dc761890424f27612a1d7187..3f2884e99ed4ce461cdb6f08148968880e90747b
@@@ -30,6 -30,7 +30,7 @@@
  #include <linux/vmalloc.h>
  #include <linux/bitmap.h>
  #include <linux/sched/signal.h>
+ #include <linux/string.h>
  
  #include <asm/asm-offsets.h>
  #include <asm/lowcore.h>
@@@ -386,6 -387,7 +387,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
+       case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
                r = 1;
                break;
@@@ -558,6 -560,7 +560,6 @@@ static int kvm_vm_ioctl_enable_cap(stru
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 -                      kvm->arch.float_int.ais_enabled = 1;
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
@@@ -749,6 -752,129 +751,129 @@@ static int kvm_s390_vm_set_crypto(struc
        return 0;
  }
  
+ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+ {
+       int cx;
+       struct kvm_vcpu *vcpu;
+
+       kvm_for_each_vcpu(cx, vcpu, kvm)
+               kvm_s390_sync_request(req, vcpu);
+ }
+
+ /*
+  * Must be called with kvm->srcu held to avoid races on memslots, and with
+  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+  */
+ static int kvm_s390_vm_start_migration(struct kvm *kvm)
+ {
+       struct kvm_s390_migration_state *mgs;
+       struct kvm_memory_slot *ms;
+       /* should be the only one */
+       struct kvm_memslots *slots;
+       unsigned long ram_pages;
+       int slotnr;
+
+       /* migration mode already enabled */
+       if (kvm->arch.migration_state)
+               return 0;
+       slots = kvm_memslots(kvm);
+       if (!slots || !slots->used_slots)
+               return -EINVAL;
+       mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
+       if (!mgs)
+               return -ENOMEM;
+       kvm->arch.migration_state = mgs;
+       if (kvm->arch.use_cmma) {
+               /*
+                * Get the last slot. They should be sorted by base_gfn, so the
+                * last slot is also the one at the end of the address space.
+                * We have verified above that at least one slot is present.
+                */
+               ms = slots->memslots + slots->used_slots - 1;
+               /* round up so we only use full longs */
+               ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
+               /* allocate enough bytes to store all the bits */
+               mgs->pgste_bitmap = vmalloc(ram_pages / 8);
+               if (!mgs->pgste_bitmap) {
+                       kfree(mgs);
+                       kvm->arch.migration_state = NULL;
+                       return -ENOMEM;
+               }
+               mgs->bitmap_size = ram_pages;
+               atomic64_set(&mgs->dirty_pages, ram_pages);
+               /* mark all the pages in active slots as dirty */
+               for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+                       ms = slots->memslots + slotnr;
+                       bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
+               }
+               kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+       }
+       return 0;
+ }
+
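A worked sizing example for the bitmap above: a guest whose last memslot ends at gfn 0x100000 (4 GiB with 4 KiB pages) gives ram_pages = roundup(0x100000, BITS_PER_LONG) = 0x100000 bits, so the vmalloc'd pgste_bitmap is 0x100000 / 8 bytes = 128 KiB, and dirty_pages starts at 1048576 with every page initially marked dirty.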
+ /*
+  * Must be called with kvm->lock to avoid races with ourselves and
+  * kvm_s390_vm_start_migration.
+  */
+ static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+ {
+       struct kvm_s390_migration_state *mgs;
+
+       /* migration mode already disabled */
+       if (!kvm->arch.migration_state)
+               return 0;
+       mgs = kvm->arch.migration_state;
+       kvm->arch.migration_state = NULL;
+       if (kvm->arch.use_cmma) {
+               kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+               vfree(mgs->pgste_bitmap);
+       }
+       kfree(mgs);
+       return 0;
+ }
+
+ static int kvm_s390_vm_set_migration(struct kvm *kvm,
+                                    struct kvm_device_attr *attr)
+ {
+       int idx, res = -ENXIO;
+
+       mutex_lock(&kvm->lock);
+       switch (attr->attr) {
+       case KVM_S390_VM_MIGRATION_START:
+               idx = srcu_read_lock(&kvm->srcu);
+               res = kvm_s390_vm_start_migration(kvm);
+               srcu_read_unlock(&kvm->srcu, idx);
+               break;
+       case KVM_S390_VM_MIGRATION_STOP:
+               res = kvm_s390_vm_stop_migration(kvm);
+               break;
+       default:
+               break;
+       }
+       mutex_unlock(&kvm->lock);
+       return res;
+ }
+
+ static int kvm_s390_vm_get_migration(struct kvm *kvm,
+                                    struct kvm_device_attr *attr)
+ {
+       u64 mig = (kvm->arch.migration_state != NULL);
+
+       if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+               return -ENXIO;
+       if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+               return -EFAULT;
+       return 0;
+ }
+
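A sketch of the userspace side; vm_fd is the VM file descriptor, and on s390 these per-VM attributes are reached through the device-attr ioctls on it.

struct kvm_device_attr attr = {
	.group = KVM_S390_VM_MIGRATION,
	.attr  = KVM_S390_VM_MIGRATION_START,
};
__u64 status;

ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	/* enter migration mode */

attr.attr = KVM_S390_VM_MIGRATION_STATUS;
attr.addr = (__u64)(unsigned long)&status;
ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);	/* status is now 1 */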
  static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
  {
        u8 gtod_high;
@@@ -1089,6 -1215,9 +1214,9 @@@ static int kvm_s390_vm_set_attr(struct 
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
+       case KVM_S390_VM_MIGRATION:
+               ret = kvm_s390_vm_set_migration(kvm, attr);
+               break;
        default:
                ret = -ENXIO;
                break;
@@@ -1111,6 -1240,9 +1239,9 @@@ static int kvm_s390_vm_get_attr(struct 
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
+       case KVM_S390_VM_MIGRATION:
+               ret = kvm_s390_vm_get_migration(kvm, attr);
+               break;
        default:
                ret = -ENXIO;
                break;
@@@ -1178,6 -1310,9 +1309,9 @@@ static int kvm_s390_vm_has_attr(struct 
                        break;
                }
                break;
+       case KVM_S390_VM_MIGRATION:
+               ret = 0;
+               break;
        default:
                ret = -ENXIO;
                break;
        return r;
  }
  
+ /*
+  * Base address and length must be sent at the start of each block, therefore
+  * it's cheaper to send some clean data, as long as it's less than the size of
+  * two longs.
+  */
+ #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+ /* for consistency */
+ #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
+ /*
+  * This function searches for the next page with dirty CMMA attributes, and
+  * saves the attributes in the buffer up to either the end of the buffer or
+  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+  * no trailing clean bytes are saved.
+  * In case no dirty bits were found, or if CMMA was not enabled or used, the
+  * output buffer will indicate 0 as length.
+  */
+ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+                                 struct kvm_s390_cmma_log *args)
+ {
+       struct kvm_s390_migration_state *s = kvm->arch.migration_state;
+       unsigned long bufsize, hva, pgstev, i, next, cur;
+       int srcu_idx, peek, r = 0, rr;
+       u8 *res;
+       cur = args->start_gfn;
+       i = next = pgstev = 0;
+       if (unlikely(!kvm->arch.use_cmma))
+               return -ENXIO;
+       /* Invalid/unsupported flags were specified */
+       if (args->flags & ~KVM_S390_CMMA_PEEK)
+               return -EINVAL;
+       /* A non-peek query is only valid while migration mode is on */
+       peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+       if (!peek && !s)
+               return -EINVAL;
+       /* CMMA is disabled or was not used, or the buffer has length zero */
+       bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+       if (!bufsize || !kvm->mm->context.use_cmma) {
+               memset(args, 0, sizeof(*args));
+               return 0;
+       }
+       if (!peek) {
+               /* We are not peeking, and there are no dirty pages */
+               if (!atomic64_read(&s->dirty_pages)) {
+                       memset(args, 0, sizeof(*args));
+                       return 0;
+               }
+               cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
+                                   args->start_gfn);
+               if (cur >= s->bitmap_size)      /* nothing found, loop back */
+                       cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
+               if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
+                       memset(args, 0, sizeof(*args));
+                       return 0;
+               }
+               next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+       }
+       res = vmalloc(bufsize);
+       if (!res)
+               return -ENOMEM;
+       args->start_gfn = cur;
+       down_read(&kvm->mm->mmap_sem);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       while (i < bufsize) {
+               hva = gfn_to_hva(kvm, cur);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       break;
+               }
+               /* decrement only if we actually flipped the bit to 0 */
+               if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
+                       atomic64_dec(&s->dirty_pages);
+               r = get_pgste(kvm->mm, hva, &pgstev);
+               if (r < 0)
+                       pgstev = 0;
+               /* save the value */
+               res[i++] = (pgstev >> 24) & 0x3;
+               /*
+                * if the next bit is too far away, stop.
+                * if we reached the previous "next", find the next one
+                */
+               if (!peek) {
+                       if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
+                               break;
+                       if (cur == next)
+                               next = find_next_bit(s->pgste_bitmap,
+                                                    s->bitmap_size, cur + 1);
+               /* reached the end of the bitmap or of the buffer, stop */
+                       if ((next >= s->bitmap_size) ||
+                           (next >= args->start_gfn + bufsize))
+                               break;
+               }
+               cur++;
+       }
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       up_read(&kvm->mm->mmap_sem);
+       args->count = i;
+       args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
+       rr = copy_to_user((void __user *)args->values, res, args->count);
+       if (rr)
+               r = -EFAULT;
+       vfree(res);
+       return r;
+ }
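
On the source side of a migration, userspace would typically drain the dirty
values with a loop along these lines (a sketch only; vm_fd, buf and bufsize
are assumptions, and error handling is omitted):

        struct kvm_s390_cmma_log log = {
                .start_gfn = 0,
                .count     = bufsize,
                .flags     = 0,         /* consume; KVM_S390_CMMA_PEEK to peek */
                .values    = (__u64)(unsigned long)buf,
        };

        do {
                ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
                /* send log.count values for gfns starting at log.start_gfn */
                log.start_gfn += log.count;
                log.count = bufsize;
        } while (log.remaining);
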
+ /*
+  * This function sets the CMMA attributes for the given pages. If the input
+  * buffer has zero length, no action is taken, otherwise the attributes are
+  * set and the mm->context.use_cmma flag is set.
+  */
+ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+                                 const struct kvm_s390_cmma_log *args)
+ {
+       unsigned long hva, mask, pgstev, i;
+       uint8_t *bits;
+       int srcu_idx, r = 0;
+       mask = args->mask;
+       if (!kvm->arch.use_cmma)
+               return -ENXIO;
+       /* invalid/unsupported flags */
+       if (args->flags != 0)
+               return -EINVAL;
+       /* Enforce sane limit on memory allocation */
+       if (args->count > KVM_S390_CMMA_SIZE_MAX)
+               return -EINVAL;
+       /* Nothing to do */
+       if (args->count == 0)
+               return 0;
+       bits = vmalloc(sizeof(*bits) * args->count);
+       if (!bits)
+               return -ENOMEM;
+       r = copy_from_user(bits, (void __user *)args->values, args->count);
+       if (r) {
+               r = -EFAULT;
+               goto out;
+       }
+       down_read(&kvm->mm->mmap_sem);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       for (i = 0; i < args->count; i++) {
+               hva = gfn_to_hva(kvm, args->start_gfn + i);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       break;
+               }
+               pgstev = bits[i];
+               pgstev = pgstev << 24;
+               mask &= _PGSTE_GPS_USAGE_MASK;
+               set_pgste_bits(kvm->mm, hva, mask, pgstev);
+       }
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       up_read(&kvm->mm->mmap_sem);
+       if (!kvm->mm->context.use_cmma) {
+               down_write(&kvm->mm->mmap_sem);
+               kvm->mm->context.use_cmma = 1;
+               up_write(&kvm->mm->mmap_sem);
+       }
+ out:
+       vfree(bits);
+       return r;
+ }
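
On the destination, the same structure flows the other way; as the function
above shows, the kernel restricts mask to the usage-state bits on its own, so
userspace can simply pass all ones. A sketch under the same assumptions:

        struct kvm_s390_cmma_log log = {
                .start_gfn = gfn,       /* first gfn of the received run */
                .count     = n,         /* number of values in buf */
                .mask      = ~0ULL,
                .values    = (__u64)(unsigned long)buf,
        };

        ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
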
  long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
  {
                r = kvm_s390_set_skeys(kvm, &args);
                break;
        }
+       case KVM_S390_GET_CMMA_BITS: {
+               struct kvm_s390_cmma_log args;
+               r = -EFAULT;
+               if (copy_from_user(&args, argp, sizeof(args)))
+                       break;
+               r = kvm_s390_get_cmma_bits(kvm, &args);
+               if (!r) {
+                       r = copy_to_user(argp, &args, sizeof(args));
+                       if (r)
+                               r = -EFAULT;
+               }
+               break;
+       }
+       case KVM_S390_SET_CMMA_BITS: {
+               struct kvm_s390_cmma_log args;
+               r = -EFAULT;
+               if (copy_from_user(&args, argp, sizeof(args)))
+                       break;
+               r = kvm_s390_set_cmma_bits(kvm, &args);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@@ -1532,6 -1866,7 +1865,6 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
        mutex_init(&kvm->arch.float_int.ais_lock);
        kvm->arch.float_int.simm = 0;
        kvm->arch.float_int.nimm = 0;
 -      kvm->arch.float_int.ais_enabled = 0;
        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@@ -1631,6 -1966,10 +1964,10 @@@ void kvm_arch_destroy_vm(struct kvm *kv
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
+       if (kvm->arch.migration_state) {
+               vfree(kvm->arch.migration_state->pgste_bitmap);
+               kfree(kvm->arch.migration_state);
+       }
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
  }
  
@@@ -1975,7 -2314,6 +2312,6 @@@ int kvm_s390_vcpu_setup_cmma(struct kvm
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;
  
-       vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
        vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
        return 0;
  }
@@@ -2439,7 -2777,7 +2775,7 @@@ static int kvm_s390_handle_requests(str
  {
  retry:
        kvm_s390_vcpu_request_handled(vcpu);
-       if (!vcpu->requests)
+       if (!kvm_request_pending(vcpu))
                return 0;
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
                goto retry;
        }
  
+       if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+               /*
+                * Disable CMMA virtualization; we will emulate the ESSA
+                * instruction manually, in order to provide additional
+                * functionalities needed for live migration.
+                */
+               vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+               goto retry;
+       }
+       if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+               /*
+                * Re-enable CMMA virtualization if CMMA is available and
+                * was used.
+                */
+               if ((vcpu->kvm->arch.use_cmma) &&
+                   (vcpu->kvm->mm->context.use_cmma))
+                       vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+               goto retry;
+       }
        /* nothing to do, just clear the request */
        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
  
@@@ -2682,6 -3041,9 +3039,9 @@@ static int vcpu_post_run_fault_in_sie(s
  
  static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
  {
+       struct mcck_volatile_info *mcck_info;
+       struct sie_page *sie_page;
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
  
+       if (exit_reason == -EINTR) {
+               VCPU_EVENT(vcpu, 3, "%s", "machine check");
+               sie_page = container_of(vcpu->arch.sie_block,
+                                       struct sie_page, sie_block);
+               mcck_info = &sie_page->mcck_info;
+               kvm_s390_reinject_machine_check(vcpu, mcck_info);
+               return 0;
+       }
        if (vcpu->arch.sie_block->icptcode > 0) {
                int rc = kvm_handle_sie_intercept(vcpu);
  
diff --combined arch/s390/kvm/priv.c
index e53292a892575c40134d5cc35c999fea9a64b97c,a226c459809bf0d16657cec3b578c4fe43cd729b..8a1dac793d6b0ad0685ffd7a35743ca511274035
@@@ -24,6 -24,7 +24,7 @@@
  #include <asm/ebcdic.h>
  #include <asm/sysinfo.h>
  #include <asm/pgtable.h>
+ #include <asm/page-states.h>
  #include <asm/pgalloc.h>
  #include <asm/gmap.h>
  #include <asm/io.h>
@@@ -361,7 -362,7 +362,7 @@@ static int handle_sske(struct kvm_vcpu 
                }
        }
        if (m3 & SSKE_MB) {
 -              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
 +              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
                        vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
                else
                        vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
  static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
  {
        vcpu->stat.instruction_ipte_interlock++;
 -      if (psw_bits(vcpu->arch.sie_block->gpsw).p)
 +      if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
        wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
        kvm_s390_retry_instr(vcpu);
@@@ -901,7 -902,7 +902,7 @@@ static int handle_pfmf(struct kvm_vcpu 
                /* only support 2G frame size if EDAT2 is available and we are
                   not in 24-bit addressing mode */
                if (!test_kvm_facility(vcpu->kvm, 78) ||
 -                  psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
 +                  psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
                        return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
                end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
                break;
                start += PAGE_SIZE;
        }
        if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 -              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
 +              if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
                        vcpu->run->s.regs.gprs[reg2] = end;
                } else {
                        vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
        return 0;
  }
  
+ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+ {
+       struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
+       int r1, r2, nappended, entries;
+       unsigned long gfn, hva, res, pgstev, ptev;
+       unsigned long *cbrlo;
+ 
+       /*
+        * We don't need to set SD.FPF.SK to 1 here, because a machine check
+        * at this point is either handled or fatal.
+        */
+       kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+       gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+       hva = gfn_to_hva(vcpu->kvm, gfn);
+       entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+       if (kvm_is_error_hva(hva))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+       if (nappended < 0) {
+               res = orc ? 0x10 : 0;
+               vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+               return 0;
+       }
+       res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+       /*
+        * Set the block-content state part of the result. 0 means resident, so
+        * nothing to do if the page is valid. 2 is for preserved pages
+        * (non-present and non-zero), and 3 for zero pages (non-present and
+        * zero).
+        */
+       if (ptev & _PAGE_INVALID) {
+               res |= 2;
+               if (pgstev & _PGSTE_GPS_ZERO)
+                       res |= 1;
+       }
+       vcpu->run->s.regs.gprs[r1] = res;
+       /*
+        * It is possible that all the normal 511 slots were full, in which case
+        * we will now write in the 512th slot, which is reserved for host use.
+        * In either case we let the normal ESSA handling code process all the
+        * slots, including the reserved one, if needed.
+        */
+       if (nappended > 0) {
+               cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+               cbrlo[entries] = gfn << PAGE_SHIFT;
+       }
+       if (orc) {
+               /* increment only if we are really flipping the bit to 1 */
+               if (!test_and_set_bit(gfn, ms->pgste_bitmap))
+                       atomic64_inc(&ms->dirty_pages);
+       }
+       return nappended;
+ }
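
The value written back to r1 packs two fields, both visible in the code
above: bits 3:2 carry the PGSTE usage state (PGSTE bits 25:24 shifted down
by 22, assuming the usual _PGSTE_GPS_USAGE_MASK layout) and bits 1:0 carry
the block-content state. A decoding sketch:

        unsigned int usage = (res >> 2) & 0x3;  /* PGSTE usage state */
        unsigned int bcs   = res & 0x3;         /* 0 resident, 2 preserved, 3 zero */
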
  static int handle_essa(struct kvm_vcpu *vcpu)
  {
        /* entries expected to be 1FF */
        int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
        unsigned long *cbrlo;
        struct gmap *gmap;
-       int i;
+       int i, orc;
  
        VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
        gmap = vcpu->arch.gmap;
  
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-       if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+       /* Check for invalid operation request code */
+       orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+       if (orc > ESSA_MAX)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
-       /* Retry the ESSA instruction */
-       kvm_s390_retry_instr(vcpu);
+       if (likely(!vcpu->kvm->arch.migration_state)) {
+               /*
+                * CMMA is enabled in the KVM settings, but is disabled in
+                * the SIE block and in the mm_context, and we are not doing
+                * a migration. Enable CMMA in the mm_context.
+                * Since we need to take a write lock on the context to
+                * avoid races with storage key handling, we first check
+                * whether the value actually needs to change; if it is
+                * already correct, we do nothing and avoid the lock.
+                */
+               if (vcpu->kvm->mm->context.use_cmma == 0) {
+                       down_write(&vcpu->kvm->mm->mmap_sem);
+                       vcpu->kvm->mm->context.use_cmma = 1;
+                       up_write(&vcpu->kvm->mm->mmap_sem);
+               }
+               /*
+                * If we are here, we are supposed to have CMMA enabled in
+                * the SIE block. Enabling CMMA works on a per-CPU basis,
+                * while the context use_cmma flag is per process.
+                * It's possible that the context flag is enabled and the
+                * SIE flag is not, so we always set the flag here; if it
+                * was already set, nothing changes, otherwise we enable
+                * it on this CPU too.
+                */
+               vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+               /* Retry the ESSA instruction */
+               kvm_s390_retry_instr(vcpu);
+       } else {
+               /* Account for the possible extra cbrl entry */
+               i = do_essa(vcpu, orc);
+               if (i < 0)
+                       return i;
+               entries += i;
+       }
        vcpu->arch.sie_block->cbrlo &= PAGE_MASK;       /* reset nceo */
        cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
        down_read(&gmap->mm->mmap_sem);
index d406894cd9a2f5b5c36234e1ceb2b5876feeb0cb,dbf266b0d14a5c0ce56c5cbd50a8d7890a9ffc2c..5573c75f8e4ced276c8585b71f0df9b786ea9e90
  #define DEBUGCTLMSR_BTS_OFF_OS                (1UL <<  9)
  #define DEBUGCTLMSR_BTS_OFF_USR               (1UL << 10)
  #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI        (1UL << 11)
 +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
 +#define DEBUGCTLMSR_FREEZE_IN_SMM     (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
  
  #define MSR_PEBS_FRONTEND             0x000003f7
  
  #define HWP_MIN_PERF(x)               (x & 0xff)
  #define HWP_MAX_PERF(x)               ((x & 0xff) << 8)
  #define HWP_DESIRED_PERF(x)           ((x & 0xff) << 16)
 -#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24)
 -#define HWP_ACTIVITY_WINDOW(x)                ((x & 0xff3) << 32)
 -#define HWP_PACKAGE_CONTROL(x)                ((x & 0x1) << 42)
 +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
 +#define HWP_EPP_PERFORMANCE           0x00
 +#define HWP_EPP_BALANCE_PERFORMANCE   0x80
 +#define HWP_EPP_BALANCE_POWERSAVE     0xC0
 +#define HWP_EPP_POWERSAVE             0xFF
 +#define HWP_ACTIVITY_WINDOW(x)                ((unsigned long long)(x & 0xff3) << 32)
 +#define HWP_PACKAGE_CONTROL(x)                ((unsigned long long)(x & 0x1) << 42)
  
  /* IA32_HWP_STATUS */
  #define HWP_GUARANTEED_CHANGE(x)      (x & 0x1)
  #define MSR_IA32_TSC_ADJUST             0x0000003b
  #define MSR_IA32_BNDCFGS              0x00000d90
  
+ #define MSR_IA32_BNDCFGS_RSVD         0x00000ffc
  #define MSR_IA32_XSS                  0x00000da0
  
  #define FEATURE_CONTROL_LOCKED                                (1<<0)
  #define MSR_MISC_PWR_MGMT             0x000001aa
  
  #define MSR_IA32_ENERGY_PERF_BIAS     0x000001b0
 -#define ENERGY_PERF_BIAS_PERFORMANCE  0
 -#define ENERGY_PERF_BIAS_NORMAL               6
 -#define ENERGY_PERF_BIAS_POWERSAVE    15
 +#define ENERGY_PERF_BIAS_PERFORMANCE          0
 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE  4
 +#define ENERGY_PERF_BIAS_NORMAL                       6
 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE    8
 +#define ENERGY_PERF_BIAS_POWERSAVE            15
  
  #define MSR_IA32_PACKAGE_THERM_STATUS         0x000001b1
  
diff --combined arch/x86/kvm/emulate.c
index 80890dee66cebf370a3815e28f7bd7c34025b0d4,4a38b96563917dca0f7d5871c5fadf785b0c9ca7..fb0055953fbc9d6e6a3263e1d0bc4fc3ba900a8e
@@@ -900,7 -900,7 +900,7 @@@ static __always_inline int do_insn_fetc
        if (rc != X86EMUL_CONTINUE)                                     \
                goto done;                                              \
        ctxt->_eip += sizeof(_type);                                    \
-       _x = *(_type __aligned(1) *) ctxt->fetch.ptr;                   \
+       memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));                    \
        ctxt->fetch.ptr += sizeof(_type);                               \
        _x;                                                             \
  })
@@@ -2742,7 -2742,6 +2742,7 @@@ static int em_syscall(struct x86_emulat
                ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
        }
  
 +      ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
        return X86EMUL_CONTINUE;
  }
  
@@@ -3941,6 -3940,25 +3941,25 @@@ static int check_fxsr(struct x86_emulat
        return X86EMUL_CONTINUE;
  }
  
+ /*
+  * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
+  * and restore MXCSR.
+  */
+ static size_t __fxstate_size(int nregs)
+ {
+       return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
+ }
+ 
+ static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
+ {
+       bool cr4_osfxsr;
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               return __fxstate_size(16);
+       cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
+       return __fxstate_size(cr4_osfxsr ? 8 : 0);
+ }
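
Assuming the architectural FXSAVE image, where the x87/control area occupies
the first 160 bytes and the XMM registers follow at 16 bytes each, the helper
above yields three sizes:

        __fxstate_size(0)  == 160       /* CR4.OSFXSR clear: no XMM state */
        __fxstate_size(8)  == 288       /* 32-bit modes with OSFXSR set */
        __fxstate_size(16) == 416       /* 64-bit mode: XMM0-XMM15 */
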
  /*
   * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
   *  1) 16 bit mode
  static int em_fxsave(struct x86_emulate_ctxt *ctxt)
  {
        struct fxregs_state fx_state;
-       size_t size;
        int rc;
  
        rc = check_fxsr(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
-       if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
-               size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
-       else
-               size = offsetof(struct fxregs_state, xmm_space[0]);
-       return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
- }
- static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
-               struct fxregs_state *new)
- {
-       int rc = X86EMUL_CONTINUE;
-       struct fxregs_state old;
-       rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
-       if (rc != X86EMUL_CONTINUE)
-               return rc;
-       /*
-        * 64 bit host will restore XMM 8-15, which is not correct on non-64
-        * bit guests.  Load the current values in order to preserve 64 bit
-        * XMMs after fxrstor.
-        */
- #ifdef CONFIG_X86_64
-       /* XXX: accessing XMM 8-15 very awkwardly */
-       memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
- #endif
-       /*
-        * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
-        * does save and restore MXCSR.
-        */
-       if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
-               memcpy(new->xmm_space, old.xmm_space, 8 * 16);
-       return rc;
+       return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
+                                  fxstate_size(ctxt));
  }
  
  static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
  {
        struct fxregs_state fx_state;
        int rc;
+       size_t size;
  
        rc = check_fxsr(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
-       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
-       if (rc != X86EMUL_CONTINUE)
-               return rc;
+       ctxt->ops->get_fpu(ctxt);
  
-       if (fx_state.mxcsr >> 16)
-               return emulate_gp(ctxt, 0);
+       size = fxstate_size(ctxt);
+       if (size < __fxstate_size(16)) {
+               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+               if (rc != X86EMUL_CONTINUE)
+                       goto out;
+       }
  
-       ctxt->ops->get_fpu(ctxt);
+       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+       if (rc != X86EMUL_CONTINUE)
+               goto out;
  
-       if (ctxt->mode < X86EMUL_MODE_PROT64)
-               rc = fxrstor_fixup(ctxt, &fx_state);
+       if (fx_state.mxcsr >> 16) {
+               rc = emulate_gp(ctxt, 0);
+               goto out;
+       }
  
        if (rc == X86EMUL_CONTINUE)
                rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
  
+ out:
        ctxt->ops->put_fpu(ctxt);
  
        return rc;
diff --combined arch/x86/kvm/mmu.c
index cb8225969255ec006fbff5eab76471f341d92b77,3ba600d09deae751645184c9ea9141b3807ebc6c..aafd399cf8c6f3d3e219ec636a73b19ee1e9d20d
@@@ -183,13 -183,13 +183,13 @@@ static u64 __read_mostly shadow_user_ma
  static u64 __read_mostly shadow_accessed_mask;
  static u64 __read_mostly shadow_dirty_mask;
  static u64 __read_mostly shadow_mmio_mask;
+ static u64 __read_mostly shadow_mmio_value;
  static u64 __read_mostly shadow_present_mask;
  
  /*
-  * The mask/value to distinguish a PTE that has been marked not-present for
-  * access tracking purposes.
-  * The mask would be either 0 if access tracking is disabled, or
-  * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled.
+  * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
+  * Non-present SPTEs with shadow_acc_track_value set are in place for access
+  * tracking.
   */
  static u64 __read_mostly shadow_acc_track_mask;
  static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
@@@ -207,16 -207,40 +207,40 @@@ static const u64 shadow_acc_track_saved
  static void mmu_spte_set(u64 *sptep, u64 spte);
  static void mmu_free_roots(struct kvm_vcpu *vcpu);
  
- void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
  {
+       BUG_ON((mmio_mask & mmio_value) != mmio_value);
+       shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
        shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
  
+ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
+ {
+       return sp->role.ad_disabled;
+ }
+ 
+ static inline bool spte_ad_enabled(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       return !(spte & shadow_acc_track_value);
+ }
+ 
+ static inline u64 spte_shadow_accessed_mask(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
+ }
+ 
+ static inline u64 spte_shadow_dirty_mask(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
+ }
+ 
  static inline bool is_access_track_spte(u64 spte)
  {
-       /* Always false if shadow_acc_track_mask is zero.  */
-       return (spte & shadow_acc_track_mask) == shadow_acc_track_value;
+       return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
  }
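
Taken together, these helpers split SPTEs into three flavors keyed off
shadow_acc_track_value (SPTE_SPECIAL_MASK). A rough classification sketch,
assuming the EPT setup later in this series where the access-track mask is
the RWX bits:

        bool ad_disabled = spte & SPTE_SPECIAL_MASK;    /* shadow_acc_track_value */
        bool acc_track   = ad_disabled && !(spte & VMX_EPT_RWX_MASK);
        /* everything else is an A/D-enabled SPTE */
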
  
  /*
@@@ -270,7 -294,7 +294,7 @@@ static void mark_mmio_spte(struct kvm_v
        u64 mask = generation_mmio_spte_mask(gen);
  
        access &= ACC_WRITE_MASK | ACC_USER_MASK;
-       mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
+       mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
  
        trace_mark_mmio_spte(sptep, gfn, access, gen);
        mmu_spte_set(sptep, mask);
  
  static bool is_mmio_spte(u64 spte)
  {
-       return (spte & shadow_mmio_mask) == shadow_mmio_mask;
+       return (spte & shadow_mmio_mask) == shadow_mmio_value;
  }
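
Splitting the old single mask into a (mask, value) pair is what lets EPT keep
recognizing deliberately misconfigured MMIO entries: later in this diff the
VMX code passes VMX_EPT_RWX_MASK as the mask and VMX_EPT_MISCONFIG_WX_VALUE
(110b, write/execute without read) as the value, so is_mmio_spte() matches
exactly the entries that raise an EPT misconfiguration exit:

        kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
                                   VMX_EPT_MISCONFIG_WX_VALUE);
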
  
  static gfn_t get_mmio_spte_gfn(u64 spte)
@@@ -315,12 -339,20 +339,20 @@@ static bool check_mmio_spte(struct kvm_
        return likely(kvm_gen == spte_gen);
  }
  
+ /*
+  * Sets the shadow PTE masks used by the MMU.
+  *
+  * Assumptions:
+  *  - Setting either @accessed_mask or @dirty_mask requires setting both
+  *  - At least one of @accessed_mask or @acc_track_mask must be set
+  */
  void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
                u64 acc_track_mask)
  {
-       if (acc_track_mask != 0)
-               acc_track_mask |= SPTE_SPECIAL_MASK;
+       BUG_ON(!dirty_mask != !accessed_mask);
+       BUG_ON(!accessed_mask && !acc_track_mask);
+       BUG_ON(acc_track_mask & shadow_acc_track_value);
  
        shadow_user_mask = user_mask;
        shadow_accessed_mask = accessed_mask;
        shadow_x_mask = x_mask;
        shadow_present_mask = p_mask;
        shadow_acc_track_mask = acc_track_mask;
-       WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
  
@@@ -549,7 -580,7 +580,7 @@@ static bool spte_has_volatile_bits(u64 
            is_access_track_spte(spte))
                return true;
  
-       if (shadow_accessed_mask) {
+       if (spte_ad_enabled(spte)) {
                if ((spte & shadow_accessed_mask) == 0 ||
                    (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
                        return true;
  
  static bool is_accessed_spte(u64 spte)
  {
-       return shadow_accessed_mask ? spte & shadow_accessed_mask
-                                   : !is_access_track_spte(spte);
+       u64 accessed_mask = spte_shadow_accessed_mask(spte);
+       return accessed_mask ? spte & accessed_mask
+                            : !is_access_track_spte(spte);
  }
  
  static bool is_dirty_spte(u64 spte)
  {
-       return shadow_dirty_mask ? spte & shadow_dirty_mask
-                                : spte & PT_WRITABLE_MASK;
+       u64 dirty_mask = spte_shadow_dirty_mask(spte);
+       return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
  }
  
  /* Rules for using mmu_spte_set:
@@@ -707,10 -741,10 +741,10 @@@ static u64 mmu_spte_get_lockless(u64 *s
  
  static u64 mark_spte_for_access_track(u64 spte)
  {
-       if (shadow_accessed_mask != 0)
+       if (spte_ad_enabled(spte))
                return spte & ~shadow_accessed_mask;
  
-       if (shadow_acc_track_mask == 0 || is_access_track_spte(spte))
+       if (is_access_track_spte(spte))
                return spte;
  
        /*
        spte |= (spte & shadow_acc_track_saved_bits_mask) <<
                shadow_acc_track_saved_bits_shift;
        spte &= ~shadow_acc_track_mask;
-       spte |= shadow_acc_track_value;
  
        return spte;
  }
@@@ -741,6 -774,7 +774,7 @@@ static u64 restore_acc_track_spte(u64 s
        u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift)
                         & shadow_acc_track_saved_bits_mask;
  
+       WARN_ON_ONCE(spte_ad_enabled(spte));
        WARN_ON_ONCE(!is_access_track_spte(spte));
  
        new_spte &= ~shadow_acc_track_mask;
@@@ -759,7 -793,7 +793,7 @@@ static bool mmu_spte_age(u64 *sptep
        if (!is_accessed_spte(spte))
                return false;
  
-       if (shadow_accessed_mask) {
+       if (spte_ad_enabled(spte)) {
                clear_bit((ffs(shadow_accessed_mask) - 1),
                          (unsigned long *)sptep);
        } else {
@@@ -1390,6 -1424,22 +1424,22 @@@ static bool spte_clear_dirty(u64 *sptep
        return mmu_spte_update(sptep, spte);
  }
  
+ static bool wrprot_ad_disabled_spte(u64 *sptep)
+ {
+       bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
+                                              (unsigned long *)sptep);
+       if (was_writable)
+               kvm_set_pfn_dirty(spte_to_pfn(*sptep));
+       return was_writable;
+ }
+ 
+ /*
+  * Gets the GFN ready for another round of dirty logging by clearing the
+  *    - D bit on ad-enabled SPTEs, and
+  *    - W bit on ad-disabled SPTEs.
+  * Returns true iff any D or W bits were cleared.
+  */
  static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
  {
        u64 *sptep;
        bool flush = false;
  
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_clear_dirty(sptep);
+               if (spte_ad_enabled(*sptep))
+                       flush |= spte_clear_dirty(sptep);
+               else
+                       flush |= wrprot_ad_disabled_spte(sptep);
  
        return flush;
  }
@@@ -1420,7 -1473,8 +1473,8 @@@ static bool __rmap_set_dirty(struct kv
        bool flush = false;
  
        for_each_rmap_spte(rmap_head, &iter, sptep)
-               flush |= spte_set_dirty(sptep);
+               if (spte_ad_enabled(*sptep))
+                       flush |= spte_set_dirty(sptep);
  
        return flush;
  }
@@@ -1452,7 -1506,8 +1506,8 @@@ static void kvm_mmu_write_protect_pt_ma
  }
  
  /**
-  * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages
+  * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write
+  * protect the page if the D-bit isn't supported.
   * @kvm: kvm instance
   * @slot: slot to clear D-bit
   * @gfn_offset: start of the BITS_PER_LONG pages we care about
@@@ -1766,18 -1821,9 +1821,9 @@@ static int kvm_test_age_rmapp(struct kv
        u64 *sptep;
        struct rmap_iterator iter;
  
-       /*
-        * If there's no access bit in the secondary pte set by the hardware and
-        * fast access tracking is also not enabled, it's up to gup-fast/gup to
-        * set the access bit in the primary pte or in the page structure.
-        */
-       if (!shadow_accessed_mask && !shadow_acc_track_mask)
-               goto out;
        for_each_rmap_spte(rmap_head, &iter, sptep)
                if (is_accessed_spte(*sptep))
                        return 1;
- out:
        return 0;
  }
  
@@@ -1798,18 -1844,6 +1844,6 @@@ static void rmap_recycle(struct kvm_vcp
  
  int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
  {
-       /*
-        * In case of absence of EPT Access and Dirty Bits supports,
-        * emulate the accessed bit for EPT, by checking if this page has
-        * an EPT mapping, and clearing it if it does. On the next access,
-        * a new EPT mapping will be established.
-        * This has some overhead, but not as much as the cost of swapping
-        * out actively used pages or breaking up actively used hugepages.
-        */
-       if (!shadow_accessed_mask && !shadow_acc_track_mask)
-               return kvm_handle_hva_range(kvm, start, end, 0,
-                                           kvm_unmap_rmapp);
        return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
  }
  
@@@ -2398,7 -2432,12 +2432,12 @@@ static void link_shadow_page(struct kvm
        BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
  
        spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
-              shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+              shadow_user_mask | shadow_x_mask;
+       if (sp_ad_disabled(sp))
+               spte |= shadow_acc_track_value;
+       else
+               spte |= shadow_accessed_mask;
  
        mmu_spte_set(sptep, spte);
  
@@@ -2666,10 -2705,15 +2705,15 @@@ static int set_spte(struct kvm_vcpu *vc
  {
        u64 spte = 0;
        int ret = 0;
+       struct kvm_mmu_page *sp;
  
        if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
                return 0;
  
+       sp = page_header(__pa(sptep));
+       if (sp_ad_disabled(sp))
+               spte |= shadow_acc_track_value;
        /*
         * For the EPT case, shadow_present_mask is 0 if hardware
         * supports exec-only page table entries.  In that case,
         */
        spte |= shadow_present_mask;
        if (!speculative)
-               spte |= shadow_accessed_mask;
+               spte |= spte_shadow_accessed_mask(spte);
  
        if (pte_access & ACC_EXEC_MASK)
                spte |= shadow_x_mask;
  
        if (pte_access & ACC_WRITE_MASK) {
                kvm_vcpu_mark_page_dirty(vcpu, gfn);
-               spte |= shadow_dirty_mask;
+               spte |= spte_shadow_dirty_mask(spte);
        }
  
        if (speculative)
@@@ -2877,16 -2921,16 +2921,16 @@@ static void direct_pte_prefetch(struct 
  {
        struct kvm_mmu_page *sp;
  
+       sp = page_header(__pa(sptep));
        /*
-        * Since it's no accessed bit on EPT, it's no way to
-        * distinguish between actually accessed translations
-        * and prefetched, so disable pte prefetch if EPT is
-        * enabled.
+        * Without accessed bits, there's no way to distinguish between
+        * actually accessed translations and prefetched, so disable pte
+        * prefetch if accessed bits aren't available.
         */
-       if (!shadow_accessed_mask)
+       if (sp_ad_disabled(sp))
                return;
  
-       sp = page_header(__pa(sptep));
        if (sp->role.level > PT_PAGE_TABLE_LEVEL)
                return;
  
@@@ -3698,15 -3742,12 +3742,15 @@@ static int kvm_arch_setup_async_pf(stru
        return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
  }
  
 -static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
  {
        if (unlikely(!lapic_in_kernel(vcpu) ||
                     kvm_event_needs_reinjection(vcpu)))
                return false;
  
 +      if (is_guest_mode(vcpu))
 +              return false;
 +
        return kvm_x86_ops->interrupt_allowed(vcpu);
  }
  
@@@ -3722,7 -3763,7 +3766,7 @@@ static bool try_async_pf(struct kvm_vcp
        if (!async)
                return false; /* *pfn has correct page already */
  
 -      if (!prefault && can_do_async_pf(vcpu)) {
 +      if (!prefault && kvm_can_do_async_pf(vcpu)) {
                trace_kvm_try_async_get_page(gva, gfn);
                if (kvm_find_async_pf_gfn(vcpu, gfn)) {
                        trace_kvm_async_pf_doublefault(gva, gfn);
@@@ -4290,6 -4331,7 +4334,7 @@@ static void init_kvm_tdp_mmu(struct kvm
  
        context->base_role.word = 0;
        context->base_role.smm = is_smm(vcpu);
+       context->base_role.ad_disabled = (shadow_accessed_mask == 0);
        context->page_fault = tdp_page_fault;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
@@@ -4377,6 -4419,7 +4422,7 @@@ void kvm_init_shadow_ept_mmu(struct kvm
        context->root_level = context->shadow_root_level;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
+       context->base_role.ad_disabled = !accessed_dirty;
  
        update_permission_bitmask(vcpu, context, true);
        update_pkru_bitmask(vcpu, context, true);
@@@ -4636,6 -4679,7 +4682,7 @@@ static void kvm_mmu_pte_write(struct kv
        mask.smep_andnot_wp = 1;
        mask.smap_andnot_wp = 1;
        mask.smm = 1;
+       mask.ad_disabled = 1;
  
        /*
         * If we don't have indirect shadow pages, it means no page is
diff --combined arch/x86/kvm/mmu.h
index 330bf3a811fb07271de382b598be402c871f6496,41d362e95681c5de61f89d80037114bfaa04537e..a276834950c14a15681c9d125ddde8e9b9dc6af8
@@@ -51,7 -51,7 +51,7 @@@ static inline u64 rsvd_bits(int s, int 
        return ((1ULL << (e - s + 1)) - 1) << s;
  }
  
- void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
  
  void
  reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
@@@ -76,7 -76,6 +76,7 @@@ int handle_mmio_page_fault(struct kvm_v
  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
  void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                             bool accessed_dirty);
 +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
  
  static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
  {
diff --combined arch/x86/kvm/svm.c
index 33460fcdeef9e7ae51b673704c242b4226eef4d1,03df7c1da581a7317c4e566aa0db5e28f86134c6..905ea6052517fef7a09bf82f396222ade76be2a5
@@@ -36,7 -36,6 +36,7 @@@
  #include <linux/slab.h>
  #include <linux/amd-iommu.h>
  #include <linux/hashtable.h>
 +#include <linux/frame.h>
  
  #include <asm/apic.h>
  #include <asm/perf_event.h>
@@@ -190,6 -189,7 +190,7 @@@ struct vcpu_svm 
        struct nested_state nested;
  
        bool nmi_singlestep;
+       u64 nmi_singlestep_guest_rflags;
  
        unsigned int3_injected;
        unsigned long int3_rip;
@@@ -964,6 -964,18 +965,18 @@@ static void svm_disable_lbrv(struct vcp
        set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
  }
  
+ static void disable_nmi_singlestep(struct vcpu_svm *svm)
+ {
+       svm->nmi_singlestep = false;
+       if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+               /* Clear our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+       }
+ }
+ 
  /* Note:
   * This hash table is used to map VM_ID to a struct kvm_arch,
   * when handling AMD IOMMU GALOG notification to schedule in
@@@ -1713,11 -1725,24 +1726,24 @@@ static void svm_vcpu_unblocking(struct 
  
  static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
  {
-       return to_svm(vcpu)->vmcb->save.rflags;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long rflags = svm->vmcb->save.rflags;
+       if (svm->nmi_singlestep) {
+               /* Hide our flags if they were not set by the guest */
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+                       rflags &= ~X86_EFLAGS_TF;
+               if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+                       rflags &= ~X86_EFLAGS_RF;
+       }
+       return rflags;
  }
  
  static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
  {
+       if (to_svm(vcpu)->nmi_singlestep)
+               rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
         /*
          * Any change of EFLAGS.VM is accompanied by a reload of SS
          * (caused by either a task switch or an inter-privilege IRET),
@@@ -2112,10 -2137,7 +2138,7 @@@ static int db_interception(struct vcpu_
        }
  
        if (svm->nmi_singlestep) {
-               svm->nmi_singlestep = false;
-               if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
-                       svm->vmcb->save.rflags &=
-                               ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+               disable_nmi_singlestep(svm);
        }
  
        if (svm->vcpu.guest_debug &
@@@ -2370,8 -2392,8 +2393,8 @@@ static void nested_svm_uninit_mmu_conte
  
  static int nested_svm_check_permissions(struct vcpu_svm *svm)
  {
-       if (!(svm->vcpu.arch.efer & EFER_SVME)
-           || !is_paging(&svm->vcpu)) {
+       if (!(svm->vcpu.arch.efer & EFER_SVME) ||
+           !is_paging(&svm->vcpu)) {
                kvm_queue_exception(&svm->vcpu, UD_VECTOR);
                return 1;
        }
                return 1;
        }
  
-        return 0;
+       return 0;
  }
  
  static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
@@@ -2534,6 -2556,31 +2557,31 @@@ static int nested_svm_exit_handled_msr(
        return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
  }
  
+ /* DB exceptions for our internal use must not cause vmexit */
+ static int nested_svm_intercept_db(struct vcpu_svm *svm)
+ {
+       unsigned long dr6;
+       /* if we're not singlestepping, it's not ours */
+       if (!svm->nmi_singlestep)
+               return NESTED_EXIT_DONE;
+       /* if it's not a singlestep exception, it's not ours */
+       if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+               return NESTED_EXIT_DONE;
+       if (!(dr6 & DR6_BS))
+               return NESTED_EXIT_DONE;
+       /* if the guest is singlestepping, it should get the vmexit */
+       if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+               disable_nmi_singlestep(svm);
+               return NESTED_EXIT_DONE;
+       }
+       /* it's ours, the nested hypervisor must not see this one */
+       return NESTED_EXIT_HOST;
+ }
+ 
  static int nested_svm_exit_special(struct vcpu_svm *svm)
  {
        u32 exit_code = svm->vmcb->control.exit_code;
@@@ -2589,8 -2636,12 +2637,12 @@@ static int nested_svm_intercept(struct 
        }
        case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
                u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
-               if (svm->nested.intercept_exceptions & excp_bits)
-                       vmexit = NESTED_EXIT_DONE;
+               if (svm->nested.intercept_exceptions & excp_bits) {
+                       if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+                               vmexit = nested_svm_intercept_db(svm);
+                       else
+                               vmexit = NESTED_EXIT_DONE;
+               }
                /* async page fault always cause vmexit */
                else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
                         svm->apf_reason != 0)
@@@ -4627,10 -4678,17 +4679,17 @@@ static void enable_nmi_window(struct kv
            == HF_NMI_MASK)
                return; /* IRET will cause a vm exit */
  
+       if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+               return; /* STGI will cause a vm exit */
+       if (svm->nested.exit_required)
+               return; /* we're not going to run the guest yet */
        /*
         * Something prevents NMI from being injected. Single step over the
         * problem (IRET, exception injection or interrupt shadow).
         */
+       svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
        svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
  }
@@@ -4771,6 -4829,22 +4830,22 @@@ static void svm_vcpu_run(struct kvm_vcp
        if (unlikely(svm->nested.exit_required))
                return;
  
+       /*
+        * Disable singlestep if we're injecting an interrupt/exception.
+        * We don't want our modified rflags to be pushed on the stack where
+        * we might not be able to easily reset them when we disable NMI
+        * singlestep later.
+        */
+       if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+               /*
+                * Event injection happens before external interrupts cause a
+                * vmexit and interrupts are disabled here, so smp_send_reschedule
+                * is enough to force an immediate vmexit.
+                */
+               disable_nmi_singlestep(svm);
+               smp_send_reschedule(vcpu->cpu);
+       }
        pre_svm_run(svm);
  
        sync_lapic_to_cr8(vcpu);
  
        mark_all_clean(svm->vmcb);
  }
 +STACK_FRAME_NON_STANDARD(svm_vcpu_run);
  
  static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
  {
diff --combined arch/x86/kvm/vmx.c
index 6dcc4873e435c7357892e9dcd200c3c4c9b3688a,b4cfdcfdc1c1f9c965bac66b3cea384c2a6a3dfa..f76efad248aba0dc02bce77a4cd984343d181d79
@@@ -33,7 -33,6 +33,7 @@@
  #include <linux/slab.h>
  #include <linux/tboot.h>
  #include <linux/hrtimer.h>
 +#include <linux/frame.h>
  #include "kvm_cache_regs.h"
  #include "x86.h"
  
@@@ -49,7 -48,6 +49,7 @@@
  #include <asm/kexec.h>
  #include <asm/apic.h>
  #include <asm/irq_remapping.h>
 +#include <asm/mmu_context.h>
  
  #include "trace.h"
  #include "pmu.h"
@@@ -598,7 -596,6 +598,7 @@@ struct vcpu_vmx 
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
                u64           msr_host_bndcfgs;
 +              unsigned long vmcs_host_cr3;    /* May not match real cr3 */
                unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        } host_state;
        struct {
@@@ -913,8 -910,9 +913,9 @@@ static void nested_release_page_clean(s
        kvm_release_page_clean(page);
  }
  
+ static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
  static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
- static u64 construct_eptp(unsigned long root_hpa);
+ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
  static bool vmx_xsaves_supported(void);
  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
  static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@@ -2428,7 -2426,7 +2429,7 @@@ static int nested_vmx_check_exception(s
        if (!(vmcs12->exception_bitmap & (1u << nr)))
                return 0;
  
 -      nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
 +      nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                          vmcs_read32(VM_EXIT_INTR_INFO),
                          vmcs_readl(EXIT_QUALIFICATION));
        return 1;
@@@ -2772,7 -2770,7 +2773,7 @@@ static void nested_vmx_setup_ctls_msrs(
                if (enable_ept_ad_bits) {
                        vmx->nested.nested_vmx_secondary_ctls_high |=
                                SECONDARY_EXEC_ENABLE_PML;
-                      vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+                       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
                }
        } else
                vmx->nested.nested_vmx_ept_caps = 0;
@@@ -3198,7 -3196,8 +3199,8 @@@ static int vmx_get_msr(struct kvm_vcpu 
                msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!kvm_mpx_supported())
+               if (!kvm_mpx_supported() ||
+                   (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
                        return 1;
                msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
@@@ -3280,7 -3279,11 +3282,11 @@@ static int vmx_set_msr(struct kvm_vcpu 
                vmcs_writel(GUEST_SYSENTER_ESP, data);
                break;
        case MSR_IA32_BNDCFGS:
-               if (!kvm_mpx_supported())
+               if (!kvm_mpx_supported() ||
+                   (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+                       return 1;
+               if (is_noncanonical_address(data & PAGE_MASK) ||
+                   (data & MSR_IA32_BNDCFGS_RSVD))
                        return 1;
                vmcs_write64(GUEST_BNDCFGS, data);
                break;
@@@ -4013,7 -4016,7 +4019,7 @@@ static inline void __vmx_flush_tlb(stru
        if (enable_ept) {
                if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                        return;
-               ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+               ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
        } else {
                vpid_sync_context(vpid);
        }
@@@ -4188,14 -4191,15 +4194,15 @@@ static void vmx_set_cr0(struct kvm_vcp
        vmx->emulation_required = emulation_required(vcpu);
  }
  
- static u64 construct_eptp(unsigned long root_hpa)
+ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
  {
        u64 eptp;
  
        /* TODO: read this value from the relevant MSR instead of hard-coding it */
        eptp = VMX_EPT_DEFAULT_MT |
                VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
-       if (enable_ept_ad_bits)
+       if (enable_ept_ad_bits &&
+           (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
                eptp |= VMX_EPT_AD_ENABLE_BIT;
        eptp |= (root_hpa & PAGE_MASK);
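
A worked example, assuming the usual EPTP encodings (memory type 6 =
write-back in bits 2:0, a 4-level walk encoded as 3 in bits 5:3, A/D enable
in bit 6): with A/D bits in use the function returns root_hpa | 0x5e, while
for an L2 guest whose L1 EPTP did not enable A/D bits, bit 6 stays clear and
the result is root_hpa | 0x1e.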
  
@@@ -4209,7 -4213,7 +4216,7 @@@ static void vmx_set_cr3(struct kvm_vcp
  
        guest_cr3 = cr3;
        if (enable_ept) {
-               eptp = construct_eptp(cr3);
+               eptp = construct_eptp(vcpu, cr3);
                vmcs_write64(EPT_POINTER, eptp);
                if (is_paging(vcpu) || is_guest_mode(vcpu))
                        guest_cr3 = kvm_read_cr3(vcpu);
@@@ -5015,19 -5019,12 +5022,19 @@@ static void vmx_set_constant_host_state
        u32 low32, high32;
        unsigned long tmpl;
        struct desc_ptr dt;
 -      unsigned long cr0, cr4;
 +      unsigned long cr0, cr3, cr4;
  
        cr0 = read_cr0();
        WARN_ON(cr0 & X86_CR0_TS);
        vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
 -      vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 +
 +      /*
 +       * Save the most likely value for this task's CR3 in the VMCS.
 +       * We can't use __get_current_cr3_fast() because we're not atomic.
 +       */
 +      cr3 = __read_cr3();
 +      vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
 +      vmx->host_state.vmcs_host_cr3 = cr3;
  
        /* Save the most likely value for this task's CR4 in the VMCS. */
        cr4 = cr4_read_shadow();
@@@ -5170,7 -5167,8 +5177,8 @@@ static void ept_set_mmio_spte_mask(void
         * EPT Misconfigurations can be generated if the value of bits 2:0
         * of an EPT paging-structure entry is 110b (write/execute).
         */
-       kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE);
+       kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
+                                  VMX_EPT_MISCONFIG_WX_VALUE);
  }
  
  #define VMX_XSS_EXIT_BITMAP 0
@@@ -6220,17 -6218,6 +6228,6 @@@ static int handle_ept_violation(struct 
  
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
  
-       if (is_guest_mode(vcpu)
-           && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
-               /*
-                * Fix up exit_qualification according to whether guest
-                * page table accesses are reads or writes.
-                */
-               u64 eptp = nested_ept_get_cr3(vcpu);
-               if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
-                       exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
-       }
        /*
         * EPT violation happened while executing iret from NMI,
         * "blocked by NMI" bit has to be set before next VM entry.
@@@ -6453,7 -6440,7 +6450,7 @@@ void vmx_enable_tdp(void
                enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
                0ull, VMX_EPT_EXECUTABLE_MASK,
                cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
-               enable_ept_ad_bits ? 0ull : VMX_EPT_RWX_MASK);
+               VMX_EPT_RWX_MASK);
  
        ept_set_mmio_spte_mask();
        kvm_enable_tdp();
@@@ -6557,7 -6544,6 +6554,6 @@@ static __init int hardware_setup(void
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
  
        memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
@@@ -7661,7 -7647,10 +7657,10 @@@ static int handle_invvpid(struct kvm_vc
        unsigned long type, types;
        gva_t gva;
        struct x86_exception e;
-       int vpid;
+       struct {
+               u64 vpid;
+               u64 gla;
+       } operand;
  
        if (!(vmx->nested.nested_vmx_secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_VPID) ||
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
-                               sizeof(u32), &e)) {
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+                               sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
+       if (operand.vpid >> 16) {
+               nested_vmx_failValid(vcpu,
+                       VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               return kvm_skip_emulated_instruction(vcpu);
+       }
  
        switch (type) {
        case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+               if (is_noncanonical_address(operand.gla)) {
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+                       return kvm_skip_emulated_instruction(vcpu);
+               }
+               /* fall through */
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
-               if (!vpid) {
+               if (!operand.vpid) {
                        nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
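
The 16-byte operand mirrors the INVVPID descriptor defined by the SDM: the
VPID sits in bits 15:0, bits 63:16 are reserved and must be zero (hence the
operand.vpid >> 16 check), and the second quadword holds the linear address,
which is consulted only for the individual-address type and must then be
canonical. A descriptor with, say, vpid = 0x1002a now correctly fails with
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID instead of being silently accepted.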
@@@ -8662,7 -8662,6 +8672,7 @@@ static void vmx_handle_external_intr(st
                        );
        }
  }
 +STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
  
  static bool vmx_has_high_real_mode_segbase(void)
  {
@@@ -8831,7 -8830,7 +8841,7 @@@ static void vmx_arm_hv_timer(struct kvm
  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 -      unsigned long debugctlmsr, cr4;
 +      unsigned long debugctlmsr, cr3, cr4;
  
        /* Don't enter VMX if guest state is invalid, let the exit handler
           start emulation until we arrive back to a valid state */
        if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
  
 +      cr3 = __get_current_cr3_fast();
 +      if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
 +              vmcs_writel(HOST_CR3, cr3);
 +              vmx->host_state.vmcs_host_cr3 = cr3;
 +      }
 +
        cr4 = cr4_read_shadow();
        if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
                vmcs_writel(HOST_CR4, cr4);
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
  }
 +STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
  
  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
  {
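HOST_CR3 is now cached the same way HOST_CR4 already was: __get_current_cr3_fast() supplies the current value and the relatively expensive VMWRITE is issued only when it differs from what the VMCS last saw. The pattern, with cached_write() as a hypothetical wrapper around the real vmcs_writel() accessor:

	static inline void cached_write(unsigned long field, unsigned long val,
					unsigned long *cache)
	{
		if (unlikely(val != *cache)) {	/* VMWRITE only on change */
			vmcs_writel(field, val);
			*cache = val;
		}
	}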
@@@ -9394,6 -9386,11 +9404,11 @@@ static void nested_ept_inject_page_faul
        vmcs12->guest_physical_address = fault->address;
  }
  
+ static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
+ {
+       return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT;
+ }
+
  /* Callbacks for nested_ept_init_mmu_context: */
  
  static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
  
  static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
  {
-       u64 eptp;
+       bool wants_ad;
  
        WARN_ON(mmu_is_nested(vcpu));
-       eptp = nested_ept_get_cr3(vcpu);
-       if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+       wants_ad = nested_ept_ad_enabled(vcpu);
+       if (wants_ad && !enable_ept_ad_bits)
                return 1;
  
        kvm_mmu_unload(vcpu);
        kvm_init_shadow_ept_mmu(vcpu,
                        to_vmx(vcpu)->nested.nested_vmx_ept_caps &
                        VMX_EPT_EXECUTE_ONLY_BIT,
-                       eptp & VMX_EPT_AD_ENABLE_BIT);
+                       wants_ad);
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
        vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@@ -10728,8 -10725,7 +10743,7 @@@ static void sync_vmcs12(struct kvm_vcp
                vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
        }
  
-       if (nested_cpu_has_ept(vmcs12))
-               vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+       vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
  
        if (nested_cpu_has_vid(vmcs12))
                vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
        if (kvm_mpx_supported())
                vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
-       if (nested_cpu_has_xsaves(vmcs12))
-               vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
  }
  
  /*
@@@ -11152,7 -11146,8 +11164,8 @@@ static int vmx_set_hv_timer(struct kvm_
        vmx->hv_deadline_tsc = tscl + delta_tsc;
        vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                        PIN_BASED_VMX_PREEMPTION_TIMER);
-       return 0;
+       return delta_tsc == 0;
  }
  
  static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
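vmx_set_hv_timer() previously reported success unconditionally; returning delta_tsc == 0 lets it signal that the deadline has already passed. A hedged sketch of how a caller can consume the tri-state result — the real handling lives in lapic.c's start_hv_timer(), apic_timer_expired() is the expiry path, and the surrounding shape is illustrative:

	int r = kvm_x86_ops->set_hv_timer(vcpu, deadline_tsc);
	if (r < 0)		/* preemption timer unusable, keep sw timer */
		return false;
	if (r)			/* deadline already reached: fire now */
		apic_timer_expired(apic);
	return true;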
diff --combined arch/x86/kvm/x86.c
index 0e846f0cb83bb214811d0a12d2f700cc96a455f9,3a12b879f5424078905030b6de6d087aacbaf76a..6c7266f7766dcb6ec02b13b9b1439c9f9d547071
@@@ -2841,10 -2841,10 +2841,10 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
                        kvm_vcpu_write_tsc_offset(vcpu, offset);
                        vcpu->arch.tsc_catchup = 1;
                }
-               if (kvm_lapic_hv_timer_in_use(vcpu) &&
-                               kvm_x86_ops->set_hv_timer(vcpu,
-                                       kvm_get_lapic_target_expiration_tsc(vcpu)))
-                       kvm_lapic_switch_to_sw_timer(vcpu);
+               if (kvm_lapic_hv_timer_in_use(vcpu))
+                       kvm_lapic_restart_hv_timer(vcpu);
                /*
                 * On a host with synchronized TSC, there is no need to update
                 * kvmclock on vcpu->cpu migration
@@@ -5313,8 -5313,6 +5313,8 @@@ static void init_emulate_ctxt(struct kv
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
  
        ctxt->eflags = kvm_get_rflags(vcpu);
 +      ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 +
        ctxt->eip = kvm_rip_read(vcpu);
        ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                     (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
@@@ -5530,25 -5528,36 +5530,25 @@@ static int kvm_vcpu_check_hw_bp(unsigne
        return dr6;
  }
  
 -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
 +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
  {
        struct kvm_run *kvm_run = vcpu->run;
  
 -      /*
 -       * rflags is the old, "raw" value of the flags.  The new value has
 -       * not been saved yet.
 -       *
 -       * This is correct even for TF set by the guest, because "the
 -       * processor will not generate this exception after the instruction
 -       * that sets the TF flag".
 -       */
 -      if (unlikely(rflags & X86_EFLAGS_TF)) {
 -              if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 -                      kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
 -                                                DR6_RTM;
 -                      kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 -                      kvm_run->debug.arch.exception = DB_VECTOR;
 -                      kvm_run->exit_reason = KVM_EXIT_DEBUG;
 -                      *r = EMULATE_USER_EXIT;
 -              } else {
 -                      /*
 -                       * "Certain debug exceptions may clear bit 0-3.  The
 -                       * remaining contents of the DR6 register are never
 -                       * cleared by the processor".
 -                       */
 -                      vcpu->arch.dr6 &= ~15;
 -                      vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
 -                      kvm_queue_exception(vcpu, DB_VECTOR);
 -              }
 +      if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 +              kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
 +              kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 +              kvm_run->debug.arch.exception = DB_VECTOR;
 +              kvm_run->exit_reason = KVM_EXIT_DEBUG;
 +              *r = EMULATE_USER_EXIT;
 +      } else {
 +              /*
 +               * "Certain debug exceptions may clear bit 0-3.  The
 +               * remaining contents of the DR6 register are never
 +               * cleared by the processor".
 +               */
 +              vcpu->arch.dr6 &= ~15;
 +              vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
 +              kvm_queue_exception(vcpu, DB_VECTOR);
        }
  }
  
@@@ -5558,17 -5567,7 +5558,17 @@@ int kvm_skip_emulated_instruction(struc
        int r = EMULATE_DONE;
  
        kvm_x86_ops->skip_emulated_instruction(vcpu);
 -      kvm_vcpu_check_singlestep(vcpu, rflags, &r);
 +
 +      /*
 +       * rflags is the old, "raw" value of the flags.  The new value has
 +       * not been saved yet.
 +       *
 +       * This is correct even for TF set by the guest, because "the
 +       * processor will not generate this exception after the instruction
 +       * that sets the TF flag".
 +       */
 +      if (unlikely(rflags & X86_EFLAGS_TF))
 +              kvm_vcpu_do_singlestep(vcpu, &r);
        return r == EMULATE_DONE;
  }
  EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
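Splitting kvm_vcpu_do_singlestep() out lets each call site supply the correct trigger: the skip path tests the pre-instruction RFLAGS.TF (per the SDM, the instruction that sets TF does not itself trap), while the emulation path below uses ctxt->tf, latched in init_emulate_ctxt() before the instruction ran. Schematically, with hypothetical helper names:

	bool tf_before = ctxt->eflags & X86_EFLAGS_TF;	/* latched up front */
	emulate_one_insn(ctxt);				/* may rewrite EFLAGS.TF */
	if (tf_before)					/* decide on the old TF */
		queue_singlestep_db(vcpu);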
@@@ -5727,9 -5726,8 +5727,9 @@@ restart
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
                kvm_rip_write(vcpu, ctxt->eip);
 -              if (r == EMULATE_DONE)
 -                      kvm_vcpu_check_singlestep(vcpu, rflags, &r);
 +              if (r == EMULATE_DONE &&
 +                  (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
 +                      kvm_vcpu_do_singlestep(vcpu, &r);
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
                        __kvm_set_rflags(vcpu, ctxt->eflags);
@@@ -6011,7 -6009,7 +6011,7 @@@ static void kvm_set_mmio_spte_mask(void
                mask &= ~1ull;
  #endif
  
-       kvm_mmu_set_mmio_spte_mask(mask);
+       kvm_mmu_set_mmio_spte_mask(mask, mask);
  }
  
  #ifdef CONFIG_X86_64
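kvm_mmu_set_mmio_spte_mask() now takes the mask and the expected value separately, making the MMIO-SPTE test explicit; this caller happens to pass the same quantity for both. The resulting predicate, sketched with names modelled on mmu.c:

	/* an SPTE denotes an MMIO mapping iff the masked bits
	 * equal the configured value */
	static bool sketch_is_mmio_spte(u64 spte, u64 mmio_mask, u64 mmio_value)
	{
		return (spte & mmio_mask) == mmio_value;
	}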
@@@ -6733,7 -6731,7 +6733,7 @@@ static int vcpu_enter_guest(struct kvm_
  
        bool req_immediate_exit = false;
  
-       if (vcpu->requests) {
+       if (kvm_request_pending(vcpu)) {
                if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
                        kvm_mmu_unload(vcpu);
                if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
                        kvm_x86_ops->sync_pir_to_irr(vcpu);
        }
  
-       if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
+       if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
            || need_resched() || signal_pending(current)) {
                vcpu->mode = OUTSIDE_GUEST_MODE;
                smp_wmb();
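Open-coded reads of vcpu->requests give way to kvm_request_pending(), part of the VCPU-request overhaul this merge documents. A sketch of the accessor, on the assumption that vcpu->requests remains the pending-request bitmap; READ_ONCE() keeps the entry loop from reusing a stale cached load:

	static __always_inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
	{
		return READ_ONCE(vcpu->requests);
	}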
@@@ -8609,7 -8607,8 +8609,7 @@@ bool kvm_arch_can_inject_async_page_pre
        if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
                return true;
        else
 -              return !kvm_event_needs_reinjection(vcpu) &&
 -                      kvm_x86_ops->interrupt_allowed(vcpu);
 +              return kvm_can_do_async_pf(vcpu);
  }
  
  void kvm_arch_start_assignment(struct kvm *kvm)
diff --combined virt/kvm/arm/mmu.c
index 1c44aa35f909dadbc8dd7cf0ab0c54fa81347e21,f2d5b6cf06ae24fc9cf8118432e61c92e120434f..0e1fc75f3585774b7b885cf3f52eda59a4fd299f
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/kvm_host.h>
  #include <linux/io.h>
  #include <linux/hugetlb.h>
+ #include <linux/sched/signal.h>
  #include <trace/events/kvm.h>
  #include <asm/pgalloc.h>
  #include <asm/cacheflush.h>
@@@ -29,7 -30,6 +30,7 @@@
  #include <asm/kvm_asm.h>
  #include <asm/kvm_emulate.h>
  #include <asm/virt.h>
 +#include <asm/system_misc.h>
  
  #include "trace.h"
  
@@@ -1262,6 -1262,24 +1263,24 @@@ static void coherent_cache_guest_page(s
        __coherent_cache_guest_page(vcpu, pfn, size);
  }
  
+ static void kvm_send_hwpoison_signal(unsigned long address,
+                                    struct vm_area_struct *vma)
+ {
+       siginfo_t info;
+       info.si_signo   = SIGBUS;
+       info.si_errno   = 0;
+       info.si_code    = BUS_MCEERR_AR;
+       info.si_addr    = (void __user *)address;
+       if (is_vm_hugetlb_page(vma))
+               info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
+       else
+               info.si_addr_lsb = PAGE_SHIFT;
+       send_sig_info(SIGBUS, &info, current);
+ }
+
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
        smp_rmb();
  
        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+       if (pfn == KVM_PFN_ERR_HWPOISON) {
+               kvm_send_hwpoison_signal(hva, vma);
+               return 0;
+       }
        if (is_error_noslot_pfn(pfn))
                return -EFAULT;
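kvm_send_hwpoison_signal() above reports the poisoned page to userspace the way the generic memory-failure code does: SIGBUS with BUS_MCEERR_AR, si_addr holding the faulting HVA and si_addr_lsb its granule size (base page or hugepage). A sketch of the receiving side, assuming a VMM that installs an SA_SIGINFO handler; the handler body is illustrative:

	static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
	{
		if (si->si_code == BUS_MCEERR_AR) {
			void *hva = si->si_addr;			/* poisoned mapping */
			size_t len = (size_t)1 << si->si_addr_lsb;	/* page/hugepage size */
			/* e.g. discard the range and inject a
			 * memory error into the guest */
		}
	}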
  
                kvm_set_pfn_accessed(pfn);
  }
  
 +static bool is_abort_sea(unsigned long fault_status)
 +{
 +      switch (fault_status) {
 +      case FSC_SEA:
 +      case FSC_SEA_TTW0:
 +      case FSC_SEA_TTW1:
 +      case FSC_SEA_TTW2:
 +      case FSC_SEA_TTW3:
 +      case FSC_SECC:
 +      case FSC_SECC_TTW0:
 +      case FSC_SECC_TTW1:
 +      case FSC_SECC_TTW2:
 +      case FSC_SECC_TTW3:
 +              return true;
 +      default:
 +              return false;
 +      }
 +}
 +
  /**
   * kvm_handle_guest_abort - handles all 2nd stage aborts
   * @vcpu:     the VCPU pointer
@@@ -1472,29 -1475,19 +1495,29 @@@ int kvm_handle_guest_abort(struct kvm_v
        gfn_t gfn;
        int ret, idx;
  
 +      fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 +
 +      fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 +
 +      /*
 +       * The host kernel will handle the synchronous external abort. There
 +       * is no need to pass the error into the guest.
 +       */
 +      if (is_abort_sea(fault_status)) {
 +              if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
 +                      return 1;
 +      }
 +
        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
        if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
                kvm_inject_vabt(vcpu);
                return 1;
        }
  
 -      fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 -
        trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
                              kvm_vcpu_get_hfar(vcpu), fault_ipa);
  
        /* Check the stage-2 fault is trans. fault or write fault */
 -      fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
        if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
            fault_status != FSC_ACCESS) {
                kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",