2 files changed, 111 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index fa73bbcb0c8d..4964bb2e931e 100644
+index 9f2bcb8b7f96..a60a4220be95 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -4209,6 +4209,15 @@
+@@ -4281,6 +4281,15 @@
Also, it enforces the PCI Local Bus spec
rule that those bits should be 0 in system reset
events (useful for kexec/kdump cases).
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 592e1c4ae697..aebf6f412203 100644
+index 92302d5749d8..71387b9aca33 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
-@@ -194,6 +194,106 @@ static int __init pci_apply_final_quirks(void)
+@@ -287,6 +287,106 @@ static int __init pci_apply_final_quirks(void)
}
fs_initcall_sync(pci_apply_final_quirks);
/*
* Decoding should be disabled for a PCI device during BAR sizing to avoid
* conflict. But doing so may cause problems on host bridge and perhaps other
-@@ -4974,6 +5074,8 @@ static const struct pci_dev_acs_enabled {
+@@ -5069,6 +5169,8 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
/* APM X-Gene */
{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 73fad57408f7..99ae3e468ce6 100644
+index 5bbb5612b207..691ce10e7647 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
-@@ -79,7 +79,7 @@ module_param(halt_poll_ns, uint, 0644);
+@@ -82,7 +82,7 @@ module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
-index 555bbe774734..de2e0d0185fc 100644
+index 69a3e544676c..56a45b9b602e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
-@@ -10262,7 +10262,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
+@@ -10269,7 +10269,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
if (time_after(jiffies, warning_time +
READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/linux/fortify-string.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
-index 7cad8bb031e9..acc24887db3e 100644
+index da51a83b2829..9d9e7822eddf 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
- char *__p = (char *)(p); \
+ unsigned char *__p = (unsigned char *)(p); \
size_t __ret = SIZE_MAX; \
- size_t __p_size = __member_size(p); \
+ const size_t __p_size = __member_size(p); \
if (__p_size != SIZE_MAX && \
3 files changed, 21 insertions(+)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index 7ccdf991d18e..61aefeb3fdbc 100644
+index d3432687c9e6..2c20da9aa2ac 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
-@@ -251,6 +251,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
+@@ -249,6 +249,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index ee603f4edce1..ff92ff41d5ce 100644
+index c381770bcbf1..6690a3722007 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
-@@ -5342,6 +5342,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+@@ -5413,6 +5413,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 23 Aug 2023 18:01:04 -0700
-Subject: [PATCH] KVM: x86/mmu: Fix an sign-extension bug with mmu_seq that
- hangs vCPUs
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Upstream commit ba6e3fe25543 ("KVM: x86/mmu: Grab mmu_invalidate_seq in
-kvm_faultin_pfn()") unknowingly fixed the bug in v6.3 when refactoring
-how KVM tracks the sequence counter snapshot.
-
-Take the vCPU's mmu_seq snapshot as an "unsigned long" instead of an "int"
-when checking to see if a page fault is stale, as the sequence count is
-stored as an "unsigned long" everywhere else in KVM. This fixes a bug
-where KVM will effectively hang vCPUs due to always thinking page faults
-are stale, which results in KVM refusing to "fix" faults.
-
-mmu_invalidate_seq (née mmu_notifier_seq) is a sequence counter used when
-KVM is handling page faults to detect if userspace mappings relevant to
-the guest were invalidated between snapshotting the counter and acquiring
-mmu_lock, i.e. to ensure that the userspace mapping KVM is using to
-resolve the page fault is fresh. If KVM sees that the counter has
-changed, KVM simply resumes the guest without fixing the fault.
-
-What _should_ happen is that the source of the mmu_notifier invalidations
-eventually goes away, mmu_invalidate_seq becomes stable, and KVM can once
-again fix guest page fault(s).
-
-But for a long-lived VM and/or a VM that the host just doesn't particularly
-like, it's possible for a VM to be on the receiving end of 2 billion (with
-a B) mmu_notifier invalidations. When that happens, bit 31 will be set in
-mmu_invalidate_seq. This causes the value to be turned into a 32-bit
-negative value when implicitly cast to an "int" by is_page_fault_stale(),
-and then sign-extended into a 64-bit unsigned when the signed "int" is
-implicitly cast back to an "unsigned long" on the call to
-mmu_invalidate_retry_hva().
-
-As a result of the casting and sign-extension, given a sequence counter of
-e.g. 0x8002dc25, mmu_invalidate_retry_hva() ends up doing
-
- if (0x8002dc25 != 0xffffffff8002dc25)
-
-and signals that the page fault is stale and needs to be retried even
-though the sequence counter is stable, and KVM effectively hangs any vCPU
-that takes a page fault (EPT violation or #NPF when TDP is enabled).
-
-Reported-by: Brian Rak <brak@vultr.com>
-Reported-by: Amaan Cheval <amaan.cheval@gmail.com>
-Reported-by: Eric Wheeler <kvm@lists.ewheeler.net>
-Closes: https://lore.kernel.org/all/f023d927-52aa-7e08-2ee5-59a2fbc65953@gameservers.com
-Fixes: a955cad84cda ("KVM: x86/mmu: Retry page fault if root is invalidated by memslot update")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-(cherry-picked from commit 82d811ff566594de3676f35808e8a9e19c5c864c in stable v6.1.51)
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
----
- arch/x86/kvm/mmu/mmu.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
-index 3220c1285984..c42ba5cde7a4 100644
---- a/arch/x86/kvm/mmu/mmu.c
-+++ b/arch/x86/kvm/mmu/mmu.c
-@@ -4261,7 +4261,8 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
- * root was invalidated by a memslot update or a relevant mmu_notifier fired.
- */
- static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
-- struct kvm_page_fault *fault, int mmu_seq)
-+ struct kvm_page_fault *fault,
-+ unsigned long mmu_seq)
- {
- struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
-
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: kiler129 <grzegorz@noflash.pl>
+Date: Mon, 18 Sep 2023 15:19:26 +0200
+Subject: [PATCH] allow opt-in to allow pass-through on broken hardware..
+
+adapted from https://github.com/kiler129/relax-intel-rmrr , licensed under MIT or GPL 2.0+
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/iommu/intel/iommu.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index a377f8e0a414..3be334d34317 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -298,6 +298,7 @@ static int dmar_map_gfx = 1;
+ static int dmar_map_ipu = 1;
+ static int intel_iommu_superpage = 1;
+ static int iommu_identity_mapping;
++static int intel_relaxable_rmrr = 0;
+ static int iommu_skip_te_disable;
+
+ #define IDENTMAP_GFX 2
+@@ -359,6 +360,9 @@ static int __init intel_iommu_setup(char *str)
+ } else if (!strncmp(str, "tboot_noforce", 13)) {
+ pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+ intel_iommu_tboot_noforce = 1;
++ } else if (!strncmp(str, "relax_rmrr", 10)) {
++ pr_info("Intel-IOMMU: assuming all RMRRs are relaxable. This can lead to instability or data loss\n");
++ intel_relaxable_rmrr = 1;
+ } else {
+ pr_notice("Unknown option - '%s'\n", str);
+ }
+@@ -2503,7 +2507,7 @@ static bool device_rmrr_is_relaxable(struct device *dev)
+ return false;
+
+ pdev = to_pci_dev(dev);
+- if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
++ if (intel_relaxable_rmrr || IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
+ return true;
+ else
+ return false;
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: kiler129 <grzegorz@noflash.pl>
-Date: Mon, 18 Sep 2023 15:19:26 +0200
-Subject: [PATCH] allow opt-in to allow pass-through on broken hardware..
-
-adapted from https://github.com/kiler129/relax-intel-rmrr , licensed under MIT or GPL 2.0+
----
- drivers/iommu/intel/iommu.c | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 1c5ba4dbfe78..887667218e3b 100644
---- a/drivers/iommu/intel/iommu.c
-+++ b/drivers/iommu/intel/iommu.c
-@@ -297,6 +297,7 @@ static int dmar_map_gfx = 1;
- static int dmar_map_ipu = 1;
- static int intel_iommu_superpage = 1;
- static int iommu_identity_mapping;
-+static int intel_relaxable_rmrr = 0;
- static int iommu_skip_te_disable;
-
- #define IDENTMAP_GFX 2
-@@ -358,6 +359,9 @@ static int __init intel_iommu_setup(char *str)
- } else if (!strncmp(str, "tboot_noforce", 13)) {
- pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
- intel_iommu_tboot_noforce = 1;
-+ } else if (!strncmp(str, "relax_rmrr", 10)) {
-+ pr_info("Intel-IOMMU: assuming all RMRRs are relaxable. This can lead to instability or data loss\n");
-+ intel_relaxable_rmrr = 1;
- } else {
- pr_notice("Unknown option - '%s'\n", str);
- }
-@@ -2538,7 +2542,7 @@ static bool device_rmrr_is_relaxable(struct device *dev)
- return false;
-
- pdev = to_pci_dev(dev);
-- if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
-+ if (intel_relaxable_rmrr || IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
- return true;
- else
- return false;
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Wed, 13 Sep 2023 08:26:47 +0300
+Subject: [PATCH] net: thunderbolt: Fix TCPv6 GSO checksum calculation
+
+Alex reported that running ssh over IPv6 does not work with
+Thunderbolt/USB4 networking driver. The reason for that is that driver
+should call skb_is_gso() before calling skb_is_gso_v6(), and it should
+not return false after calculates the checksum successfully. This probably
+was a copy paste error from the original driver where it was done properly.
+
+Reported-by: Alex Balcanquall <alex@alexbal.com>
+Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/net/thunderbolt/main.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c
+index 0c1e8970ee58..0a53ec293d04 100644
+--- a/drivers/net/thunderbolt/main.c
++++ b/drivers/net/thunderbolt/main.c
+@@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
+ *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, 0,
+ ip_hdr(skb)->protocol, 0);
+- } else if (skb_is_gso_v6(skb)) {
++ } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
+ tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0,
+ IPPROTO_TCP, 0);
+- return false;
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Mika Westerberg <mika.westerberg@linux.intel.com>
-Date: Wed, 13 Sep 2023 08:26:47 +0300
-Subject: [PATCH] net: thunderbolt: Fix TCPv6 GSO checksum calculation
-
-Alex reported that running ssh over IPv6 does not work with
-Thunderbolt/USB4 networking driver. The reason for that is that driver
-should call skb_is_gso() before calling skb_is_gso_v6(), and it should
-not return false after calculates the checksum successfully. This probably
-was a copy paste error from the original driver where it was done properly.
-
-Reported-by: Alex Balcanquall <alex@alexbal.com>
-Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
-Cc: stable@vger.kernel.org
-Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
-Reviewed-by: Eric Dumazet <edumazet@google.com>
-Reviewed-by: Jiri Pirko <jiri@nvidia.com>
-Reviewed-by: Jiri Pirko <jiri@nvidia.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- drivers/net/thunderbolt.c | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
-index 990484776f2d..0c554a7a5ce4 100644
---- a/drivers/net/thunderbolt.c
-+++ b/drivers/net/thunderbolt.c
-@@ -1005,12 +1005,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
- *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, 0,
- ip_hdr(skb)->protocol, 0);
-- } else if (skb_is_gso_v6(skb)) {
-+ } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
- tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
- *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0,
- IPPROTO_TCP, 0);
-- return false;
- } else if (protocol == htons(ETH_P_IPV6)) {
- tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
- *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 7 Sep 2023 16:02:30 +0300
+Subject: [PATCH] thunderbolt: Restart XDomain discovery handshake after
+ failure
+
+Alex reported that after rebooting the other host the peer-to-peer link
+does not come up anymore. The reason for this is that the host that was
+not rebooted tries to send the UUID request only 10 times according to
+the USB4 Inter-Domain spec and gives up if it does not get reply. Then
+when the other side is actually ready it cannot get the link established
+anymore. The USB4 Inter-Domain spec requires that the discovery protocol
+is restarted in that case so implement this now.
+
+Reported-by: Alex Balcanquall <alex@alexbal.com>
+Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/thunderbolt/xdomain.c | 58 +++++++++++++++++++++++++----------
+ 1 file changed, 41 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
+index 5b5566862318..9803f0bbf20d 100644
+--- a/drivers/thunderbolt/xdomain.c
++++ b/drivers/thunderbolt/xdomain.c
+@@ -703,6 +703,27 @@ static void update_property_block(struct tb_xdomain *xd)
+ mutex_unlock(&xdomain_lock);
+ }
+
++static void start_handshake(struct tb_xdomain *xd)
++{
++ xd->state = XDOMAIN_STATE_INIT;
++ queue_delayed_work(xd->tb->wq, &xd->state_work,
++ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
++}
++
++/* Can be called from state_work */
++static void __stop_handshake(struct tb_xdomain *xd)
++{
++ cancel_delayed_work_sync(&xd->properties_changed_work);
++ xd->properties_changed_retries = 0;
++ xd->state_retries = 0;
++}
++
++static void stop_handshake(struct tb_xdomain *xd)
++{
++ cancel_delayed_work_sync(&xd->state_work);
++ __stop_handshake(xd);
++}
++
+ static void tb_xdp_handle_request(struct work_struct *work)
+ {
+ struct xdomain_request_work *xw = container_of(work, typeof(*xw), work);
+@@ -765,6 +786,15 @@ static void tb_xdp_handle_request(struct work_struct *work)
+ case UUID_REQUEST:
+ tb_dbg(tb, "%llx: received XDomain UUID request\n", route);
+ ret = tb_xdp_uuid_response(ctl, route, sequence, uuid);
++ /*
++ * If we've stopped the discovery with an error such as
++ * timing out, we will restart the handshake now that we
++ * received UUID request from the remote host.
++ */
++ if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) {
++ dev_dbg(&xd->dev, "restarting handshake\n");
++ start_handshake(xd);
++ }
+ break;
+
+ case LINK_STATE_STATUS_REQUEST:
+@@ -1521,6 +1551,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd)
+ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+ }
+
++static void tb_xdomain_failed(struct tb_xdomain *xd)
++{
++ xd->state = XDOMAIN_STATE_ERROR;
++ queue_delayed_work(xd->tb->wq, &xd->state_work,
++ msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT));
++}
++
+ static void tb_xdomain_state_work(struct work_struct *work)
+ {
+ struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work);
+@@ -1547,7 +1584,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
+ if (ret) {
+ if (ret == -EAGAIN)
+ goto retry_state;
+- xd->state = XDOMAIN_STATE_ERROR;
++ tb_xdomain_failed(xd);
+ } else {
+ tb_xdomain_queue_properties_changed(xd);
+ if (xd->bonding_possible)
+@@ -1612,7 +1649,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
+ if (ret) {
+ if (ret == -EAGAIN)
+ goto retry_state;
+- xd->state = XDOMAIN_STATE_ERROR;
++ tb_xdomain_failed(xd);
+ } else {
+ xd->state = XDOMAIN_STATE_ENUMERATED;
+ }
+@@ -1623,6 +1660,8 @@ static void tb_xdomain_state_work(struct work_struct *work)
+ break;
+
+ case XDOMAIN_STATE_ERROR:
++ dev_dbg(&xd->dev, "discovery failed, stopping handshake\n");
++ __stop_handshake(xd);
+ break;
+
+ default:
+@@ -1833,21 +1872,6 @@ static void tb_xdomain_release(struct device *dev)
+ kfree(xd);
+ }
+
+-static void start_handshake(struct tb_xdomain *xd)
+-{
+- xd->state = XDOMAIN_STATE_INIT;
+- queue_delayed_work(xd->tb->wq, &xd->state_work,
+- msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+-}
+-
+-static void stop_handshake(struct tb_xdomain *xd)
+-{
+- cancel_delayed_work_sync(&xd->properties_changed_work);
+- cancel_delayed_work_sync(&xd->state_work);
+- xd->properties_changed_retries = 0;
+- xd->state_retries = 0;
+-}
+-
+ static int __maybe_unused tb_xdomain_suspend(struct device *dev)
+ {
+ stop_handshake(tb_to_xdomain(dev));
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Mika Westerberg <mika.westerberg@linux.intel.com>
-Date: Thu, 7 Sep 2023 16:02:30 +0300
-Subject: [PATCH] thunderbolt: Restart XDomain discovery handshake after
- failure
-
-Alex reported that after rebooting the other host the peer-to-peer link
-does not come up anymore. The reason for this is that the host that was
-not rebooted tries to send the UUID request only 10 times according to
-the USB4 Inter-Domain spec and gives up if it does not get reply. Then
-when the other side is actually ready it cannot get the link established
-anymore. The USB4 Inter-Domain spec requires that the discovery protocol
-is restarted in that case so implement this now.
-
-Reported-by: Alex Balcanquall <alex@alexbal.com>
-Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding")
-Cc: stable@vger.kernel.org
-Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- drivers/thunderbolt/xdomain.c | 58 +++++++++++++++++++++++++----------
- 1 file changed, 41 insertions(+), 17 deletions(-)
-
-diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
-index 3c51e47dd86b..0b17a4d4e9b9 100644
---- a/drivers/thunderbolt/xdomain.c
-+++ b/drivers/thunderbolt/xdomain.c
-@@ -704,6 +704,27 @@ static void update_property_block(struct tb_xdomain *xd)
- mutex_unlock(&xdomain_lock);
- }
-
-+static void start_handshake(struct tb_xdomain *xd)
-+{
-+ xd->state = XDOMAIN_STATE_INIT;
-+ queue_delayed_work(xd->tb->wq, &xd->state_work,
-+ msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
-+}
-+
-+/* Can be called from state_work */
-+static void __stop_handshake(struct tb_xdomain *xd)
-+{
-+ cancel_delayed_work_sync(&xd->properties_changed_work);
-+ xd->properties_changed_retries = 0;
-+ xd->state_retries = 0;
-+}
-+
-+static void stop_handshake(struct tb_xdomain *xd)
-+{
-+ cancel_delayed_work_sync(&xd->state_work);
-+ __stop_handshake(xd);
-+}
-+
- static void tb_xdp_handle_request(struct work_struct *work)
- {
- struct xdomain_request_work *xw = container_of(work, typeof(*xw), work);
-@@ -766,6 +787,15 @@ static void tb_xdp_handle_request(struct work_struct *work)
- case UUID_REQUEST:
- tb_dbg(tb, "%llx: received XDomain UUID request\n", route);
- ret = tb_xdp_uuid_response(ctl, route, sequence, uuid);
-+ /*
-+ * If we've stopped the discovery with an error such as
-+ * timing out, we will restart the handshake now that we
-+ * received UUID request from the remote host.
-+ */
-+ if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) {
-+ dev_dbg(&xd->dev, "restarting handshake\n");
-+ start_handshake(xd);
-+ }
- break;
-
- case LINK_STATE_STATUS_REQUEST:
-@@ -1522,6 +1552,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd)
- msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
- }
-
-+static void tb_xdomain_failed(struct tb_xdomain *xd)
-+{
-+ xd->state = XDOMAIN_STATE_ERROR;
-+ queue_delayed_work(xd->tb->wq, &xd->state_work,
-+ msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT));
-+}
-+
- static void tb_xdomain_state_work(struct work_struct *work)
- {
- struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work);
-@@ -1548,7 +1585,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
- if (ret) {
- if (ret == -EAGAIN)
- goto retry_state;
-- xd->state = XDOMAIN_STATE_ERROR;
-+ tb_xdomain_failed(xd);
- } else {
- tb_xdomain_queue_properties_changed(xd);
- if (xd->bonding_possible)
-@@ -1613,7 +1650,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
- if (ret) {
- if (ret == -EAGAIN)
- goto retry_state;
-- xd->state = XDOMAIN_STATE_ERROR;
-+ tb_xdomain_failed(xd);
- } else {
- xd->state = XDOMAIN_STATE_ENUMERATED;
- }
-@@ -1624,6 +1661,8 @@ static void tb_xdomain_state_work(struct work_struct *work)
- break;
-
- case XDOMAIN_STATE_ERROR:
-+ dev_dbg(&xd->dev, "discovery failed, stopping handshake\n");
-+ __stop_handshake(xd);
- break;
-
- default:
-@@ -1793,21 +1832,6 @@ static void tb_xdomain_release(struct device *dev)
- kfree(xd);
- }
-
--static void start_handshake(struct tb_xdomain *xd)
--{
-- xd->state = XDOMAIN_STATE_INIT;
-- queue_delayed_work(xd->tb->wq, &xd->state_work,
-- msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
--}
--
--static void stop_handshake(struct tb_xdomain *xd)
--{
-- cancel_delayed_work_sync(&xd->properties_changed_work);
-- cancel_delayed_work_sync(&xd->state_work);
-- xd->properties_changed_retries = 0;
-- xd->state_retries = 0;
--}
--
- static int __maybe_unused tb_xdomain_suspend(struct device *dev)
- {
- stop_handshake(tb_to_xdomain(dev));
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Sat, 7 Oct 2023 12:57:02 +0200
+Subject: [PATCH] x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fix erratum #1485 on Zen4 parts where running with STIBP disabled can
+cause an #UD exception. The performance impact of the fix is negligible.
+
+Reported-by: René Rebe <rene@exactcode.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: René Rebe <rene@exactcode.de>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/include/asm/msr-index.h | 9 +++++++--
+ arch/x86/kernel/cpu/amd.c | 8 ++++++++
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 1d111350197f..b37abb55e948 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -637,12 +637,17 @@
+ /* AMD Last Branch Record MSRs */
+ #define MSR_AMD64_LBR_SELECT 0xc000010e
+
+-/* Fam 17h MSRs */
+-#define MSR_F17H_IRPERF 0xc00000e9
++/* Zen4 */
++#define MSR_ZEN4_BP_CFG 0xc001102e
++#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+
++/* Zen 2 */
+ #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+ #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
++/* Fam 17h MSRs */
++#define MSR_F17H_IRPERF 0xc00000e9
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL 0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 7eca6a8abbb1..981bc23665a3 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -80,6 +80,10 @@ static const int amd_div0[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
++static const int amd_erratum_1485[] =
++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
++ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
++
+ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+ {
+ int osvw_id = *erratum++;
+@@ -1140,6 +1144,10 @@ static void init_amd(struct cpuinfo_x86 *c)
+ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+ setup_force_cpu_bug(X86_BUG_DIV0);
+ }
++
++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
++ cpu_has_amd_erratum(c, amd_erratum_1485))
++ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+ }
+
+ #ifdef CONFIG_X86_32
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Oct 2023 12:41:03 -0700
+Subject: [PATCH] Revert "nSVM: Check for reserved encodings of TLB_CONTROL in
+ nested VMCB"
+
+Revert KVM's made-up consistency check on SVM's TLB control. The APM says
+that unsupported encodings are reserved, but the APM doesn't state that
+VMRUN checks for a supported encoding. Unless something is called out
+in "Canonicalization and Consistency Checks" or listed as MBZ (Must Be
+Zero), AMD behavior is typically to let software shoot itself in the foot.
+
+This reverts commit 174a921b6975ef959dd82ee9e8844067a62e3ec1.
+
+Fixes: 174a921b6975 ("nSVM: Check for reserved encodings of TLB_CONTROL in nested VMCB")
+Reported-by: Stefan Sterz <s.sterz@proxmox.com>
+Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kvm/svm/nested.c | 15 ---------------
+ 1 file changed, 15 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
+index 96936ddf1b3c..92db000409a9 100644
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -247,18 +247,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
+ kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
+ }
+
+-static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
+-{
+- /* Nested FLUSHBYASID is not supported yet. */
+- switch(tlb_ctl) {
+- case TLB_CONTROL_DO_NOTHING:
+- case TLB_CONTROL_FLUSH_ALL_ASID:
+- return true;
+- default:
+- return false;
+- }
+-}
+-
+ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+ struct vmcb_ctrl_area_cached *control)
+ {
+@@ -278,9 +266,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+ IOPM_SIZE)))
+ return false;
+
+- if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
+- return false;
+-
+ if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
+ !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
+ return false;
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Borislav Petkov (AMD)" <bp@alien8.de>
-Date: Sat, 7 Oct 2023 12:57:02 +0200
-Subject: [PATCH] x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Fix erratum #1485 on Zen4 parts where running with STIBP disabled can
-cause an #UD exception. The performance impact of the fix is negligible.
-
-Reported-by: René Rebe <rene@exactcode.de>
-Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
-Tested-by: René Rebe <rene@exactcode.de>
-Cc: <stable@kernel.org>
-Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/include/asm/msr-index.h | 9 +++++++--
- arch/x86/kernel/cpu/amd.c | 8 ++++++++
- 2 files changed, 15 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index ebbf80d8b8bd..a79b10e57757 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -630,12 +630,17 @@
- /* AMD Last Branch Record MSRs */
- #define MSR_AMD64_LBR_SELECT 0xc000010e
-
--/* Fam 17h MSRs */
--#define MSR_F17H_IRPERF 0xc00000e9
-+/* Zen4 */
-+#define MSR_ZEN4_BP_CFG 0xc001102e
-+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
-
-+/* Zen 2 */
- #define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
- #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
-
-+/* Fam 17h MSRs */
-+#define MSR_F17H_IRPERF 0xc00000e9
-+
- /* Fam 16h MSRs */
- #define MSR_F16H_L2I_PERF_CTL 0xc0010230
- #define MSR_F16H_L2I_PERF_CTR 0xc0010231
-diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
-index 6daf6a8fa0c7..044e3869620c 100644
---- a/arch/x86/kernel/cpu/amd.c
-+++ b/arch/x86/kernel/cpu/amd.c
-@@ -79,6 +79,10 @@ static const int amd_div0[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
- AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
-
-+static const int amd_erratum_1485[] =
-+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
-+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
-+
- static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
- {
- int osvw_id = *erratum++;
-@@ -1124,6 +1128,10 @@ static void init_amd(struct cpuinfo_x86 *c)
- pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
- setup_force_cpu_bug(X86_BUG_DIV0);
- }
-+
-+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
-+ cpu_has_amd_erratum(c, amd_erratum_1485))
-+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
- }
-
- #ifdef CONFIG_X86_32
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Oct 2023 12:41:04 -0700
+Subject: [PATCH] KVM: nSVM: Advertise support for flush-by-ASID
+
+Advertise support for FLUSHBYASID when nested SVM is enabled, as KVM can
+always emulate flushing TLB entries for a vmcb12 ASID, e.g. by running L2
+with a new, fresh ASID in vmcb02. Some modern hypervisors, e.g. VMWare
+Workstation 17, require FLUSHBYASID support and will refuse to run if it's
+not present.
+
+Punt on proper support, as "Honor L1's request to flush an ASID on nested
+VMRUN" is one of the TODO items in the (incomplete) list of issues that
+need to be addressed in order for KVM to NOT do a full TLB flush on every
+nested SVM transition (see nested_svm_transition_tlb_flush()).
+
+Reported-by: Stefan Sterz <s.sterz@proxmox.com>
+Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kvm/svm/svm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 2ec76ab525ea..ef3215286428 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4969,6 +4969,7 @@ static __init void svm_set_cpu_caps(void)
+ if (nested) {
+ kvm_cpu_cap_set(X86_FEATURE_SVM);
+ kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
++ kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
+
+ if (nrips)
+ kvm_cpu_cap_set(X86_FEATURE_NRIPS);
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Stefan Sterz <s.sterz@proxmox.com>
-Date: Wed, 18 Oct 2023 10:45:45 +0200
-Subject: [PATCH] Revert "nSVM: Check for reserved encodings of TLB_CONTROL in
- nested VMCB"
-
-This reverts commit 174a921b6975ef959dd82ee9e8844067a62e3ec1.
-
-Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
----
- arch/x86/kvm/svm/nested.c | 15 ---------------
- 1 file changed, 15 deletions(-)
-
-diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
-index add65dd59756..61a6c0235519 100644
---- a/arch/x86/kvm/svm/nested.c
-+++ b/arch/x86/kvm/svm/nested.c
-@@ -242,18 +242,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
- kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
- }
-
--static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
--{
-- /* Nested FLUSHBYASID is not supported yet. */
-- switch(tlb_ctl) {
-- case TLB_CONTROL_DO_NOTHING:
-- case TLB_CONTROL_FLUSH_ALL_ASID:
-- return true;
-- default:
-- return false;
-- }
--}
--
- static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_ctrl_area_cached *control)
- {
-@@ -273,9 +261,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- IOPM_SIZE)))
- return false;
-
-- if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
-- return false;
--
- return true;
- }
-
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 18 Oct 2023 12:41:04 -0700
-Subject: [PATCH] KVM: nSVM: Advertise support for flush-by-ASID
-
-Advertise support for FLUSHBYASID when nested SVM is enabled, as KVM can
-always emulate flushing TLB entries for a vmcb12 ASID, e.g. by running L2
-with a new, fresh ASID in vmcb02. Some modern hypervisors, e.g. VMWare
-Workstation 17, require FLUSHBYASID support and will refuse to run if it's
-not present.
-
-Punt on proper support, as "Honor L1's request to flush an ASID on nested
-VMRUN" is one of the TODO items in the (incomplete) list of issues that
-need to be addressed in order for KVM to NOT do a full TLB flush on every
-nested SVM transition (see nested_svm_transition_tlb_flush()).
-
-Reported-by: Stefan Sterz <s.sterz@proxmox.com>
-Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
----
- arch/x86/kvm/svm/svm.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index fb9cde86930d..db8028864094 100644
---- a/arch/x86/kvm/svm/svm.c
-+++ b/arch/x86/kvm/svm/svm.c
-@@ -4921,6 +4921,7 @@ static __init void svm_set_cpu_caps(void)
- if (nested) {
- kvm_cpu_cap_set(X86_FEATURE_SVM);
- kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
-+ kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
-
- if (nrips)
- kvm_cpu_cap_set(X86_FEATURE_NRIPS);
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:52 -0700
+Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to
+ uabi buffer
+
+Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
+constrain which xfeatures are saved into the userspace buffer without
+having to modify the user_xfeatures field in KVM's guest_fpu state.
+
+KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
+guest must not show up in the effective xstate_bv field of the buffer.
+Saving only the guest-supported xfeatures allows userspace to load the
+saved state on a different host with fewer xfeatures, so long as the
+target host supports the xfeatures that are exposed to the guest.
+
+KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
+the set of guest-supported xfeatures, but doing so broke KVM's historical
+ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
+are supported by the *host*.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928001956.924301-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22)
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/include/asm/fpu/api.h | 3 ++-
+ arch/x86/kernel/fpu/core.c | 5 +++--
+ arch/x86/kernel/fpu/xstate.c | 7 +++++--
+ arch/x86/kernel/fpu/xstate.h | 3 ++-
+ arch/x86/kvm/x86.c | 21 +++++++++------------
+ 5 files changed, 21 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
+index b475d9a582b8..e829fa4c6788 100644
+--- a/arch/x86/include/asm/fpu/api.h
++++ b/arch/x86/include/asm/fpu/api.h
+@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
+ static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+ #endif
+
+-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
++extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
++ unsigned int size, u64 xfeatures, u32 pkru);
+ extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+
+ static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
+diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
+index 98e507cc7d34..b582325b9c37 100644
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
+ EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
+
+ void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+- unsigned int size, u32 pkru)
++ unsigned int size, u64 xfeatures, u32 pkru)
+ {
+ struct fpstate *kstate = gfpu->fpstate;
+ union fpregs_state *ustate = buf;
+ struct membuf mb = { .p = buf, .left = size };
+
+ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
++ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
++ XSTATE_COPY_XSAVE);
+ } else {
+ memcpy(&ustate->fxsave, &kstate->regs.fxsave,
+ sizeof(ustate->fxsave));
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 1afbc4866b10..463ec0cd0dab 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+ * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
+ * @to: membuf descriptor
+ * @fpstate: The fpstate buffer from which to copy
++ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
+ * @pkru_val: The PKRU value to store in the PKRU component
+ * @copy_mode: The requested copy mode
+ *
+@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+ * It supports partial copy but @to.pos always starts from zero.
+ */
+ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+- u32 pkru_val, enum xstate_copy_mode copy_mode)
++ u64 xfeatures, u32 pkru_val,
++ enum xstate_copy_mode copy_mode)
+ {
+ const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+ struct xregs_state *xinit = &init_fpstate.regs.xsave;
+@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+ break;
+
+ case XSTATE_COPY_XSAVE:
+- header.xfeatures &= fpstate->user_xfeatures;
++ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
+ break;
+ }
+
+@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+ enum xstate_copy_mode copy_mode)
+ {
+ __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
++ tsk->thread.fpu.fpstate->user_xfeatures,
+ tsk->thread.pkru, copy_mode);
+ }
+
+diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
+index a4ecb04d8d64..3518fb26d06b 100644
+--- a/arch/x86/kernel/fpu/xstate.h
++++ b/arch/x86/kernel/fpu/xstate.h
+@@ -43,7 +43,8 @@ enum xstate_copy_mode {
+
+ struct membuf;
+ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+- u32 pkru_val, enum xstate_copy_mode copy_mode);
++ u64 xfeatures, u32 pkru_val,
++ enum xstate_copy_mode copy_mode);
+ extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+ enum xstate_copy_mode mode);
+ extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 6690a3722007..394d3a8b4682 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5385,26 +5385,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+ return 0;
+ }
+
+-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+- struct kvm_xsave *guest_xsave)
++
++static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
++ u8 *state, unsigned int size)
+ {
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return;
+
+- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+- guest_xsave->region,
+- sizeof(guest_xsave->region),
++ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
++ vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+ vcpu->arch.pkru);
+ }
+
+-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+- u8 *state, unsigned int size)
++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
++ struct kvm_xsave *guest_xsave)
+ {
+- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+- return;
+-
+- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+- state, size, vcpu->arch.pkru);
++ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
++ sizeof(guest_xsave->region));
+ }
+
+ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:53 -0700
+Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at
+ KVM_GET_XSAVE{2}
+
+Mask off xfeatures that aren't exposed to the guest only when saving guest
+state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
+Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
+ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
+Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
+to load xfeatures that are supported by the host, irrespective of what
+xfeatures are exposed to the guest.
+
+There is no known use case where userspace *intentionally* loads xfeatures
+that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
+was specifically that KVM_GET_XSAVE{2} would save xfeatures that weren't
+exposed to the guest, e.g. would lead to userspace unintentionally loading
+guest-unsupported xfeatures when live migrating a VM.
+
+Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
+problematic for QEMU-based setups, as QEMU has a bug where instead of
+terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
+loading guest state, i.e. resumes the guest after live migration with
+incomplete guest state, and ultimately results in guest data corruption.
+
+Note, letting userspace restore all host-supported xfeatures does not fix
+setups where a VM is migrated from a host *without* commit ad856280ddea,
+to a target with a subset of host-supported xfeatures. However there is
+no way to safely address that scenario, e.g. KVM could silently drop the
+unsupported features, but that would be a clear violation of KVM's ABI and
+so would require userspace to opt-in, at which point userspace could
+simply be updated to sanitize the to-be-loaded XSAVE state.
+
+Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
+Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
+Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
+Cc: stable@vger.kernel.org
+Cc: Leonardo Bras <leobras@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Message-Id: <20230928001956.924301-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c)
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kernel/fpu/xstate.c | 5 +----
+ arch/x86/kvm/cpuid.c | 8 --------
+ arch/x86/kvm/x86.c | 18 ++++++++++++++++--
+ 3 files changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 463ec0cd0dab..ebe698f8af73 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
+ fpregs_restore_userregs();
+
+ newfps->xfeatures = curfps->xfeatures | xfeatures;
+-
+- if (!guest_fpu)
+- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+-
++ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+ newfps->xfd = curfps->xfd & ~xfeatures;
+
+ /* Do the final updates within the locked region */
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 2c20da9aa2ac..e2b67975869c 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -332,14 +332,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+ vcpu->arch.guest_supported_xcr0 =
+ cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+
+- /*
+- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+- * supported by the host.
+- */
+- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
+- XFEATURE_MASK_FPSSE;
+-
+ kvm_update_pv_runtime(vcpu);
+
+ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 394d3a8b4682..e0cea0f8380a 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5389,12 +5389,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
+ {
++ /*
++ * Only copy state for features that are enabled for the guest. The
++ * state itself isn't problematic, but setting bits in the header for
++ * features that are supported in *this* host but not exposed to the
++ * guest can result in KVM_SET_XSAVE failing when live migrating to a
++ * compatible host without the features that are NOT exposed to the
++ * guest.
++ *
++ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
++ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
++ * supported by the host.
++ */
++ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
++ XFEATURE_MASK_FPSSE;
++
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return;
+
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+- vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+- vcpu->arch.pkru);
++ supported_xcr0, vcpu->arch.pkru);
+ }
+
+ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 27 Sep 2023 17:19:52 -0700
-Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to
- uabi buffer
-
-Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
-constrain which xfeatures are saved into the userspace buffer without
-having to modify the user_xfeatures field in KVM's guest_fpu state.
-
-KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
-guest must not show up in the effective xstate_bv field of the buffer.
-Saving only the guest-supported xfeatures allows userspace to load the
-saved state on a different host with a fewer xfeatures, so long as the
-target host supports the xfeatures that are exposed to the guest.
-
-KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
-the set of guest-supported xfeatures, but doing so broke KVM's historical
-ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
-are supported by the *host*.
-
-Cc: stable@vger.kernel.org
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Message-Id: <20230928001956.924301-2-seanjc@google.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22)
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/include/asm/fpu/api.h | 3 ++-
- arch/x86/kernel/fpu/core.c | 5 +++--
- arch/x86/kernel/fpu/xstate.c | 7 +++++--
- arch/x86/kernel/fpu/xstate.h | 3 ++-
- arch/x86/kvm/x86.c | 21 +++++++++------------
- 5 files changed, 21 insertions(+), 18 deletions(-)
-
-diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
-index b475d9a582b8..e829fa4c6788 100644
---- a/arch/x86/include/asm/fpu/api.h
-+++ b/arch/x86/include/asm/fpu/api.h
-@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
- static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
- #endif
-
--extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
-+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
-+ unsigned int size, u64 xfeatures, u32 pkru);
- extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
-
- static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
-diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
-index caf33486dc5e..cddd5018e6a4 100644
---- a/arch/x86/kernel/fpu/core.c
-+++ b/arch/x86/kernel/fpu/core.c
-@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
- EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
-
- void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
-- unsigned int size, u32 pkru)
-+ unsigned int size, u64 xfeatures, u32 pkru)
- {
- struct fpstate *kstate = gfpu->fpstate;
- union fpregs_state *ustate = buf;
- struct membuf mb = { .p = buf, .left = size };
-
- if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
-- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
-+ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
-+ XSTATE_COPY_XSAVE);
- } else {
- memcpy(&ustate->fxsave, &kstate->regs.fxsave,
- sizeof(ustate->fxsave));
-diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
-index 1afbc4866b10..463ec0cd0dab 100644
---- a/arch/x86/kernel/fpu/xstate.c
-+++ b/arch/x86/kernel/fpu/xstate.c
-@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
- * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
- * @to: membuf descriptor
- * @fpstate: The fpstate buffer from which to copy
-+ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
- * @pkru_val: The PKRU value to store in the PKRU component
- * @copy_mode: The requested copy mode
- *
-@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
- * It supports partial copy but @to.pos always starts from zero.
- */
- void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
-- u32 pkru_val, enum xstate_copy_mode copy_mode)
-+ u64 xfeatures, u32 pkru_val,
-+ enum xstate_copy_mode copy_mode)
- {
- const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
- struct xregs_state *xinit = &init_fpstate.regs.xsave;
-@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- break;
-
- case XSTATE_COPY_XSAVE:
-- header.xfeatures &= fpstate->user_xfeatures;
-+ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
- break;
- }
-
-@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
- enum xstate_copy_mode copy_mode)
- {
- __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
-+ tsk->thread.fpu.fpstate->user_xfeatures,
- tsk->thread.pkru, copy_mode);
- }
-
-diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
-index a4ecb04d8d64..3518fb26d06b 100644
---- a/arch/x86/kernel/fpu/xstate.h
-+++ b/arch/x86/kernel/fpu/xstate.h
-@@ -43,7 +43,8 @@ enum xstate_copy_mode {
-
- struct membuf;
- extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
-- u32 pkru_val, enum xstate_copy_mode copy_mode);
-+ u64 xfeatures, u32 pkru_val,
-+ enum xstate_copy_mode copy_mode);
- extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
- enum xstate_copy_mode mode);
- extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index ff92ff41d5ce..a43a950d04cb 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -5314,26 +5314,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
- return 0;
- }
-
--static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-- struct kvm_xsave *guest_xsave)
-+
-+static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
-+ u8 *state, unsigned int size)
- {
- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
-
-- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
-- guest_xsave->region,
-- sizeof(guest_xsave->region),
-+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
-+ vcpu->arch.guest_fpu.fpstate->user_xfeatures,
- vcpu->arch.pkru);
- }
-
--static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
-- u8 *state, unsigned int size)
-+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-+ struct kvm_xsave *guest_xsave)
- {
-- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
-- return;
--
-- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
-- state, size, vcpu->arch.pkru);
-+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
-+ sizeof(guest_xsave->region));
- }
-
- static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 27 Sep 2023 17:19:53 -0700
-Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at
- KVM_GET_XSAVE{2}
-
-Mask off xfeatures that aren't exposed to the guest only when saving guest
-state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
-Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
-ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
-Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
-to load xfeatures that are supported by the host, irrespective of what
-xfeatures are exposed to the guest.
-
-There is no known use case where userspace *intentionally* loads xfeatures
-that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
-was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't
-exposed to the guest, e.g. would lead to userspace unintentionally loading
-guest-unsupported xfeatures when live migrating a VM.
-
-Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
-problematic for QEMU-based setups, as QEMU has a bug where instead of
-terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
-loading guest state, i.e. resumes the guest after live migration with
-incomplete guest state, and ultimately results in guest data corruption.
-
-Note, letting userspace restore all host-supported xfeatures does not fix
-setups where a VM is migrated from a host *without* commit ad856280ddea,
-to a target with a subset of host-supported xfeatures. However there is
-no way to safely address that scenario, e.g. KVM could silently drop the
-unsupported features, but that would be a clear violation of KVM's ABI and
-so would require userspace to opt-in, at which point userspace could
-simply be updated to sanitize the to-be-loaded XSAVE state.
-
-Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
-Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
-Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
-Cc: stable@vger.kernel.org
-Cc: Leonardo Bras <leobras@redhat.com>
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
-Message-Id: <20230928001956.924301-3-seanjc@google.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c)
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kernel/fpu/xstate.c | 5 +----
- arch/x86/kvm/cpuid.c | 8 --------
- arch/x86/kvm/x86.c | 18 ++++++++++++++++--
- 3 files changed, 17 insertions(+), 14 deletions(-)
-
-diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
-index 463ec0cd0dab..ebe698f8af73 100644
---- a/arch/x86/kernel/fpu/xstate.c
-+++ b/arch/x86/kernel/fpu/xstate.c
-@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
- fpregs_restore_userregs();
-
- newfps->xfeatures = curfps->xfeatures | xfeatures;
--
-- if (!guest_fpu)
-- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
--
-+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
- newfps->xfd = curfps->xfd & ~xfeatures;
-
- /* Do the final updates within the locked region */
-diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index 61aefeb3fdbc..e5393ee652ba 100644
---- a/arch/x86/kvm/cpuid.c
-+++ b/arch/x86/kvm/cpuid.c
-@@ -350,14 +350,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
- vcpu->arch.guest_supported_xcr0 =
- cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
-
-- /*
-- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
-- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
-- * supported by the host.
-- */
-- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
-- XFEATURE_MASK_FPSSE;
--
- kvm_update_pv_runtime(vcpu);
-
- vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index a43a950d04cb..a4a44adf7c72 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -5318,12 +5318,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
- static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
- u8 *state, unsigned int size)
- {
-+ /*
-+ * Only copy state for features that are enabled for the guest. The
-+ * state itself isn't problematic, but setting bits in the header for
-+ * features that are supported in *this* host but not exposed to the
-+ * guest can result in KVM_SET_XSAVE failing when live migrating to a
-+ * compatible host without the features that are NOT exposed to the
-+ * guest.
-+ *
-+ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
-+ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
-+ * supported by the host.
-+ */
-+ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
-+ XFEATURE_MASK_FPSSE;
-+
- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
-
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
-- vcpu->arch.guest_fpu.fpstate->user_xfeatures,
-- vcpu->arch.pkru);
-+ supported_xcr0, vcpu->arch.pkru);
- }
-
- static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-Subproject commit 0b6a250fd5bb058a9965d904b3e6f83b87a0c3b7
+Subproject commit 0f112085de77ffd667df863f7240164da1b8d26f