git.proxmox.com Git - pve-kernel.git/commitdiff
update submodule to Ubuntu-6.5.0-9.9
author: Thomas Lamprecht <t.lamprecht@proxmox.com>
Fri, 13 Oct 2023 13:27:50 +0000 (15:27 +0200)
committer: Thomas Lamprecht <t.lamprecht@proxmox.com>
Sat, 28 Oct 2023 12:28:11 +0000 (14:28 +0200)
from ubuntu mantic sources

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
23 files changed:
patches/kernel/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch
patches/kernel/0005-kvm-disable-default-dynamic-halt-polling-growth.patch
patches/kernel/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch
patches/kernel/0007-Revert-fortify-Do-not-cast-to-unsigned-char.patch
patches/kernel/0008-kvm-xsave-set-mask-out-PKRU-bit-in-xfeatures-if-vCPU.patch
patches/kernel/0009-KVM-x86-mmu-Fix-an-sign-extension-bug-with-mmu_seq-t.patch [deleted file]
patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch [new file with mode: 0644]
patches/kernel/0010-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch [deleted file]
patches/kernel/0010-net-thunderbolt-Fix-TCPv6-GSO-checksum-calculation.patch [new file with mode: 0644]
patches/kernel/0011-net-thunderbolt-Fix-TCPv6-GSO-checksum-calculation.patch [deleted file]
patches/kernel/0011-thunderbolt-Restart-XDomain-discovery-handshake-afte.patch [new file with mode: 0644]
patches/kernel/0012-thunderbolt-Restart-XDomain-discovery-handshake-afte.patch [deleted file]
patches/kernel/0012-x86-cpu-Fix-AMD-erratum-1485-on-Zen4-based-CPUs.patch [new file with mode: 0644]
patches/kernel/0013-Revert-nSVM-Check-for-reserved-encodings-of-TLB_CONT.patch [new file with mode: 0644]
patches/kernel/0013-x86-cpu-Fix-AMD-erratum-1485-on-Zen4-based-CPUs.patch [deleted file]
patches/kernel/0014-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch [new file with mode: 0644]
patches/kernel/0014-Revert-nSVM-Check-for-reserved-encodings-of-TLB_CONT.patch [deleted file]
patches/kernel/0015-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch [deleted file]
patches/kernel/0015-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch [new file with mode: 0644]
patches/kernel/0016-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch [new file with mode: 0644]
patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch [deleted file]
patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch [deleted file]
submodules/ubuntu-kernel

index 72d991ab32e0c52e4cda76c2cf44fff5a2f73bff..9daedb75327693f5c55290b01a1be278e2c3034a 100644 (file)
@@ -55,10 +55,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
  2 files changed, 111 insertions(+)
 
 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index fa73bbcb0c8d..4964bb2e931e 100644
+index 9f2bcb8b7f96..a60a4220be95 100644
 --- a/Documentation/admin-guide/kernel-parameters.txt
 +++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -4209,6 +4209,15 @@
+@@ -4281,6 +4281,15 @@
                                Also, it enforces the PCI Local Bus spec
                                rule that those bits should be 0 in system reset
                                events (useful for kexec/kdump cases).
@@ -75,10 +75,10 @@ index fa73bbcb0c8d..4964bb2e931e 100644
                                Safety option to keep boot IRQs enabled. This
                                should never be necessary.
 diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index 592e1c4ae697..aebf6f412203 100644
+index 92302d5749d8..71387b9aca33 100644
 --- a/drivers/pci/quirks.c
 +++ b/drivers/pci/quirks.c
-@@ -194,6 +194,106 @@ static int __init pci_apply_final_quirks(void)
+@@ -287,6 +287,106 @@ static int __init pci_apply_final_quirks(void)
  }
  fs_initcall_sync(pci_apply_final_quirks);
  
@@ -185,7 +185,7 @@ index 592e1c4ae697..aebf6f412203 100644
  /*
   * Decoding should be disabled for a PCI device during BAR sizing to avoid
   * conflict. But doing so may cause problems on host bridge and perhaps other
-@@ -4974,6 +5074,8 @@ static const struct pci_dev_acs_enabled {
+@@ -5069,6 +5169,8 @@ static const struct pci_dev_acs_enabled {
        { PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
        /* APM X-Gene */
        { PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },
index 91bf4a289b0c3981aeadc667f8d32eb177230e7f..4bc4bd3182dc0d99965bb255fdb5895d52637fed 100644 (file)
@@ -13,10 +13,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 73fad57408f7..99ae3e468ce6 100644
+index 5bbb5612b207..691ce10e7647 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
-@@ -79,7 +79,7 @@ module_param(halt_poll_ns, uint, 0644);
+@@ -82,7 +82,7 @@ module_param(halt_poll_ns, uint, 0644);
  EXPORT_SYMBOL_GPL(halt_poll_ns);
  
  /* Default doubles per-vcpu halt_poll_ns. */
index 14c716c5ef6e8d118b941c28c7d0fea4dd1a9109..3a228647240e0d0e4d215ba9181c2caabee02c8a 100644 (file)
@@ -14,10 +14,10 @@ Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/net/core/dev.c b/net/core/dev.c
-index 555bbe774734..de2e0d0185fc 100644
+index 69a3e544676c..56a45b9b602e 100644
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
-@@ -10262,7 +10262,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
+@@ -10269,7 +10269,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
                if (time_after(jiffies, warning_time +
                               READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
                        list_for_each_entry(dev, list, todo_list) {
index 192e77ee05e4751a26d9a487be14b35d45d9bf85..f420c8f31f0ae75b5d02513d55d08af00341a08b 100644 (file)
@@ -10,12 +10,13 @@ This reverts commit 106b7a61c488d2022f44e3531ce33461c7c0685f.
 
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
 ---
  include/linux/fortify-string.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
-index 7cad8bb031e9..acc24887db3e 100644
+index da51a83b2829..9d9e7822eddf 100644
 --- a/include/linux/fortify-string.h
 +++ b/include/linux/fortify-string.h
 @@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
@@ -25,5 +26,5 @@ index 7cad8bb031e9..acc24887db3e 100644
 -      char *__p = (char *)(p);                                \
 +      unsigned char *__p = (unsigned char *)(p);              \
        size_t __ret = SIZE_MAX;                                \
-       size_t __p_size = __member_size(p);                     \
+       const size_t __p_size = __member_size(p);               \
        if (__p_size != SIZE_MAX &&                             \
index d6053befd427caecee5a45694f60f1aca1913484..f6d8d411abb50724eeeae7588c00ae4bdd6df179 100644 (file)
@@ -78,10 +78,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
  3 files changed, 21 insertions(+)
 
 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index 7ccdf991d18e..61aefeb3fdbc 100644
+index d3432687c9e6..2c20da9aa2ac 100644
 --- a/arch/x86/kvm/cpuid.c
 +++ b/arch/x86/kvm/cpuid.c
-@@ -251,6 +251,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
+@@ -249,6 +249,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
        return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
  }
  
@@ -108,10 +108,10 @@ index b1658c0de847..12a02851ff57 100644
  
  int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index ee603f4edce1..ff92ff41d5ce 100644
+index c381770bcbf1..6690a3722007 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
-@@ -5342,6 +5342,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+@@ -5413,6 +5413,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
        if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
                return 0;
  
diff --git a/patches/kernel/0009-KVM-x86-mmu-Fix-an-sign-extension-bug-with-mmu_seq-t.patch b/patches/kernel/0009-KVM-x86-mmu-Fix-an-sign-extension-bug-with-mmu_seq-t.patch
deleted file mode 100644 (file)
index 18c268e..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 23 Aug 2023 18:01:04 -0700
-Subject: [PATCH] KVM: x86/mmu: Fix an sign-extension bug with mmu_seq that
- hangs vCPUs
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Upstream commit ba6e3fe25543 ("KVM: x86/mmu: Grab mmu_invalidate_seq in
-kvm_faultin_pfn()") unknowingly fixed the bug in v6.3 when refactoring
-how KVM tracks the sequence counter snapshot.
-
-Take the vCPU's mmu_seq snapshot as an "unsigned long" instead of an "int"
-when checking to see if a page fault is stale, as the sequence count is
-stored as an "unsigned long" everywhere else in KVM.  This fixes a bug
-where KVM will effectively hang vCPUs due to always thinking page faults
-are stale, which results in KVM refusing to "fix" faults.
-
-mmu_invalidate_seq (née mmu_notifier_seq) is a sequence counter used when
-KVM is handling page faults to detect if userspace mappings relevant to
-the guest were invalidated between snapshotting the counter and acquiring
-mmu_lock, i.e. to ensure that the userspace mapping KVM is using to
-resolve the page fault is fresh.  If KVM sees that the counter has
-changed, KVM simply resumes the guest without fixing the fault.
-
-What _should_ happen is that the source of the mmu_notifier invalidations
-eventually goes away, mmu_invalidate_seq becomes stable, and KVM can once
-again fix guest page fault(s).
-
-But for a long-lived VM and/or a VM that the host just doesn't particularly
-like, it's possible for a VM to be on the receiving end of 2 billion (with
-a B) mmu_notifier invalidations.  When that happens, bit 31 will be set in
-mmu_invalidate_seq.  This causes the value to be turned into a 32-bit
-negative value when implicitly cast to an "int" by is_page_fault_stale(),
-and then sign-extended into a 64-bit unsigned when the signed "int" is
-implicitly cast back to an "unsigned long" on the call to
-mmu_invalidate_retry_hva().
-
-As a result of the casting and sign-extension, given a sequence counter of
-e.g. 0x8002dc25, mmu_invalidate_retry_hva() ends up doing
-
-       if (0x8002dc25 != 0xffffffff8002dc25)
-
-and signals that the page fault is stale and needs to be retried even
-though the sequence counter is stable, and KVM effectively hangs any vCPU
-that takes a page fault (EPT violation or #NPF when TDP is enabled).
-
-Reported-by: Brian Rak <brak@vultr.com>
-Reported-by: Amaan Cheval <amaan.cheval@gmail.com>
-Reported-by: Eric Wheeler <kvm@lists.ewheeler.net>
-Closes: https://lore.kernel.org/all/f023d927-52aa-7e08-2ee5-59a2fbc65953@gameservers.com
-Fixes: a955cad84cda ("KVM: x86/mmu: Retry page fault if root is invalidated by memslot update")
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-(cherry-picked from commit 82d811ff566594de3676f35808e8a9e19c5c864c in stable v6.1.51)
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
----
- arch/x86/kvm/mmu/mmu.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
-index 3220c1285984..c42ba5cde7a4 100644
---- a/arch/x86/kvm/mmu/mmu.c
-+++ b/arch/x86/kvm/mmu/mmu.c
-@@ -4261,7 +4261,8 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
-  * root was invalidated by a memslot update or a relevant mmu_notifier fired.
-  */
- static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
--                              struct kvm_page_fault *fault, int mmu_seq)
-+                              struct kvm_page_fault *fault,
-+                              unsigned long mmu_seq)
- {
-       struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
diff --git a/patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch b/patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch
new file mode 100644 (file)
index 0000000..84d5b22
--- /dev/null
@@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: kiler129 <grzegorz@noflash.pl>
+Date: Mon, 18 Sep 2023 15:19:26 +0200
+Subject: [PATCH] allow opt-in to allow pass-through on broken hardware..
+
+adapted from https://github.com/kiler129/relax-intel-rmrr , licensed under MIT or GPL 2.0+
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/iommu/intel/iommu.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index a377f8e0a414..3be334d34317 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -298,6 +298,7 @@ static int dmar_map_gfx = 1;
+ static int dmar_map_ipu = 1;
+ static int intel_iommu_superpage = 1;
+ static int iommu_identity_mapping;
++static int intel_relaxable_rmrr = 0;
+ static int iommu_skip_te_disable;
+ #define IDENTMAP_GFX          2
+@@ -359,6 +360,9 @@ static int __init intel_iommu_setup(char *str)
+               } else if (!strncmp(str, "tboot_noforce", 13)) {
+                       pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+                       intel_iommu_tboot_noforce = 1;
++              } else if (!strncmp(str, "relax_rmrr", 10)) {
++                      pr_info("Intel-IOMMU: assuming all RMRRs are relaxable. This can lead to instability or data loss\n");
++                      intel_relaxable_rmrr = 1;
+               } else {
+                       pr_notice("Unknown option - '%s'\n", str);
+               }
+@@ -2503,7 +2507,7 @@ static bool device_rmrr_is_relaxable(struct device *dev)
+               return false;
+       pdev = to_pci_dev(dev);
+-      if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
++      if (intel_relaxable_rmrr || IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
+               return true;
+       else
+               return false;
diff --git a/patches/kernel/0010-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch b/patches/kernel/0010-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch
deleted file mode 100644 (file)
index 40b0a15..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: kiler129 <grzegorz@noflash.pl>
-Date: Mon, 18 Sep 2023 15:19:26 +0200
-Subject: [PATCH] allow opt-in to allow pass-through on broken hardware..
-
-adapted from https://github.com/kiler129/relax-intel-rmrr , licensed under MIT or GPL 2.0+
----
- drivers/iommu/intel/iommu.c | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 1c5ba4dbfe78..887667218e3b 100644
---- a/drivers/iommu/intel/iommu.c
-+++ b/drivers/iommu/intel/iommu.c
-@@ -297,6 +297,7 @@ static int dmar_map_gfx = 1;
- static int dmar_map_ipu = 1;
- static int intel_iommu_superpage = 1;
- static int iommu_identity_mapping;
-+static int intel_relaxable_rmrr = 0;
- static int iommu_skip_te_disable;
- #define IDENTMAP_GFX          2
-@@ -358,6 +359,9 @@ static int __init intel_iommu_setup(char *str)
-               } else if (!strncmp(str, "tboot_noforce", 13)) {
-                       pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
-                       intel_iommu_tboot_noforce = 1;
-+              } else if (!strncmp(str, "relax_rmrr", 10)) {
-+                      pr_info("Intel-IOMMU: assuming all RMRRs are relaxable. This can lead to instability or data loss\n");
-+                      intel_relaxable_rmrr = 1;
-               } else {
-                       pr_notice("Unknown option - '%s'\n", str);
-               }
-@@ -2538,7 +2542,7 @@ static bool device_rmrr_is_relaxable(struct device *dev)
-               return false;
-       pdev = to_pci_dev(dev);
--      if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
-+      if (intel_relaxable_rmrr || IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
-               return true;
-       else
-               return false;
diff --git a/patches/kernel/0010-net-thunderbolt-Fix-TCPv6-GSO-checksum-calculation.patch b/patches/kernel/0010-net-thunderbolt-Fix-TCPv6-GSO-checksum-calculation.patch
new file mode 100644 (file)
index 0000000..b3d7a41
--- /dev/null
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Wed, 13 Sep 2023 08:26:47 +0300
+Subject: [PATCH] net: thunderbolt: Fix TCPv6 GSO checksum calculation
+
+Alex reported that running ssh over IPv6 does not work with
+Thunderbolt/USB4 networking driver. The reason for that is that driver
+should call skb_is_gso() before calling skb_is_gso_v6(), and it should
+not return false after calculates the checksum successfully. This probably
+was a copy paste error from the original driver where it was done properly.
+
+Reported-by: Alex Balcanquall <alex@alexbal.com>
+Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/net/thunderbolt/main.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c
+index 0c1e8970ee58..0a53ec293d04 100644
+--- a/drivers/net/thunderbolt/main.c
++++ b/drivers/net/thunderbolt/main.c
+@@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
+               *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+                                           ip_hdr(skb)->daddr, 0,
+                                           ip_hdr(skb)->protocol, 0);
+-      } else if (skb_is_gso_v6(skb)) {
++      } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
+               tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
+               *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                         &ipv6_hdr(skb)->daddr, 0,
+                                         IPPROTO_TCP, 0);
+-              return false;
+       } else if (protocol == htons(ETH_P_IPV6)) {
+               tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
+               *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
diff --git a/patches/kernel/0011-net-thunderbolt-Fix-TCPv6-GSO-checksum-calculation.patch b/patches/kernel/0011-net-thunderbolt-Fix-TCPv6-GSO-checksum-calculation.patch
deleted file mode 100644 (file)
index 4bae939..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Mika Westerberg <mika.westerberg@linux.intel.com>
-Date: Wed, 13 Sep 2023 08:26:47 +0300
-Subject: [PATCH] net: thunderbolt: Fix TCPv6 GSO checksum calculation
-
-Alex reported that running ssh over IPv6 does not work with
-Thunderbolt/USB4 networking driver. The reason for that is that driver
-should call skb_is_gso() before calling skb_is_gso_v6(), and it should
-not return false after calculates the checksum successfully. This probably
-was a copy paste error from the original driver where it was done properly.
-
-Reported-by: Alex Balcanquall <alex@alexbal.com>
-Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
-Cc: stable@vger.kernel.org
-Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
-Reviewed-by: Eric Dumazet <edumazet@google.com>
-Reviewed-by: Jiri Pirko <jiri@nvidia.com>
-Reviewed-by: Jiri Pirko <jiri@nvidia.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- drivers/net/thunderbolt.c | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
-index 990484776f2d..0c554a7a5ce4 100644
---- a/drivers/net/thunderbolt.c
-+++ b/drivers/net/thunderbolt.c
-@@ -1005,12 +1005,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
-               *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
-                                           ip_hdr(skb)->daddr, 0,
-                                           ip_hdr(skb)->protocol, 0);
--      } else if (skb_is_gso_v6(skb)) {
-+      } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
-               tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
-               *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-                                         &ipv6_hdr(skb)->daddr, 0,
-                                         IPPROTO_TCP, 0);
--              return false;
-       } else if (protocol == htons(ETH_P_IPV6)) {
-               tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
-               *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
diff --git a/patches/kernel/0011-thunderbolt-Restart-XDomain-discovery-handshake-afte.patch b/patches/kernel/0011-thunderbolt-Restart-XDomain-discovery-handshake-afte.patch
new file mode 100644 (file)
index 0000000..6af7817
--- /dev/null
@@ -0,0 +1,134 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 7 Sep 2023 16:02:30 +0300
+Subject: [PATCH] thunderbolt: Restart XDomain discovery handshake after
+ failure
+
+Alex reported that after rebooting the other host the peer-to-peer link
+does not come up anymore. The reason for this is that the host that was
+not rebooted tries to send the UUID request only 10 times according to
+the USB4 Inter-Domain spec and gives up if it does not get reply. Then
+when the other side is actually ready it cannot get the link established
+anymore. The USB4 Inter-Domain spec requires that the discovery protocol
+is restarted in that case so implement this now.
+
+Reported-by: Alex Balcanquall <alex@alexbal.com>
+Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/thunderbolt/xdomain.c | 58 +++++++++++++++++++++++++----------
+ 1 file changed, 41 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
+index 5b5566862318..9803f0bbf20d 100644
+--- a/drivers/thunderbolt/xdomain.c
++++ b/drivers/thunderbolt/xdomain.c
+@@ -703,6 +703,27 @@ static void update_property_block(struct tb_xdomain *xd)
+       mutex_unlock(&xdomain_lock);
+ }
++static void start_handshake(struct tb_xdomain *xd)
++{
++      xd->state = XDOMAIN_STATE_INIT;
++      queue_delayed_work(xd->tb->wq, &xd->state_work,
++                         msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
++}
++
++/* Can be called from state_work */
++static void __stop_handshake(struct tb_xdomain *xd)
++{
++      cancel_delayed_work_sync(&xd->properties_changed_work);
++      xd->properties_changed_retries = 0;
++      xd->state_retries = 0;
++}
++
++static void stop_handshake(struct tb_xdomain *xd)
++{
++      cancel_delayed_work_sync(&xd->state_work);
++      __stop_handshake(xd);
++}
++
+ static void tb_xdp_handle_request(struct work_struct *work)
+ {
+       struct xdomain_request_work *xw = container_of(work, typeof(*xw), work);
+@@ -765,6 +786,15 @@ static void tb_xdp_handle_request(struct work_struct *work)
+       case UUID_REQUEST:
+               tb_dbg(tb, "%llx: received XDomain UUID request\n", route);
+               ret = tb_xdp_uuid_response(ctl, route, sequence, uuid);
++              /*
++               * If we've stopped the discovery with an error such as
++               * timing out, we will restart the handshake now that we
++               * received UUID request from the remote host.
++               */
++              if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) {
++                      dev_dbg(&xd->dev, "restarting handshake\n");
++                      start_handshake(xd);
++              }
+               break;
+       case LINK_STATE_STATUS_REQUEST:
+@@ -1521,6 +1551,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd)
+                          msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+ }
++static void tb_xdomain_failed(struct tb_xdomain *xd)
++{
++      xd->state = XDOMAIN_STATE_ERROR;
++      queue_delayed_work(xd->tb->wq, &xd->state_work,
++                         msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT));
++}
++
+ static void tb_xdomain_state_work(struct work_struct *work)
+ {
+       struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work);
+@@ -1547,7 +1584,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
+               if (ret) {
+                       if (ret == -EAGAIN)
+                               goto retry_state;
+-                      xd->state = XDOMAIN_STATE_ERROR;
++                      tb_xdomain_failed(xd);
+               } else {
+                       tb_xdomain_queue_properties_changed(xd);
+                       if (xd->bonding_possible)
+@@ -1612,7 +1649,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
+               if (ret) {
+                       if (ret == -EAGAIN)
+                               goto retry_state;
+-                      xd->state = XDOMAIN_STATE_ERROR;
++                      tb_xdomain_failed(xd);
+               } else {
+                       xd->state = XDOMAIN_STATE_ENUMERATED;
+               }
+@@ -1623,6 +1660,8 @@ static void tb_xdomain_state_work(struct work_struct *work)
+               break;
+       case XDOMAIN_STATE_ERROR:
++              dev_dbg(&xd->dev, "discovery failed, stopping handshake\n");
++              __stop_handshake(xd);
+               break;
+       default:
+@@ -1833,21 +1872,6 @@ static void tb_xdomain_release(struct device *dev)
+       kfree(xd);
+ }
+-static void start_handshake(struct tb_xdomain *xd)
+-{
+-      xd->state = XDOMAIN_STATE_INIT;
+-      queue_delayed_work(xd->tb->wq, &xd->state_work,
+-                         msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
+-}
+-
+-static void stop_handshake(struct tb_xdomain *xd)
+-{
+-      cancel_delayed_work_sync(&xd->properties_changed_work);
+-      cancel_delayed_work_sync(&xd->state_work);
+-      xd->properties_changed_retries = 0;
+-      xd->state_retries = 0;
+-}
+-
+ static int __maybe_unused tb_xdomain_suspend(struct device *dev)
+ {
+       stop_handshake(tb_to_xdomain(dev));
diff --git a/patches/kernel/0012-thunderbolt-Restart-XDomain-discovery-handshake-afte.patch b/patches/kernel/0012-thunderbolt-Restart-XDomain-discovery-handshake-afte.patch
deleted file mode 100644 (file)
index d15296e..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Mika Westerberg <mika.westerberg@linux.intel.com>
-Date: Thu, 7 Sep 2023 16:02:30 +0300
-Subject: [PATCH] thunderbolt: Restart XDomain discovery handshake after
- failure
-
-Alex reported that after rebooting the other host the peer-to-peer link
-does not come up anymore. The reason for this is that the host that was
-not rebooted tries to send the UUID request only 10 times according to
-the USB4 Inter-Domain spec and gives up if it does not get reply. Then
-when the other side is actually ready it cannot get the link established
-anymore. The USB4 Inter-Domain spec requires that the discovery protocol
-is restarted in that case so implement this now.
-
-Reported-by: Alex Balcanquall <alex@alexbal.com>
-Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding")
-Cc: stable@vger.kernel.org
-Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- drivers/thunderbolt/xdomain.c | 58 +++++++++++++++++++++++++----------
- 1 file changed, 41 insertions(+), 17 deletions(-)
-
-diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
-index 3c51e47dd86b..0b17a4d4e9b9 100644
---- a/drivers/thunderbolt/xdomain.c
-+++ b/drivers/thunderbolt/xdomain.c
-@@ -704,6 +704,27 @@ static void update_property_block(struct tb_xdomain *xd)
-       mutex_unlock(&xdomain_lock);
- }
-+static void start_handshake(struct tb_xdomain *xd)
-+{
-+      xd->state = XDOMAIN_STATE_INIT;
-+      queue_delayed_work(xd->tb->wq, &xd->state_work,
-+                         msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
-+}
-+
-+/* Can be called from state_work */
-+static void __stop_handshake(struct tb_xdomain *xd)
-+{
-+      cancel_delayed_work_sync(&xd->properties_changed_work);
-+      xd->properties_changed_retries = 0;
-+      xd->state_retries = 0;
-+}
-+
-+static void stop_handshake(struct tb_xdomain *xd)
-+{
-+      cancel_delayed_work_sync(&xd->state_work);
-+      __stop_handshake(xd);
-+}
-+
- static void tb_xdp_handle_request(struct work_struct *work)
- {
-       struct xdomain_request_work *xw = container_of(work, typeof(*xw), work);
-@@ -766,6 +787,15 @@ static void tb_xdp_handle_request(struct work_struct *work)
-       case UUID_REQUEST:
-               tb_dbg(tb, "%llx: received XDomain UUID request\n", route);
-               ret = tb_xdp_uuid_response(ctl, route, sequence, uuid);
-+              /*
-+               * If we've stopped the discovery with an error such as
-+               * timing out, we will restart the handshake now that we
-+               * received UUID request from the remote host.
-+               */
-+              if (!ret && xd && xd->state == XDOMAIN_STATE_ERROR) {
-+                      dev_dbg(&xd->dev, "restarting handshake\n");
-+                      start_handshake(xd);
-+              }
-               break;
-       case LINK_STATE_STATUS_REQUEST:
-@@ -1522,6 +1552,13 @@ static void tb_xdomain_queue_properties_changed(struct tb_xdomain *xd)
-                          msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
- }
-+static void tb_xdomain_failed(struct tb_xdomain *xd)
-+{
-+      xd->state = XDOMAIN_STATE_ERROR;
-+      queue_delayed_work(xd->tb->wq, &xd->state_work,
-+                         msecs_to_jiffies(XDOMAIN_DEFAULT_TIMEOUT));
-+}
-+
- static void tb_xdomain_state_work(struct work_struct *work)
- {
-       struct tb_xdomain *xd = container_of(work, typeof(*xd), state_work.work);
-@@ -1548,7 +1585,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
-               if (ret) {
-                       if (ret == -EAGAIN)
-                               goto retry_state;
--                      xd->state = XDOMAIN_STATE_ERROR;
-+                      tb_xdomain_failed(xd);
-               } else {
-                       tb_xdomain_queue_properties_changed(xd);
-                       if (xd->bonding_possible)
-@@ -1613,7 +1650,7 @@ static void tb_xdomain_state_work(struct work_struct *work)
-               if (ret) {
-                       if (ret == -EAGAIN)
-                               goto retry_state;
--                      xd->state = XDOMAIN_STATE_ERROR;
-+                      tb_xdomain_failed(xd);
-               } else {
-                       xd->state = XDOMAIN_STATE_ENUMERATED;
-               }
-@@ -1624,6 +1661,8 @@ static void tb_xdomain_state_work(struct work_struct *work)
-               break;
-       case XDOMAIN_STATE_ERROR:
-+              dev_dbg(&xd->dev, "discovery failed, stopping handshake\n");
-+              __stop_handshake(xd);
-               break;
-       default:
-@@ -1793,21 +1832,6 @@ static void tb_xdomain_release(struct device *dev)
-       kfree(xd);
- }
--static void start_handshake(struct tb_xdomain *xd)
--{
--      xd->state = XDOMAIN_STATE_INIT;
--      queue_delayed_work(xd->tb->wq, &xd->state_work,
--                         msecs_to_jiffies(XDOMAIN_SHORT_TIMEOUT));
--}
--
--static void stop_handshake(struct tb_xdomain *xd)
--{
--      cancel_delayed_work_sync(&xd->properties_changed_work);
--      cancel_delayed_work_sync(&xd->state_work);
--      xd->properties_changed_retries = 0;
--      xd->state_retries = 0;
--}
--
- static int __maybe_unused tb_xdomain_suspend(struct device *dev)
- {
-       stop_handshake(tb_to_xdomain(dev));
diff --git a/patches/kernel/0012-x86-cpu-Fix-AMD-erratum-1485-on-Zen4-based-CPUs.patch b/patches/kernel/0012-x86-cpu-Fix-AMD-erratum-1485-on-Zen4-based-CPUs.patch
new file mode 100644 (file)
index 0000000..a8e9fb0
--- /dev/null
@@ -0,0 +1,72 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Sat, 7 Oct 2023 12:57:02 +0200
+Subject: [PATCH] x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fix erratum #1485 on Zen4 parts where running with STIBP disabled can
+cause an #UD exception. The performance impact of the fix is negligible.
+
+Reported-by: René Rebe <rene@exactcode.de>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: René Rebe <rene@exactcode.de>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/include/asm/msr-index.h | 9 +++++++--
+ arch/x86/kernel/cpu/amd.c        | 8 ++++++++
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 1d111350197f..b37abb55e948 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -637,12 +637,17 @@
+ /* AMD Last Branch Record MSRs */
+ #define MSR_AMD64_LBR_SELECT                  0xc000010e
+-/* Fam 17h MSRs */
+-#define MSR_F17H_IRPERF                       0xc00000e9
++/* Zen4 */
++#define MSR_ZEN4_BP_CFG                       0xc001102e
++#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
++/* Zen 2 */
+ #define MSR_ZEN2_SPECTRAL_CHICKEN     0xc00110e3
+ #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
++/* Fam 17h MSRs */
++#define MSR_F17H_IRPERF                       0xc00000e9
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL         0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR         0xc0010231
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 7eca6a8abbb1..981bc23665a3 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -80,6 +80,10 @@ static const int amd_div0[] =
+       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
++static const int amd_erratum_1485[] =
++      AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
++                         AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
++
+ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+ {
+       int osvw_id = *erratum++;
+@@ -1140,6 +1144,10 @@ static void init_amd(struct cpuinfo_x86 *c)
+               pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+               setup_force_cpu_bug(X86_BUG_DIV0);
+       }
++
++      if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
++           cpu_has_amd_erratum(c, amd_erratum_1485))
++              msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+ }
+ #ifdef CONFIG_X86_32
diff --git a/patches/kernel/0013-Revert-nSVM-Check-for-reserved-encodings-of-TLB_CONT.patch b/patches/kernel/0013-Revert-nSVM-Check-for-reserved-encodings-of-TLB_CONT.patch
new file mode 100644 (file)
index 0000000..a0f8f3a
--- /dev/null
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Oct 2023 12:41:03 -0700
+Subject: [PATCH] Revert "nSVM: Check for reserved encodings of TLB_CONTROL in
+ nested VMCB"
+
+Revert KVM's made-up consistency check on SVM's TLB control.  The APM says
+that unsupported encodings are reserved, but the APM doesn't state that
+VMRUN checks for a supported encoding.  Unless something is called out
+in "Canonicalization and Consistency Checks" or listed as MBZ (Must Be
+Zero), AMD behavior is typically to let software shoot itself in the foot.
+
+This reverts commit 174a921b6975ef959dd82ee9e8844067a62e3ec1.
+
+Fixes: 174a921b6975 ("nSVM: Check for reserved encodings of TLB_CONTROL in nested VMCB")
+Reported-by: Stefan Sterz <s.sterz@proxmox.com>
+Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kvm/svm/nested.c | 15 ---------------
+ 1 file changed, 15 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
+index 96936ddf1b3c..92db000409a9 100644
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -247,18 +247,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
+           kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
+ }
+-static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
+-{
+-      /* Nested FLUSHBYASID is not supported yet.  */
+-      switch(tlb_ctl) {
+-              case TLB_CONTROL_DO_NOTHING:
+-              case TLB_CONTROL_FLUSH_ALL_ASID:
+-                      return true;
+-              default:
+-                      return false;
+-      }
+-}
+-
+ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+                                        struct vmcb_ctrl_area_cached *control)
+ {
+@@ -278,9 +266,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+                                          IOPM_SIZE)))
+               return false;
+-      if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
+-              return false;
+-
+       if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
+              !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
+               return false;
diff --git a/patches/kernel/0013-x86-cpu-Fix-AMD-erratum-1485-on-Zen4-based-CPUs.patch b/patches/kernel/0013-x86-cpu-Fix-AMD-erratum-1485-on-Zen4-based-CPUs.patch
deleted file mode 100644 (file)
index 9f1201e..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Borislav Petkov (AMD)" <bp@alien8.de>
-Date: Sat, 7 Oct 2023 12:57:02 +0200
-Subject: [PATCH] x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Fix erratum #1485 on Zen4 parts where running with STIBP disabled can
-cause an #UD exception. The performance impact of the fix is negligible.
-
-Reported-by: René Rebe <rene@exactcode.de>
-Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
-Tested-by: René Rebe <rene@exactcode.de>
-Cc: <stable@kernel.org>
-Link: https://lore.kernel.org/r/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/include/asm/msr-index.h | 9 +++++++--
- arch/x86/kernel/cpu/amd.c        | 8 ++++++++
- 2 files changed, 15 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index ebbf80d8b8bd..a79b10e57757 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -630,12 +630,17 @@
- /* AMD Last Branch Record MSRs */
- #define MSR_AMD64_LBR_SELECT                  0xc000010e
--/* Fam 17h MSRs */
--#define MSR_F17H_IRPERF                       0xc00000e9
-+/* Zen4 */
-+#define MSR_ZEN4_BP_CFG                       0xc001102e
-+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
-+/* Zen 2 */
- #define MSR_ZEN2_SPECTRAL_CHICKEN     0xc00110e3
- #define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
-+/* Fam 17h MSRs */
-+#define MSR_F17H_IRPERF                       0xc00000e9
-+
- /* Fam 16h MSRs */
- #define MSR_F16H_L2I_PERF_CTL         0xc0010230
- #define MSR_F16H_L2I_PERF_CTR         0xc0010231
-diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
-index 6daf6a8fa0c7..044e3869620c 100644
---- a/arch/x86/kernel/cpu/amd.c
-+++ b/arch/x86/kernel/cpu/amd.c
-@@ -79,6 +79,10 @@ static const int amd_div0[] =
-       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
-                          AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
-+static const int amd_erratum_1485[] =
-+      AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
-+                         AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
-+
- static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
- {
-       int osvw_id = *erratum++;
-@@ -1124,6 +1128,10 @@ static void init_amd(struct cpuinfo_x86 *c)
-               pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
-               setup_force_cpu_bug(X86_BUG_DIV0);
-       }
-+
-+      if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
-+           cpu_has_amd_erratum(c, amd_erratum_1485))
-+              msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
- }
- #ifdef CONFIG_X86_32
diff --git a/patches/kernel/0014-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch b/patches/kernel/0014-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch
new file mode 100644 (file)
index 0000000..3592431
--- /dev/null
@@ -0,0 +1,37 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Oct 2023 12:41:04 -0700
+Subject: [PATCH] KVM: nSVM: Advertise support for flush-by-ASID
+
+Advertise support for FLUSHBYASID when nested SVM is enabled, as KVM can
+always emulate flushing TLB entries for a vmcb12 ASID, e.g. by running L2
+with a new, fresh ASID in vmcb02.  Some modern hypervisors, e.g. VMware
+Workstation 17, require FLUSHBYASID support and will refuse to run if it's
+not present.
+
+Punt on proper support, as "Honor L1's request to flush an ASID on nested
+VMRUN" is one of the TODO items in the (incomplete) list of issues that
+need to be addressed in order for KVM to NOT do a full TLB flush on every
+nested SVM transition (see nested_svm_transition_tlb_flush()).
+
+Reported-by: Stefan Sterz <s.sterz@proxmox.com>
+Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kvm/svm/svm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 2ec76ab525ea..ef3215286428 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4969,6 +4969,7 @@ static __init void svm_set_cpu_caps(void)
+       if (nested) {
+               kvm_cpu_cap_set(X86_FEATURE_SVM);
+               kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
++              kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
+               if (nrips)
+                       kvm_cpu_cap_set(X86_FEATURE_NRIPS);
diff --git a/patches/kernel/0014-Revert-nSVM-Check-for-reserved-encodings-of-TLB_CONT.patch b/patches/kernel/0014-Revert-nSVM-Check-for-reserved-encodings-of-TLB_CONT.patch
deleted file mode 100644 (file)
index 2ad8c3a..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Stefan Sterz <s.sterz@proxmox.com>
-Date: Wed, 18 Oct 2023 10:45:45 +0200
-Subject: [PATCH] Revert "nSVM: Check for reserved encodings of TLB_CONTROL in
- nested VMCB"
-
-This reverts commit 174a921b6975ef959dd82ee9e8844067a62e3ec1.
-
-Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
----
- arch/x86/kvm/svm/nested.c | 15 ---------------
- 1 file changed, 15 deletions(-)
-
-diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
-index add65dd59756..61a6c0235519 100644
---- a/arch/x86/kvm/svm/nested.c
-+++ b/arch/x86/kvm/svm/nested.c
-@@ -242,18 +242,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
-           kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
- }
--static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
--{
--      /* Nested FLUSHBYASID is not supported yet.  */
--      switch(tlb_ctl) {
--              case TLB_CONTROL_DO_NOTHING:
--              case TLB_CONTROL_FLUSH_ALL_ASID:
--                      return true;
--              default:
--                      return false;
--      }
--}
--
- static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
-                                        struct vmcb_ctrl_area_cached *control)
- {
-@@ -273,9 +261,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
-                                          IOPM_SIZE)))
-               return false;
--      if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
--              return false;
--
-       return true;
- }
diff --git a/patches/kernel/0015-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch b/patches/kernel/0015-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch
deleted file mode 100644 (file)
index e6e4f0e..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 18 Oct 2023 12:41:04 -0700
-Subject: [PATCH] KVM: nSVM: Advertise support for flush-by-ASID
-
-Advertise support for FLUSHBYASID when nested SVM is enabled, as KVM can
-always emulate flushing TLB entries for a vmcb12 ASID, e.g. by running L2
-with a new, fresh ASID in vmcb02.  Some modern hypervisors, e.g. VMWare
-Workstation 17, require FLUSHBYASID support and will refuse to run if it's
-not present.
-
-Punt on proper support, as "Honor L1's request to flush an ASID on nested
-VMRUN" is one of the TODO items in the (incomplete) list of issues that
-need to be addressed in order for KVM to NOT do a full TLB flush on every
-nested SVM transition (see nested_svm_transition_tlb_flush()).
-
-Reported-by: Stefan Sterz <s.sterz@proxmox.com>
-Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
----
- arch/x86/kvm/svm/svm.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index fb9cde86930d..db8028864094 100644
---- a/arch/x86/kvm/svm/svm.c
-+++ b/arch/x86/kvm/svm/svm.c
-@@ -4921,6 +4921,7 @@ static __init void svm_set_cpu_caps(void)
-       if (nested) {
-               kvm_cpu_cap_set(X86_FEATURE_SVM);
-               kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
-+              kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
-               if (nrips)
-                       kvm_cpu_cap_set(X86_FEATURE_NRIPS);
diff --git a/patches/kernel/0015-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch b/patches/kernel/0015-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch
new file mode 100644 (file)
index 0000000..e4837b9
--- /dev/null
@@ -0,0 +1,164 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:52 -0700
+Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to
+ uabi buffer
+
+Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
+constrain which xfeatures are saved into the userspace buffer without
+having to modify the user_xfeatures field in KVM's guest_fpu state.
+
+KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
+guest must not show up in the effective xstate_bv field of the buffer.
+Saving only the guest-supported xfeatures allows userspace to load the
+saved state on a different host with fewer xfeatures, so long as the
+target host supports the xfeatures that are exposed to the guest.
+
+KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
+the set of guest-supported xfeatures, but doing so broke KVM's historical
+ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
+are supported by the *host*.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928001956.924301-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22)
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/include/asm/fpu/api.h |  3 ++-
+ arch/x86/kernel/fpu/core.c     |  5 +++--
+ arch/x86/kernel/fpu/xstate.c   |  7 +++++--
+ arch/x86/kernel/fpu/xstate.h   |  3 ++-
+ arch/x86/kvm/x86.c             | 21 +++++++++------------
+ 5 files changed, 21 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
+index b475d9a582b8..e829fa4c6788 100644
+--- a/arch/x86/include/asm/fpu/api.h
++++ b/arch/x86/include/asm/fpu/api.h
+@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
+ static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+ #endif
+-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
++extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
++                                         unsigned int size, u64 xfeatures, u32 pkru);
+ extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+ static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
+diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
+index 98e507cc7d34..b582325b9c37 100644
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
+ EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
+ void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+-                                  unsigned int size, u32 pkru)
++                                  unsigned int size, u64 xfeatures, u32 pkru)
+ {
+       struct fpstate *kstate = gfpu->fpstate;
+       union fpregs_state *ustate = buf;
+       struct membuf mb = { .p = buf, .left = size };
+       if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+-              __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
++              __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
++                                        XSTATE_COPY_XSAVE);
+       } else {
+               memcpy(&ustate->fxsave, &kstate->regs.fxsave,
+                      sizeof(ustate->fxsave));
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 1afbc4866b10..463ec0cd0dab 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
+  * @to:               membuf descriptor
+  * @fpstate:  The fpstate buffer from which to copy
++ * @xfeatures:        The mask of xfeatures to save (XSAVE mode only)
+  * @pkru_val: The PKRU value to store in the PKRU component
+  * @copy_mode:        The requested copy mode
+  *
+@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+  * It supports partial copy but @to.pos always starts from zero.
+  */
+ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+-                             u32 pkru_val, enum xstate_copy_mode copy_mode)
++                             u64 xfeatures, u32 pkru_val,
++                             enum xstate_copy_mode copy_mode)
+ {
+       const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+       struct xregs_state *xinit = &init_fpstate.regs.xsave;
+@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+               break;
+       case XSTATE_COPY_XSAVE:
+-              header.xfeatures &= fpstate->user_xfeatures;
++              header.xfeatures &= fpstate->user_xfeatures & xfeatures;
+               break;
+       }
+@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+                            enum xstate_copy_mode copy_mode)
+ {
+       __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
++                                tsk->thread.fpu.fpstate->user_xfeatures,
+                                 tsk->thread.pkru, copy_mode);
+ }
+diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
+index a4ecb04d8d64..3518fb26d06b 100644
+--- a/arch/x86/kernel/fpu/xstate.h
++++ b/arch/x86/kernel/fpu/xstate.h
+@@ -43,7 +43,8 @@ enum xstate_copy_mode {
+ struct membuf;
+ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+-                                    u32 pkru_val, enum xstate_copy_mode copy_mode);
++                                    u64 xfeatures, u32 pkru_val,
++                                    enum xstate_copy_mode copy_mode);
+ extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+                                   enum xstate_copy_mode mode);
+ extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 6690a3722007..394d3a8b4682 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5385,26 +5385,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+       return 0;
+ }
+-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+-                                       struct kvm_xsave *guest_xsave)
++
++static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
++                                        u8 *state, unsigned int size)
+ {
+       if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+               return;
+-      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+-                                     guest_xsave->region,
+-                                     sizeof(guest_xsave->region),
++      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
++                                     vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+                                      vcpu->arch.pkru);
+ }
+-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+-                                        u8 *state, unsigned int size)
++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
++                                       struct kvm_xsave *guest_xsave)
+ {
+-      if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+-              return;
+-
+-      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+-                                     state, size, vcpu->arch.pkru);
++      return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
++                                           sizeof(guest_xsave->region));
+ }
+ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
diff --git a/patches/kernel/0016-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch b/patches/kernel/0016-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch
new file mode 100644 (file)
index 0000000..c9d06f5
--- /dev/null
@@ -0,0 +1,119 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:53 -0700
+Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at
+ KVM_GET_XSAVE{2}
+
+Mask off xfeatures that aren't exposed to the guest only when saving guest
+state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
+Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
+ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
+Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
+to load xfeatures that are supported by the host, irrespective of what
+xfeatures are exposed to the guest.
+
+There is no known use case where userspace *intentionally* loads xfeatures
+that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
+was specifically that KVM_GET_XSAVE{2} would save xfeatures that weren't
+exposed to the guest, e.g. would lead to userspace unintentionally loading
+guest-unsupported xfeatures when live migrating a VM.
+
+Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
+problematic for QEMU-based setups, as QEMU has a bug where instead of
+terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
+loading guest state, i.e. resumes the guest after live migration with
+incomplete guest state, and ultimately results in guest data corruption.
+
+Note, letting userspace restore all host-supported xfeatures does not fix
+setups where a VM is migrated from a host *without* commit ad856280ddea,
+to a target with a subset of host-supported xfeatures.  However there is
+no way to safely address that scenario, e.g. KVM could silently drop the
+unsupported features, but that would be a clear violation of KVM's ABI and
+so would require userspace to opt-in, at which point userspace could
+simply be updated to sanitize the to-be-loaded XSAVE state.
+
+Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
+Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
+Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
+Cc: stable@vger.kernel.org
+Cc: Leonardo Bras <leobras@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Message-Id: <20230928001956.924301-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c)
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kernel/fpu/xstate.c |  5 +----
+ arch/x86/kvm/cpuid.c         |  8 --------
+ arch/x86/kvm/x86.c           | 18 ++++++++++++++++--
+ 3 files changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 463ec0cd0dab..ebe698f8af73 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
+               fpregs_restore_userregs();
+       newfps->xfeatures = curfps->xfeatures | xfeatures;
+-
+-      if (!guest_fpu)
+-              newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+-
++      newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+       newfps->xfd = curfps->xfd & ~xfeatures;
+       /* Do the final updates within the locked region */
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 2c20da9aa2ac..e2b67975869c 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -332,14 +332,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+       vcpu->arch.guest_supported_xcr0 =
+               cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+-      /*
+-       * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+-       * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+-       * supported by the host.
+-       */
+-      vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
+-                                                     XFEATURE_MASK_FPSSE;
+-
+       kvm_update_pv_runtime(vcpu);
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 394d3a8b4682..e0cea0f8380a 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5389,12 +5389,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+                                         u8 *state, unsigned int size)
+ {
++      /*
++       * Only copy state for features that are enabled for the guest.  The
++       * state itself isn't problematic, but setting bits in the header for
++       * features that are supported in *this* host but not exposed to the
++       * guest can result in KVM_SET_XSAVE failing when live migrating to a
++       * compatible host without the features that are NOT exposed to the
++       * guest.
++       *
++       * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
++       * XSAVE/XCR0 are not exposed to the guest, and even if XSAVE isn't
++       * supported by the host.
++       */
++      u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
++                           XFEATURE_MASK_FPSSE;
++
+       if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+               return;
+       fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+-                                     vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+-                                     vcpu->arch.pkru);
++                                     supported_xcr0, vcpu->arch.pkru);
+ }
+ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
diff --git a/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch b/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch
deleted file mode 100644 (file)
index 83a64ce..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 27 Sep 2023 17:19:52 -0700
-Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to
- uabi buffer
-
-Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
-constrain which xfeatures are saved into the userspace buffer without
-having to modify the user_xfeatures field in KVM's guest_fpu state.
-
-KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
-guest must not show up in the effective xstate_bv field of the buffer.
-Saving only the guest-supported xfeatures allows userspace to load the
-saved state on a different host with fewer xfeatures, so long as the
-target host supports the xfeatures that are exposed to the guest.
-
-KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
-the set of guest-supported xfeatures, but doing so broke KVM's historical
-ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
-are supported by the *host*.
-
-Cc: stable@vger.kernel.org
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Message-Id: <20230928001956.924301-2-seanjc@google.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22)
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/include/asm/fpu/api.h |  3 ++-
- arch/x86/kernel/fpu/core.c     |  5 +++--
- arch/x86/kernel/fpu/xstate.c   |  7 +++++--
- arch/x86/kernel/fpu/xstate.h   |  3 ++-
- arch/x86/kvm/x86.c             | 21 +++++++++------------
- 5 files changed, 21 insertions(+), 18 deletions(-)
-
-diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
-index b475d9a582b8..e829fa4c6788 100644
---- a/arch/x86/include/asm/fpu/api.h
-+++ b/arch/x86/include/asm/fpu/api.h
-@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
- static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
- #endif
--extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
-+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
-+                                         unsigned int size, u64 xfeatures, u32 pkru);
- extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
- static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
-diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
-index caf33486dc5e..cddd5018e6a4 100644
---- a/arch/x86/kernel/fpu/core.c
-+++ b/arch/x86/kernel/fpu/core.c
-@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
- EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
- void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
--                                  unsigned int size, u32 pkru)
-+                                  unsigned int size, u64 xfeatures, u32 pkru)
- {
-       struct fpstate *kstate = gfpu->fpstate;
-       union fpregs_state *ustate = buf;
-       struct membuf mb = { .p = buf, .left = size };
-       if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
--              __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
-+              __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
-+                                        XSTATE_COPY_XSAVE);
-       } else {
-               memcpy(&ustate->fxsave, &kstate->regs.fxsave,
-                      sizeof(ustate->fxsave));
-diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
-index 1afbc4866b10..463ec0cd0dab 100644
---- a/arch/x86/kernel/fpu/xstate.c
-+++ b/arch/x86/kernel/fpu/xstate.c
-@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
-  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
-  * @to:               membuf descriptor
-  * @fpstate:  The fpstate buffer from which to copy
-+ * @xfeatures:        The mask of xfeatures to save (XSAVE mode only)
-  * @pkru_val: The PKRU value to store in the PKRU component
-  * @copy_mode:        The requested copy mode
-  *
-@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
-  * It supports partial copy but @to.pos always starts from zero.
-  */
- void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
--                             u32 pkru_val, enum xstate_copy_mode copy_mode)
-+                             u64 xfeatures, u32 pkru_val,
-+                             enum xstate_copy_mode copy_mode)
- {
-       const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
-       struct xregs_state *xinit = &init_fpstate.regs.xsave;
-@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
-               break;
-       case XSTATE_COPY_XSAVE:
--              header.xfeatures &= fpstate->user_xfeatures;
-+              header.xfeatures &= fpstate->user_xfeatures & xfeatures;
-               break;
-       }
-@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
-                            enum xstate_copy_mode copy_mode)
- {
-       __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
-+                                tsk->thread.fpu.fpstate->user_xfeatures,
-                                 tsk->thread.pkru, copy_mode);
- }
-diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
-index a4ecb04d8d64..3518fb26d06b 100644
---- a/arch/x86/kernel/fpu/xstate.h
-+++ b/arch/x86/kernel/fpu/xstate.h
-@@ -43,7 +43,8 @@ enum xstate_copy_mode {
- struct membuf;
- extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
--                                    u32 pkru_val, enum xstate_copy_mode copy_mode);
-+                                    u64 xfeatures, u32 pkru_val,
-+                                    enum xstate_copy_mode copy_mode);
- extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
-                                   enum xstate_copy_mode mode);
- extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index ff92ff41d5ce..a43a950d04cb 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -5314,26 +5314,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
-       return 0;
- }
--static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
--                                       struct kvm_xsave *guest_xsave)
-+
-+static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
-+                                        u8 *state, unsigned int size)
- {
-       if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
-               return;
--      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
--                                     guest_xsave->region,
--                                     sizeof(guest_xsave->region),
-+      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
-+                                     vcpu->arch.guest_fpu.fpstate->user_xfeatures,
-                                      vcpu->arch.pkru);
- }
--static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
--                                        u8 *state, unsigned int size)
-+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-+                                       struct kvm_xsave *guest_xsave)
- {
--      if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
--              return;
--
--      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
--                                     state, size, vcpu->arch.pkru);
-+      return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
-+                                           sizeof(guest_xsave->region));
- }
- static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
diff --git a/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch b/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch
deleted file mode 100644 (file)
index 9154817..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Sean Christopherson <seanjc@google.com>
-Date: Wed, 27 Sep 2023 17:19:53 -0700
-Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at
- KVM_GET_XSAVE{2}
-
-Mask off xfeatures that aren't exposed to the guest only when saving guest
-state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
-Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
-ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
-Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
-to load xfeatures that are supported by the host, irrespective of what
-xfeatures are exposed to the guest.
-
-There is no known use case where userspace *intentionally* loads xfeatures
-that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
-was specifically that KVM_GET_XSAVE{2} would save xfeatures that weren't
-exposed to the guest, e.g. would lead to userspace unintentionally loading
-guest-unsupported xfeatures when live migrating a VM.
-
-Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
-problematic for QEMU-based setups, as QEMU has a bug where instead of
-terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
-loading guest state, i.e. resumes the guest after live migration with
-incomplete guest state, and ultimately results in guest data corruption.
-
-Note, letting userspace restore all host-supported xfeatures does not fix
-setups where a VM is migrated from a host *without* commit ad856280ddea,
-to a target with a subset of host-supported xfeatures.  However there is
-no way to safely address that scenario, e.g. KVM could silently drop the
-unsupported features, but that would be a clear violation of KVM's ABI and
-so would require userspace to opt-in, at which point userspace could
-simply be updated to sanitize the to-be-loaded XSAVE state.
-
-Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
-Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
-Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
-Cc: stable@vger.kernel.org
-Cc: Leonardo Bras <leobras@redhat.com>
-Signed-off-by: Sean Christopherson <seanjc@google.com>
-Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
-Message-Id: <20230928001956.924301-3-seanjc@google.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c)
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kernel/fpu/xstate.c |  5 +----
- arch/x86/kvm/cpuid.c         |  8 --------
- arch/x86/kvm/x86.c           | 18 ++++++++++++++++--
- 3 files changed, 17 insertions(+), 14 deletions(-)
-
-diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
-index 463ec0cd0dab..ebe698f8af73 100644
---- a/arch/x86/kernel/fpu/xstate.c
-+++ b/arch/x86/kernel/fpu/xstate.c
-@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
-               fpregs_restore_userregs();
-       newfps->xfeatures = curfps->xfeatures | xfeatures;
--
--      if (!guest_fpu)
--              newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
--
-+      newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
-       newfps->xfd = curfps->xfd & ~xfeatures;
-       /* Do the final updates within the locked region */
-diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index 61aefeb3fdbc..e5393ee652ba 100644
---- a/arch/x86/kvm/cpuid.c
-+++ b/arch/x86/kvm/cpuid.c
-@@ -350,14 +350,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
-       vcpu->arch.guest_supported_xcr0 =
-               cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
--      /*
--       * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
--       * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
--       * supported by the host.
--       */
--      vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
--                                                     XFEATURE_MASK_FPSSE;
--
-       kvm_update_pv_runtime(vcpu);
-       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index a43a950d04cb..a4a44adf7c72 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -5318,12 +5318,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
- static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
-                                         u8 *state, unsigned int size)
- {
-+      /*
-+       * Only copy state for features that are enabled for the guest.  The
-+       * state itself isn't problematic, but setting bits in the header for
-+       * features that are supported in *this* host but not exposed to the
-+       * guest can result in KVM_SET_XSAVE failing when live migrating to a
-+       * compatible host without the features that are NOT exposed to the
-+       * guest.
-+       *
-+       * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
-+       * XSAVE/XCR0 are not exposed to the guest, and even if XSAVE isn't
-+       * supported by the host.
-+       */
-+      u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
-+                           XFEATURE_MASK_FPSSE;
-+
-       if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
-               return;
-       fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
--                                     vcpu->arch.guest_fpu.fpstate->user_xfeatures,
--                                     vcpu->arch.pkru);
-+                                     supported_xcr0, vcpu->arch.pkru);
- }
- static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
index 0b6a250fd5bb058a9965d904b3e6f83b87a0c3b7..0f112085de77ffd667df863f7240164da1b8d26f 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 0b6a250fd5bb058a9965d904b3e6f83b87a0c3b7
+Subproject commit 0f112085de77ffd667df863f7240164da1b8d26f