From 15baf5b4c2b0ecf549431f4ccaba93157ec0f2f9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fabian=20Gr=C3=BCnbichler?= Date: Fri, 9 Mar 2018 14:43:58 +0100 Subject: [PATCH] rebase patches and drop those applied in 4.14/4.15 --- ...-accept-an-alternate-timestamp-strin.patch | 4 +- ...idge-keep-MAC-of-first-assigned-port.patch | 2 +- ...ides-for-missing-ACS-capabilities-4..patch | 2 +- ...-default-dynamic-halt-polling-growth.patch | 2 +- ...-flag-to-enable-cpuset-to-use-v2-beh.patch | 66 ---- ...-dst-refcnt-leak-in-sctp_v4_get_dst.patch} | 2 +- ...puset-Allow-v2-behavior-in-v1-cgroup.patch | 141 --------- ...-dst-refcnt-leak-in-sctp_v6_get_dst.patch} | 2 +- ...7-KVM-x86-fix-APIC-page-invalidation.patch | 90 ------ ...adata-estimation-accurate-and-clear.patch} | 8 +- ...e-extent-block-in-dealloc-without-m.patch} | 18 +- ...0008-vhost-fix-skb-leak-in-handle_rx.patch | 72 ----- .../0009-tun-free-skb-in-early-errors.patch | 86 ----- .../0010-tap-free-skb-if-flags-error.patch | 58 ---- ...ash-on-pkey-enforcement-failed-in-re.patch | 93 ------ ...-t-enforce-PKey-security-on-SMI-MADs.patch | 47 --- ...e-support-for-CPUs-without-virtual-N.patch | 299 ------------------ .../kernel/0014-KVM-SVM-obey-guest-PAT.patch | 56 ---- ...byte-don-t-add-the-data-offset-twice.patch | 33 -- ...F_LAYER_LINK-case-in-tcf_get_base_pt.patch | 34 -- ...leak-related-filter-programming-stat.patch | 127 -------- ...-memory-barrier-on-vmcs-field-lookup.patch | 49 --- ...-t-create-a-second-memory-controller.patch | 102 ------ ...-sb_edac-Fix-missing-break-in-switch.patch | 37 --- ...-lpfc-Fix-loop-mode-target-discovery.patch | 45 --- ...Fix-add_wait_queue-behavioral-change.patch | 52 --- ...-Warn-about-missing-retpoline-in-mod.patch | 164 ---------- ...ose-sock-if-net-namespace-is-exiting.patch | 127 -------- ...ack-of-set_grace_period-in-lockd_dow.patch | 46 --- 29 files changed, 20 insertions(+), 1844 deletions(-) delete mode 100644 patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch rename patches/kernel/{0025-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch => 0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch} (98%) delete mode 100644 patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch rename patches/kernel/{0026-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch => 0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch} (98%) delete mode 100644 patches/kernel/0007-KVM-x86-fix-APIC-page-invalidation.patch rename patches/kernel/{0028-ocfs2-make-metadata-estimation-accurate-and-clear.patch => 0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch} (89%) rename patches/kernel/{0029-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch => 0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch} (95%) delete mode 100644 patches/kernel/0008-vhost-fix-skb-leak-in-handle_rx.patch delete mode 100644 patches/kernel/0009-tun-free-skb-in-early-errors.patch delete mode 100644 patches/kernel/0010-tap-free-skb-if-flags-error.patch delete mode 100644 patches/kernel/0011-IB-core-Avoid-crash-on-pkey-enforcement-failed-in-re.patch delete mode 100644 patches/kernel/0012-IB-core-Don-t-enforce-PKey-security-on-SMI-MADs.patch delete mode 100644 patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch delete mode 100644 patches/kernel/0014-KVM-SVM-obey-guest-PAT.patch delete mode 100644 patches/kernel/0015-net-sched-em_nbyte-don-t-add-the-data-offset-twice.patch delete mode 100644 patches/kernel/0016-net-sched-fix-TCF_LAYER_LINK-case-in-tcf_get_base_pt.patch delete mode 100644 patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch delete mode 100644 patches/kernel/0018-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch delete mode 100644 patches/kernel/0019-EDAC-sb_edac-Don-t-create-a-second-memory-controller.patch delete mode 100644 patches/kernel/0020-EDAC-sb_edac-Fix-missing-break-in-switch.patch delete mode 100644 patches/kernel/0021-scsi-lpfc-Fix-loop-mode-target-discovery.patch delete mode 100644 patches/kernel/0022-sched-wait-Fix-add_wait_queue-behavioral-change.patch delete mode 100644 patches/kernel/0023-module-retpoline-Warn-about-missing-retpoline-in-mod.patch delete mode 100644 patches/kernel/0024-net-tcp-close-sock-if-net-namespace-is-exiting.patch delete mode 100644 patches/kernel/0027-lockd-lost-rollback-of-set_grace_period-in-lockd_dow.patch diff --git a/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch b/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch index 522b5f4..acd4b3f 100644 --- a/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch +++ b/patches/kernel/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch @@ -20,10 +20,10 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index fd8fdb91581d..1e35ac9fc810 100755 +index 87f1fc9801d7..4ef868f1f244 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h -@@ -37,10 +37,14 @@ else +@@ -33,10 +33,14 @@ else VERSION=$KBUILD_BUILD_VERSION fi diff --git a/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch b/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch index 05a8f20..658c3f1 100644 --- a/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch +++ b/patches/kernel/0002-bridge-keep-MAC-of-first-assigned-port.patch @@ -18,7 +18,7 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c -index 89110319ef0f..5e73fff65f47 100644 +index 808e2b914015..b0ad54384826 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -259,10 +259,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) diff --git a/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch b/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch index 1806736..969d5cb 100644 --- a/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch +++ b/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch @@ -74,7 +74,7 @@ index 27ca3fbc47aa..5e3caff3fb49 100644 Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index db82bef43b99..d338fdb7c402 100644 +index db82bef43b99..ed94ba0d0922 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3695,6 +3695,106 @@ static int __init pci_apply_final_quirks(void) diff --git a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch index 863f916..aa5f518 100644 --- a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch +++ b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch @@ -12,7 +12,7 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 4d81f6ded88e..bfa9c4d34102 100644 +index 210bf820385a..5b7e582f3742 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -77,7 +77,7 @@ module_param(halt_poll_ns, uint, 0644); diff --git a/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch b/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch deleted file mode 100644 index 0f6a3de..0000000 --- a/patches/kernel/0005-cgroup-Add-mount-flag-to-enable-cpuset-to-use-v2-beh.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Waiman Long -Date: Thu, 17 Aug 2017 15:33:09 -0400 -Subject: [PATCH] cgroup: Add mount flag to enable cpuset to use v2 behavior in - v1 cgroup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -A new mount option "cpuset_v2_mode" is added to the v1 cgroupfs -filesystem to enable cpuset controller to use v2 behavior in a v1 -cgroup. This mount option applies only to cpuset controller and have -no effect on other controllers. - -Signed-off-by: Waiman Long -Signed-off-by: Tejun Heo -(cherry-picked from e1cba4b85daa71b710384d451ff6238d5e4d1ff6) -Signed-off-by: Fabian Grünbichler ---- - include/linux/cgroup-defs.h | 5 +++++ - kernel/cgroup/cgroup-v1.c | 6 ++++++ - 2 files changed, 11 insertions(+) - -diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h -index 09f4c7df1478..c344e77707a5 100644 ---- a/include/linux/cgroup-defs.h -+++ b/include/linux/cgroup-defs.h -@@ -74,6 +74,11 @@ enum { - * aren't writeable from inside the namespace. - */ - CGRP_ROOT_NS_DELEGATE = (1 << 3), -+ -+ /* -+ * Enable cpuset controller in v1 cgroup to use v2 behavior. -+ */ -+ CGRP_ROOT_CPUSET_V2_MODE = (1 << 4), - }; - - /* cftype->flags */ -diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c -index 7bf4b1533f34..ce7426b875f5 100644 ---- a/kernel/cgroup/cgroup-v1.c -+++ b/kernel/cgroup/cgroup-v1.c -@@ -846,6 +846,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo - seq_puts(seq, ",noprefix"); - if (root->flags & CGRP_ROOT_XATTR) - seq_puts(seq, ",xattr"); -+ if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) -+ seq_puts(seq, ",cpuset_v2_mode"); - - spin_lock(&release_agent_path_lock); - if (strlen(root->release_agent_path)) -@@ -900,6 +902,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) - opts->cpuset_clone_children = true; - continue; - } -+ if (!strcmp(token, "cpuset_v2_mode")) { -+ opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; -+ continue; -+ } - if (!strcmp(token, "xattr")) { - opts->flags |= CGRP_ROOT_XATTR; - continue; --- -2.14.2 - diff --git a/patches/kernel/0025-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch b/patches/kernel/0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch similarity index 98% rename from patches/kernel/0025-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch rename to patches/kernel/0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch index 1bed6b0..d1a4cd6 100644 --- a/patches/kernel/0025-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch +++ b/patches/kernel/0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch @@ -54,7 +54,7 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c -index 989a900383b5..e1a3ae4f3cab 100644 +index 6a38c2503649..91813e686c67 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -514,22 +514,20 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, diff --git a/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch b/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch deleted file mode 100644 index 25be085..0000000 --- a/patches/kernel/0006-cpuset-Allow-v2-behavior-in-v1-cgroup.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Waiman Long -Date: Thu, 17 Aug 2017 15:33:10 -0400 -Subject: [PATCH] cpuset: Allow v2 behavior in v1 cgroup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Cpuset v2 has some useful behaviors that are not present in v1 because -of backward compatibility concern. One of that is the restoration of -the original cpu and memory node mask after a hot removal and addition -event sequence. - -This patch makes the cpuset controller to check the -CGRP_ROOT_CPUSET_V2_MODE flag and use the v2 behavior if it is set. - -Signed-off-by: Waiman Long -Signed-off-by: Tejun Heo -(cherry-picked from b8d1b8ee93df8ffbabbeadd65d39853cfad6d698) -Signed-off-by: Fabian Grünbichler ---- - kernel/cgroup/cpuset.c | 33 ++++++++++++++++++++------------- - 1 file changed, 20 insertions(+), 13 deletions(-) - -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index e8cb34193433..f76c4bf3d46a 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -299,6 +299,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); - - static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); - -+/* -+ * Cgroup v2 behavior is used when on default hierarchy or the -+ * cgroup_v2_mode flag is set. -+ */ -+static inline bool is_in_v2_mode(void) -+{ -+ return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || -+ (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); -+} -+ - /* - * This is ugly, but preserves the userspace API for existing cpuset - * users. If someone tries to mount the "cpuset" filesystem, we -@@ -489,8 +499,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - - /* On legacy hiearchy, we must be a subset of our parent cpuset. */ - ret = -EACCES; -- if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && -- !is_cpuset_subset(trial, par)) -+ if (!is_in_v2_mode() && !is_cpuset_subset(trial, par)) - goto out; - - /* -@@ -896,8 +905,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) - * If it becomes empty, inherit the effective mask of the - * parent, which is guaranteed to have some CPUs. - */ -- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && -- cpumask_empty(new_cpus)) -+ if (is_in_v2_mode() && cpumask_empty(new_cpus)) - cpumask_copy(new_cpus, parent->effective_cpus); - - /* Skip the whole subtree if the cpumask remains the same. */ -@@ -914,7 +922,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) - cpumask_copy(cp->effective_cpus, new_cpus); - spin_unlock_irq(&callback_lock); - -- WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && -+ WARN_ON(!is_in_v2_mode() && - !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); - - update_tasks_cpumask(cp); -@@ -1150,8 +1158,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) - * If it becomes empty, inherit the effective mask of the - * parent, which is guaranteed to have some MEMs. - */ -- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && -- nodes_empty(*new_mems)) -+ if (is_in_v2_mode() && nodes_empty(*new_mems)) - *new_mems = parent->effective_mems; - - /* Skip the whole subtree if the nodemask remains the same. */ -@@ -1168,7 +1175,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) - cp->effective_mems = *new_mems; - spin_unlock_irq(&callback_lock); - -- WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && -+ WARN_ON(!is_in_v2_mode() && - !nodes_equal(cp->mems_allowed, cp->effective_mems)); - - update_tasks_nodemask(cp); -@@ -1460,7 +1467,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) - - /* allow moving tasks into an empty cpuset if on default hierarchy */ - ret = -ENOSPC; -- if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && -+ if (!is_in_v2_mode() && - (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) - goto out_unlock; - -@@ -1979,7 +1986,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) - cpuset_inc(); - - spin_lock_irq(&callback_lock); -- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { -+ if (is_in_v2_mode()) { - cpumask_copy(cs->effective_cpus, parent->effective_cpus); - cs->effective_mems = parent->effective_mems; - } -@@ -2056,7 +2063,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) - mutex_lock(&cpuset_mutex); - spin_lock_irq(&callback_lock); - -- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { -+ if (is_in_v2_mode()) { - cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); - top_cpuset.mems_allowed = node_possible_map; - } else { -@@ -2250,7 +2257,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) - cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); - mems_updated = !nodes_equal(new_mems, cs->effective_mems); - -- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) -+ if (is_in_v2_mode()) - hotplug_update_tasks(cs, &new_cpus, &new_mems, - cpus_updated, mems_updated); - else -@@ -2288,7 +2295,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) - static cpumask_t new_cpus; - static nodemask_t new_mems; - bool cpus_updated, mems_updated; -- bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys); -+ bool on_dfl = is_in_v2_mode(); - - mutex_lock(&cpuset_mutex); - --- -2.14.2 - diff --git a/patches/kernel/0026-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch b/patches/kernel/0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch similarity index 98% rename from patches/kernel/0026-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch rename to patches/kernel/0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch index 58087ed..ea351e7 100644 --- a/patches/kernel/0026-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch +++ b/patches/kernel/0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch @@ -27,7 +27,7 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c -index a4b6ffb61495..c5a5ad8ac00f 100644 +index 5d4c15bf66d2..e35d4f73d2df 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -326,8 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, diff --git a/patches/kernel/0007-KVM-x86-fix-APIC-page-invalidation.patch b/patches/kernel/0007-KVM-x86-fix-APIC-page-invalidation.patch deleted file mode 100644 index 1e693f9..0000000 --- a/patches/kernel/0007-KVM-x86-fix-APIC-page-invalidation.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= -Date: Thu, 30 Nov 2017 19:05:45 +0100 -Subject: [PATCH] KVM: x86: fix APIC page invalidation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Implementation of the unpinned APIC page didn't update the VMCS address -cache when invalidation was done through range mmu notifiers. -This became a problem when the page notifier was removed. - -Re-introduce the arch-specific helper and call it from ...range_start. - -Fixes: 38b9917350cb ("kvm: vmx: Implement set_apic_access_page_addr") -Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2") -Signed-off-by: Radim Krčmář -Signed-off-by: Fabian Grünbichler ---- - arch/x86/include/asm/kvm_host.h | 3 +++ - arch/x86/kvm/x86.c | 14 ++++++++++++++ - virt/kvm/kvm_main.c | 8 ++++++++ - 3 files changed, 25 insertions(+) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 78ec3cda9429..1953c0a5b972 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -1439,4 +1439,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) - #endif - } - -+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, -+ unsigned long start, unsigned long end); -+ - #endif /* _ASM_X86_KVM_HOST_H */ -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index f896c441fc2c..eae4aecf3cfe 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -6711,6 +6711,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) - kvm_x86_ops->tlb_flush(vcpu); - } - -+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, -+ unsigned long start, unsigned long end) -+{ -+ unsigned long apic_address; -+ -+ /* -+ * The physical address of apic access page is stored in the VMCS. -+ * Update it when it becomes invalid. -+ */ -+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); -+ if (start <= apic_address && apic_address < end) -+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); -+} -+ - void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) - { - struct page *page = NULL; -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index bfa9c4d34102..d0085c9d6297 100644 ---- a/virt/kvm/kvm_main.c -+++ b/virt/kvm/kvm_main.c -@@ -136,6 +136,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); - static unsigned long long kvm_createvm_count; - static unsigned long long kvm_active_vms; - -+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, -+ unsigned long start, unsigned long end) -+{ -+} -+ - bool kvm_is_reserved_pfn(kvm_pfn_t pfn) - { - if (pfn_valid(pfn)) -@@ -361,6 +366,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, - kvm_flush_remote_tlbs(kvm); - - spin_unlock(&kvm->mmu_lock); -+ -+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); -+ - srcu_read_unlock(&kvm->srcu, idx); - } - --- -2.14.2 - diff --git a/patches/kernel/0028-ocfs2-make-metadata-estimation-accurate-and-clear.patch b/patches/kernel/0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch similarity index 89% rename from patches/kernel/0028-ocfs2-make-metadata-estimation-accurate-and-clear.patch rename to patches/kernel/0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch index b3cabcf..60e1f42 100644 --- a/patches/kernel/0028-ocfs2-make-metadata-estimation-accurate-and-clear.patch +++ b/patches/kernel/0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch @@ -28,10 +28,10 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c -index 88a31e9340a0..77ec9b495027 100644 +index d1516327b787..256986aca8df 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c -@@ -784,6 +784,7 @@ struct ocfs2_write_ctxt { +@@ -797,6 +797,7 @@ struct ocfs2_write_ctxt { struct ocfs2_cached_dealloc_ctxt w_dealloc; struct list_head w_unwritten_list; @@ -39,7 +39,7 @@ index 88a31e9340a0..77ec9b495027 100644 }; void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) -@@ -1373,6 +1374,7 @@ static int ocfs2_unwritten_check(struct inode *inode, +@@ -1386,6 +1387,7 @@ static int ocfs2_unwritten_check(struct inode *inode, desc->c_clear_unwritten = 0; list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); list_add_tail(&new->ue_node, &wc->w_unwritten_list); @@ -47,7 +47,7 @@ index 88a31e9340a0..77ec9b495027 100644 new = NULL; unlock: spin_unlock(&oi->ip_lock); -@@ -2246,7 +2248,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, +@@ -2256,7 +2258,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, ue->ue_phys = desc->c_phys; list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); diff --git a/patches/kernel/0029-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch b/patches/kernel/0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch similarity index 95% rename from patches/kernel/0029-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch rename to patches/kernel/0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch index 15e3488..b2bba0b 100644 --- a/patches/kernel/0029-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch +++ b/patches/kernel/0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch @@ -72,7 +72,7 @@ Signed-off-by: Fabian Grünbichler 3 files changed, 203 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h -index 4a5152ec88a3..571692171dd1 100644 +index 27b75cf32cfa..250bcacdf9e9 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -61,6 +61,7 @@ struct ocfs2_extent_tree { @@ -84,7 +84,7 @@ index 4a5152ec88a3..571692171dd1 100644 /* diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c -index 386aecce881d..9b5e7d8ba710 100644 +index ab5105f9767e..2f2c76193f54 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, @@ -109,7 +109,7 @@ index 386aecce881d..9b5e7d8ba710 100644 et->et_ops->eo_fill_root_el(et); if (!et->et_ops->eo_fill_max_leaf_clusters) -@@ -1159,7 +1167,7 @@ static int ocfs2_add_branch(handle_t *handle, +@@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle, struct buffer_head **last_eb_bh, struct ocfs2_alloc_context *meta_ac) { @@ -118,7 +118,7 @@ index 386aecce881d..9b5e7d8ba710 100644 u64 next_blkno, new_last_eb_blk; struct buffer_head *bh; struct buffer_head **new_eb_bhs = NULL; -@@ -1214,11 +1222,31 @@ static int ocfs2_add_branch(handle_t *handle, +@@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle, goto bail; } @@ -155,7 +155,7 @@ index 386aecce881d..9b5e7d8ba710 100644 } /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be -@@ -1341,15 +1369,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle, +@@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct buffer_head **ret_new_eb_bh) { @@ -184,7 +184,7 @@ index 386aecce881d..9b5e7d8ba710 100644 if (status < 0) { mlog_errno(status); goto bail; -@@ -1512,7 +1550,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, +@@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, int depth = le16_to_cpu(el->l_tree_depth); struct buffer_head *bh = NULL; @@ -193,7 +193,7 @@ index 386aecce881d..9b5e7d8ba710 100644 shift = ocfs2_find_branch_target(et, &bh); if (shift < 0) { -@@ -6593,6 +6631,154 @@ ocfs2_find_per_slot_free_list(int type, +@@ -6585,6 +6623,154 @@ ocfs2_find_per_slot_free_list(int type, return fl; } @@ -349,10 +349,10 @@ index 386aecce881d..9b5e7d8ba710 100644 int type, int slot, u64 suballoc, u64 blkno, unsigned int bit) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c -index 77ec9b495027..2ff02dda97d8 100644 +index 256986aca8df..e8e205bf2e41 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c -@@ -2322,6 +2322,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode, +@@ -2332,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode, ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); diff --git a/patches/kernel/0008-vhost-fix-skb-leak-in-handle_rx.patch b/patches/kernel/0008-vhost-fix-skb-leak-in-handle_rx.patch deleted file mode 100644 index 2176dbc..0000000 --- a/patches/kernel/0008-vhost-fix-skb-leak-in-handle_rx.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wei Xu -Date: Fri, 1 Dec 2017 05:10:36 -0500 -Subject: [PATCH] vhost: fix skb leak in handle_rx() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Matthew found a roughly 40% tcp throughput regression with commit -c67df11f(vhost_net: try batch dequing from skb array) as discussed -in the following thread: -https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html - -Eventually we figured out that it was a skb leak in handle_rx() -when sending packets to the VM. This usually happens when a guest -can not drain out vq as fast as vhost fills in, afterwards it sets -off the traffic jam and leaks skb(s) which occurs as no headcount -to send on the vq from vhost side. - -This can be avoided by making sure we have got enough headcount -before actually consuming a skb from the batched rx array while -transmitting, which is simply done by moving checking the zero -headcount a bit ahead. - -Signed-off-by: Wei Xu -Reported-by: Matthew Rosato -Signed-off-by: Fabian Grünbichler ---- - drivers/vhost/net.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c -index 1c75572f5a3f..010253847022 100644 ---- a/drivers/vhost/net.c -+++ b/drivers/vhost/net.c -@@ -781,16 +781,6 @@ static void handle_rx(struct vhost_net *net) - /* On error, stop handling until the next kick. */ - if (unlikely(headcount < 0)) - goto out; -- if (nvq->rx_array) -- msg.msg_control = vhost_net_buf_consume(&nvq->rxq); -- /* On overrun, truncate and discard */ -- if (unlikely(headcount > UIO_MAXIOV)) { -- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); -- err = sock->ops->recvmsg(sock, &msg, -- 1, MSG_DONTWAIT | MSG_TRUNC); -- pr_debug("Discarded rx packet: len %zd\n", sock_len); -- continue; -- } - /* OK, now we need to know about added descriptors. */ - if (!headcount) { - if (unlikely(vhost_enable_notify(&net->dev, vq))) { -@@ -803,6 +793,16 @@ static void handle_rx(struct vhost_net *net) - * they refilled. */ - goto out; - } -+ if (nvq->rx_array) -+ msg.msg_control = vhost_net_buf_consume(&nvq->rxq); -+ /* On overrun, truncate and discard */ -+ if (unlikely(headcount > UIO_MAXIOV)) { -+ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); -+ err = sock->ops->recvmsg(sock, &msg, -+ 1, MSG_DONTWAIT | MSG_TRUNC); -+ pr_debug("Discarded rx packet: len %zd\n", sock_len); -+ continue; -+ } - /* We don't need to be notified again. */ - iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); - fixup = msg.msg_iter; --- -2.14.2 - diff --git a/patches/kernel/0009-tun-free-skb-in-early-errors.patch b/patches/kernel/0009-tun-free-skb-in-early-errors.patch deleted file mode 100644 index 0185bbd..0000000 --- a/patches/kernel/0009-tun-free-skb-in-early-errors.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wei Xu -Date: Fri, 1 Dec 2017 05:10:37 -0500 -Subject: [PATCH] tun: free skb in early errors -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -tun_recvmsg() supports accepting skb by msg_control after -commit ac77cfd4258f ("tun: support receiving skb through msg_control"), -the skb if presented should be freed no matter how far it can go -along, otherwise it would be leaked. - -This patch fixes several missed cases. - -Signed-off-by: Wei Xu -Reported-by: Matthew Rosato -Signed-off-by: Fabian Grünbichler ---- - drivers/net/tun.c | 24 ++++++++++++++++++------ - 1 file changed, 18 insertions(+), 6 deletions(-) - -diff --git a/drivers/net/tun.c b/drivers/net/tun.c -index d1cb1ff83251..d58ae8ad0a4e 100644 ---- a/drivers/net/tun.c -+++ b/drivers/net/tun.c -@@ -1519,8 +1519,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, - - tun_debug(KERN_INFO, tun, "tun_do_read\n"); - -- if (!iov_iter_count(to)) -+ if (!iov_iter_count(to)) { -+ if (skb) -+ kfree_skb(skb); - return 0; -+ } - - if (!skb) { - /* Read frames from ring */ -@@ -1636,22 +1639,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, - { - struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); -+ struct sk_buff *skb = m->msg_control; - int ret; - -- if (!tun) -- return -EBADFD; -+ if (!tun) { -+ ret = -EBADFD; -+ goto out_free_skb; -+ } - - if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { - ret = -EINVAL; -- goto out; -+ goto out_put_tun; - } - if (flags & MSG_ERRQUEUE) { - ret = sock_recv_errqueue(sock->sk, m, total_len, - SOL_PACKET, TUN_TX_TIMESTAMP); - goto out; - } -- ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, -- m->msg_control); -+ ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); - if (ret > (ssize_t)total_len) { - m->msg_flags |= MSG_TRUNC; - ret = flags & MSG_TRUNC ? ret : total_len; -@@ -1659,6 +1664,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, - out: - tun_put(tun); - return ret; -+ -+out_put_tun: -+ tun_put(tun); -+out_free_skb: -+ if (skb) -+ kfree_skb(skb); -+ return ret; - } - - static int tun_peek_len(struct socket *sock) --- -2.14.2 - diff --git a/patches/kernel/0010-tap-free-skb-if-flags-error.patch b/patches/kernel/0010-tap-free-skb-if-flags-error.patch deleted file mode 100644 index 87f6502..0000000 --- a/patches/kernel/0010-tap-free-skb-if-flags-error.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wei Xu -Date: Fri, 1 Dec 2017 05:10:38 -0500 -Subject: [PATCH] tap: free skb if flags error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -tap_recvmsg() supports accepting skb by msg_control after -commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"), -the skb if presented should be freed within the function, otherwise -it would be leaked. - -Signed-off-by: Wei Xu -Reported-by: Matthew Rosato -Signed-off-by: Fabian Grünbichler ---- - drivers/net/tap.c | 14 ++++++++++---- - 1 file changed, 10 insertions(+), 4 deletions(-) - -diff --git a/drivers/net/tap.c b/drivers/net/tap.c -index 7a2f6bebfd15..96e5e5b2ae39 100644 ---- a/drivers/net/tap.c -+++ b/drivers/net/tap.c -@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q, - DEFINE_WAIT(wait); - ssize_t ret = 0; - -- if (!iov_iter_count(to)) -+ if (!iov_iter_count(to)) { -+ if (skb) -+ kfree_skb(skb); - return 0; -+ } - - if (skb) - goto put; -@@ -1157,11 +1160,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m, - size_t total_len, int flags) - { - struct tap_queue *q = container_of(sock, struct tap_queue, sock); -+ struct sk_buff *skb = m->msg_control; - int ret; -- if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) -+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { -+ if (skb) -+ kfree_skb(skb); - return -EINVAL; -- ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, -- m->msg_control); -+ } -+ ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb); - if (ret > total_len) { - m->msg_flags |= MSG_TRUNC; - ret = flags & MSG_TRUNC ? ret : total_len; --- -2.14.2 - diff --git a/patches/kernel/0011-IB-core-Avoid-crash-on-pkey-enforcement-failed-in-re.patch b/patches/kernel/0011-IB-core-Avoid-crash-on-pkey-enforcement-failed-in-re.patch deleted file mode 100644 index f61fa4b..0000000 --- a/patches/kernel/0011-IB-core-Avoid-crash-on-pkey-enforcement-failed-in-re.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Parav Pandit -Date: Fri, 5 Jan 2018 23:51:12 +0100 -Subject: [PATCH] IB/core: Avoid crash on pkey enforcement failed in received - MADs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 89548bcafec7ecfeea58c553f0834b5d575a66eb upstream. - -Below kernel crash is observed when Pkey security enforcement fails on -received MADs. This issue is reported in [1]. - -ib_free_recv_mad() accesses the rmpp_list, whose initialization is -needed before accessing it. -When security enformcent fails on received MADs, MAD processing avoided -due to security checks failed. - -OpenSM[3770]: SM port is down -kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 -kernel: IP: ib_free_recv_mad+0x44/0xa0 [ib_core] -kernel: PGD 0 -kernel: P4D 0 -kernel: -kernel: Oops: 0002 [#1] SMP -kernel: CPU: 0 PID: 2833 Comm: kworker/0:1H Tainted: P IO 4.13.4-1-pve #1 -kernel: Hardware name: Dell XS23-TY3 /9CMP63, BIOS 1.71 09/17/2013 -kernel: Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] -kernel: task: ffffa069c6541600 task.stack: ffffb9a729054000 -kernel: RIP: 0010:ib_free_recv_mad+0x44/0xa0 [ib_core] -kernel: RSP: 0018:ffffb9a729057d38 EFLAGS: 00010286 -kernel: RAX: ffffa069cb138a48 RBX: ffffa069cb138a10 RCX: 0000000000000000 -kernel: RDX: ffffb9a729057d38 RSI: 0000000000000000 RDI: ffffa069cb138a20 -kernel: RBP: ffffb9a729057d60 R08: ffffa072d2d49800 R09: ffffa069cb138ae0 -kernel: R10: ffffa069cb138ae0 R11: ffffa072b3994e00 R12: ffffb9a729057d38 -kernel: R13: ffffa069d1c90000 R14: 0000000000000000 R15: ffffa069d1c90880 -kernel: FS: 0000000000000000(0000) GS:ffffa069dba00000(0000) knlGS:0000000000000000 -kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -kernel: CR2: 0000000000000008 CR3: 00000011f51f2000 CR4: 00000000000006f0 -kernel: Call Trace: -kernel: ib_mad_recv_done+0x5cc/0xb50 [ib_core] -kernel: __ib_process_cq+0x5c/0xb0 [ib_core] -kernel: ib_cq_poll_work+0x20/0x60 [ib_core] -kernel: process_one_work+0x1e9/0x410 -kernel: worker_thread+0x4b/0x410 -kernel: kthread+0x109/0x140 -kernel: ? process_one_work+0x410/0x410 -kernel: ? kthread_create_on_node+0x70/0x70 -kernel: ? SyS_exit_group+0x14/0x20 -kernel: ret_from_fork+0x25/0x30 -kernel: RIP: ib_free_recv_mad+0x44/0xa0 [ib_core] RSP: ffffb9a729057d38 -kernel: CR2: 0000000000000008 - -[1] : https://www.spinics.net/lists/linux-rdma/msg56190.html - -Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams") -Signed-off-by: Parav Pandit -Reported-by: Chris Blake -Reviewed-by: Daniel Jurgens -Reviewed-by: Hal Rosenstock -Signed-off-by: Doug Ledford -Signed-off-by: Greg Kroah-Hartman - -Signed-off-by: Fabian Grünbichler ---- - drivers/infiniband/core/mad.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c -index f8f53bb90837..cb91245e9163 100644 ---- a/drivers/infiniband/core/mad.c -+++ b/drivers/infiniband/core/mad.c -@@ -1974,14 +1974,15 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, - unsigned long flags; - int ret; - -+ INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); - ret = ib_mad_enforce_security(mad_agent_priv, - mad_recv_wc->wc->pkey_index); - if (ret) { - ib_free_recv_mad(mad_recv_wc); - deref_mad_agent(mad_agent_priv); -+ return; - } - -- INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); - list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); - if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { - mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, --- -2.14.2 - diff --git a/patches/kernel/0012-IB-core-Don-t-enforce-PKey-security-on-SMI-MADs.patch b/patches/kernel/0012-IB-core-Don-t-enforce-PKey-security-on-SMI-MADs.patch deleted file mode 100644 index 10cb488..0000000 --- a/patches/kernel/0012-IB-core-Don-t-enforce-PKey-security-on-SMI-MADs.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Daniel Jurgens -Date: Mon, 20 Nov 2017 16:47:45 -0600 -Subject: [PATCH] IB/core: Don't enforce PKey security on SMI MADs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Per the infiniband spec an SMI MAD can have any PKey. Checking the pkey -on SMI MADs is not necessary, and it seems that some older adapters -using the mthca driver don't follow the convention of using the default -PKey, resulting in false denials, or errors querying the PKey cache. - -SMI MAD security is still enforced, only agents allowed to manage the -subnet are able to receive or send SMI MADs. - -Reported-by: Chris Blake -Fixes: 47a2b338fe63("IB/core: Enforce security on management datagrams") -Signed-off-by: Daniel Jurgens -Reviewed-by: Parav Pandit -Signed-off-by: Leon Romanovsky -Signed-off-by: Fabian Grünbichler ---- - drivers/infiniband/core/security.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c -index 70ad19c4c73e..8f9fd3b757db 100644 ---- a/drivers/infiniband/core/security.c -+++ b/drivers/infiniband/core/security.c -@@ -692,8 +692,11 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) - { - int ret; - -- if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed) -- return -EACCES; -+ if (map->agent.qp->qp_type == IB_QPT_SMI) { -+ if (!map->agent.smp_allowed) -+ return -EACCES; -+ return 0; -+ } - - ret = ib_security_pkey_access(map->agent.device, - map->agent.port_num, --- -2.14.2 - diff --git a/patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch b/patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch deleted file mode 100644 index 54a3fdb..0000000 --- a/patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch +++ /dev/null @@ -1,299 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 6 Nov 2017 13:31:12 +0100 -Subject: [PATCH] kvm: vmx: Reinstate support for CPUs without virtual NMI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 8a1b43922d0d1279e7936ba85c4c2a870403c95f upstream. - -This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require -virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines -only had virtual NMIs in some SKUs. - -The revert is not trivial because in the meanwhile there have been several -fixes to nested NMI injection. Therefore, the entire vNMI state is moved -to struct loaded_vmcs. - -Another change compared to before the patch is a simplification here: - - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && - !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( - get_vmcs12(vcpu))))) { - -The final condition here is always true (because nested_cpu_has_virtual_nmis -is always false) and is removed. - -Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003 -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803 -Signed-off-by: Paolo Bonzini -Signed-off-by: Radim Krčmář -Signed-off-by: Greg Kroah-Hartman -Signed-off-by: Fabian Grünbichler ---- - arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++---------------- - 1 file changed, 106 insertions(+), 44 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 5edf05ce45de..146caacd8fdd 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -204,6 +204,10 @@ struct loaded_vmcs { - bool nmi_known_unmasked; - unsigned long vmcs_host_cr3; /* May not match real cr3 */ - unsigned long vmcs_host_cr4; /* May not match real cr4 */ -+ /* Support for vnmi-less CPUs */ -+ int soft_vnmi_blocked; -+ ktime_t entry_time; -+ s64 vnmi_blocked_time; - struct list_head loaded_vmcss_on_cpu_link; - }; - -@@ -1290,6 +1294,11 @@ static inline bool cpu_has_vmx_invpcid(void) - SECONDARY_EXEC_ENABLE_INVPCID; - } - -+static inline bool cpu_has_virtual_nmis(void) -+{ -+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; -+} -+ - static inline bool cpu_has_vmx_wbinvd_exit(void) - { - return vmcs_config.cpu_based_2nd_exec_ctrl & -@@ -1341,11 +1350,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) - (vmcs12->secondary_vm_exec_control & bit); - } - --static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) --{ -- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; --} -- - static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) - { - return vmcs12->pin_based_vm_exec_control & -@@ -3687,9 +3691,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) - &_vmexit_control) < 0) - return -EIO; - -- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | -- PIN_BASED_VIRTUAL_NMIS; -- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; -+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; -+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | -+ PIN_BASED_VMX_PREEMPTION_TIMER; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; -@@ -5549,7 +5553,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) - - static void enable_nmi_window(struct kvm_vcpu *vcpu) - { -- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { -+ if (!cpu_has_virtual_nmis() || -+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } -@@ -5589,6 +5594,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) - { - struct vcpu_vmx *vmx = to_vmx(vcpu); - -+ if (!cpu_has_virtual_nmis()) { -+ /* -+ * Tracking the NMI-blocked state in software is built upon -+ * finding the next open IRQ window. This, in turn, depends on -+ * well-behaving guests: They have to keep IRQs disabled at -+ * least as long as the NMI handler runs. Otherwise we may -+ * cause NMI nesting, maybe breaking the guest. But as this is -+ * highly unlikely, we can live with the residual risk. -+ */ -+ vmx->loaded_vmcs->soft_vnmi_blocked = 1; -+ vmx->loaded_vmcs->vnmi_blocked_time = 0; -+ } -+ - ++vcpu->stat.nmi_injections; - vmx->loaded_vmcs->nmi_known_unmasked = false; - -@@ -5607,6 +5625,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool masked; - -+ if (!cpu_has_virtual_nmis()) -+ return vmx->loaded_vmcs->soft_vnmi_blocked; - if (vmx->loaded_vmcs->nmi_known_unmasked) - return false; - masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; -@@ -5618,13 +5638,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) - { - struct vcpu_vmx *vmx = to_vmx(vcpu); - -- vmx->loaded_vmcs->nmi_known_unmasked = !masked; -- if (masked) -- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, -- GUEST_INTR_STATE_NMI); -- else -- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, -- GUEST_INTR_STATE_NMI); -+ if (!cpu_has_virtual_nmis()) { -+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { -+ vmx->loaded_vmcs->soft_vnmi_blocked = masked; -+ vmx->loaded_vmcs->vnmi_blocked_time = 0; -+ } -+ } else { -+ vmx->loaded_vmcs->nmi_known_unmasked = !masked; -+ if (masked) -+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, -+ GUEST_INTR_STATE_NMI); -+ else -+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, -+ GUEST_INTR_STATE_NMI); -+ } - } - - static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -@@ -5632,6 +5659,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - -+ if (!cpu_has_virtual_nmis() && -+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) -+ return 0; -+ - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); -@@ -6360,6 +6391,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) - * AAK134, BY25. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && -+ cpu_has_virtual_nmis() && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - -@@ -6834,7 +6866,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) - } - - /* Create a new VMCS */ -- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); -+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); -@@ -7851,6 +7883,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) - * "blocked by NMI" bit has to be set before next VM entry. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && -+ cpu_has_virtual_nmis() && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); -@@ -8568,6 +8601,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) - return 0; - } - -+ if (unlikely(!cpu_has_virtual_nmis() && -+ vmx->loaded_vmcs->soft_vnmi_blocked)) { -+ if (vmx_interrupt_allowed(vcpu)) { -+ vmx->loaded_vmcs->soft_vnmi_blocked = 0; -+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && -+ vcpu->arch.nmi_pending) { -+ /* -+ * This CPU don't support us in finding the end of an -+ * NMI-blocked window if the guest runs with IRQs -+ * disabled. So we pull the trigger after 1 s of -+ * futile waiting, but inform the user about this. -+ */ -+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked " -+ "state on VCPU %d after 1 s timeout\n", -+ __func__, vcpu->vcpu_id); -+ vmx->loaded_vmcs->soft_vnmi_blocked = 0; -+ } -+ } -+ - if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) - return kvm_vmx_exit_handlers[exit_reason](vcpu); -@@ -8850,33 +8902,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) - - idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - -- if (vmx->loaded_vmcs->nmi_known_unmasked) -- return; -- /* -- * Can't use vmx->exit_intr_info since we're not sure what -- * the exit reason is. -- */ -- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); -- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; -- vector = exit_intr_info & INTR_INFO_VECTOR_MASK; -- /* -- * SDM 3: 27.7.1.2 (September 2008) -- * Re-set bit "block by NMI" before VM entry if vmexit caused by -- * a guest IRET fault. -- * SDM 3: 23.2.2 (September 2008) -- * Bit 12 is undefined in any of the following cases: -- * If the VM exit sets the valid bit in the IDT-vectoring -- * information field. -- * If the VM exit is due to a double fault. -- */ -- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && -- vector != DF_VECTOR && !idtv_info_valid) -- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, -- GUEST_INTR_STATE_NMI); -- else -- vmx->loaded_vmcs->nmi_known_unmasked = -- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) -- & GUEST_INTR_STATE_NMI); -+ if (cpu_has_virtual_nmis()) { -+ if (vmx->loaded_vmcs->nmi_known_unmasked) -+ return; -+ /* -+ * Can't use vmx->exit_intr_info since we're not sure what -+ * the exit reason is. -+ */ -+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); -+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; -+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK; -+ /* -+ * SDM 3: 27.7.1.2 (September 2008) -+ * Re-set bit "block by NMI" before VM entry if vmexit caused by -+ * a guest IRET fault. -+ * SDM 3: 23.2.2 (September 2008) -+ * Bit 12 is undefined in any of the following cases: -+ * If the VM exit sets the valid bit in the IDT-vectoring -+ * information field. -+ * If the VM exit is due to a double fault. -+ */ -+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && -+ vector != DF_VECTOR && !idtv_info_valid) -+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, -+ GUEST_INTR_STATE_NMI); -+ else -+ vmx->loaded_vmcs->nmi_known_unmasked = -+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) -+ & GUEST_INTR_STATE_NMI); -+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) -+ vmx->loaded_vmcs->vnmi_blocked_time += -+ ktime_to_ns(ktime_sub(ktime_get(), -+ vmx->loaded_vmcs->entry_time)); - } - - static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, -@@ -8993,6 +9050,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr, cr3, cr4; - -+ /* Record the guest's net vcpu time for enforced NMI injections. */ -+ if (unlikely(!cpu_has_virtual_nmis() && -+ vmx->loaded_vmcs->soft_vnmi_blocked)) -+ vmx->loaded_vmcs->entry_time = ktime_get(); -+ - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required) --- -2.14.2 - diff --git a/patches/kernel/0014-KVM-SVM-obey-guest-PAT.patch b/patches/kernel/0014-KVM-SVM-obey-guest-PAT.patch deleted file mode 100644 index cc64a62..0000000 --- a/patches/kernel/0014-KVM-SVM-obey-guest-PAT.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Thu, 26 Oct 2017 09:13:27 +0200 -Subject: [PATCH] KVM: SVM: obey guest PAT -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -For many years some users of assigned devices have reported worse -performance on AMD processors with NPT than on AMD without NPT, -Intel or bare metal. - -The reason turned out to be that SVM is discarding the guest PAT -setting and uses the default (PA0=PA4=WB, PA1=PA5=WT, PA2=PA6=UC-, -PA3=UC). The guest might be using a different setting, and -especially might want write combining but isn't getting it -(instead getting slow UC or UC- accesses). - -Thanks a lot to geoff@hostfission.com for noticing the relation -to the g_pat setting. The patch has been tested also by a bunch -of people on VFIO users forums. - -Fixes: 709ddebf81cb40e3c36c6109a7892e8b93a09464 -Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196409 -Cc: stable@vger.kernel.org -Signed-off-by: Paolo Bonzini -Reviewed-by: David Hildenbrand -Tested-by: Nick Sarnie -Signed-off-by: Radim Krčmář -(cherry picked from commit 15038e14724799b8c205beb5f20f9e54896013c3) -Signed-off-by: Fabian Grünbichler ---- - arch/x86/kvm/svm.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c -index a8c911fcd73f..e9d0f80fd83a 100644 ---- a/arch/x86/kvm/svm.c -+++ b/arch/x86/kvm/svm.c -@@ -3650,6 +3650,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) - u32 ecx = msr->index; - u64 data = msr->data; - switch (ecx) { -+ case MSR_IA32_CR_PAT: -+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) -+ return 1; -+ vcpu->arch.pat = data; -+ svm->vmcb->save.g_pat = data; -+ mark_dirty(svm->vmcb, VMCB_NPT); -+ break; - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr); - break; --- -2.14.2 - diff --git a/patches/kernel/0015-net-sched-em_nbyte-don-t-add-the-data-offset-twice.patch b/patches/kernel/0015-net-sched-em_nbyte-don-t-add-the-data-offset-twice.patch deleted file mode 100644 index d0b118e..0000000 --- a/patches/kernel/0015-net-sched-em_nbyte-don-t-add-the-data-offset-twice.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Fri, 19 Jan 2018 11:12:37 +0100 -Subject: [PATCH] net: sched: em_nbyte: don't add the data offset twice -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -'ptr' is shifted by the offset and then validated, -the memcmp should not add it a second time. - -Signed-off-by: Wolfgang Bumiller -Signed-off-by: Fabian Grünbichler ---- - net/sched/em_nbyte.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c -index df3110d69585..07c10bac06a0 100644 ---- a/net/sched/em_nbyte.c -+++ b/net/sched/em_nbyte.c -@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em, - if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) - return 0; - -- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); -+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len); - } - - static struct tcf_ematch_ops em_nbyte_ops = { --- -2.14.2 - diff --git a/patches/kernel/0016-net-sched-fix-TCF_LAYER_LINK-case-in-tcf_get_base_pt.patch b/patches/kernel/0016-net-sched-fix-TCF_LAYER_LINK-case-in-tcf_get_base_pt.patch deleted file mode 100644 index 416c277..0000000 --- a/patches/kernel/0016-net-sched-fix-TCF_LAYER_LINK-case-in-tcf_get_base_pt.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Fri, 19 Jan 2018 11:12:38 +0100 -Subject: [PATCH] net: sched: fix TCF_LAYER_LINK case in tcf_get_base_ptr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -TCF_LAYER_LINK and TCF_LAYER_NETWORK returned the same pointer as -skb->data points to the network header. -Use skb_mac_header instead. - -Signed-off-by: Wolfgang Bumiller -Signed-off-by: Fabian Grünbichler ---- - include/net/pkt_cls.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h -index 537d0a0ad4c4..4450961b1554 100644 ---- a/include/net/pkt_cls.h -+++ b/include/net/pkt_cls.h -@@ -395,7 +395,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) - { - switch (layer) { - case TCF_LAYER_LINK: -- return skb->data; -+ return skb_mac_header(skb); - case TCF_LAYER_NETWORK: - return skb_network_header(skb); - case TCF_LAYER_TRANSPORT: --- -2.14.2 - diff --git a/patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch b/patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch deleted file mode 100644 index e318a18..0000000 --- a/patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Alexander Duyck -Date: Wed, 4 Oct 2017 08:44:43 -0700 -Subject: [PATCH] i40e: Fix memory leak related filter programming status -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -It looks like we weren't correctly placing the pages from buffers that had -been used to return a filter programming status back on the ring. As a -result they were being overwritten and tracking of the pages was lost. - -This change works to correct that by incorporating part of -i40e_put_rx_buffer into the programming status handler code. As a result we -should now be correctly placing the pages for those buffers on the -re-allocation list instead of letting them stay in place. - -Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status") -Reported-by: Anders K. Pedersen -Signed-off-by: Alexander Duyck -Tested-by: Anders K Pedersen -Signed-off-by: Jeff Kirsher -(cherry picked from commit 2b9478ffc550f17c6cd8c69057234e91150f5972) -Signed-off-by: Fabian Grünbichler ---- - drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 ++++++++++++++++------------- - 1 file changed, 36 insertions(+), 27 deletions(-) - -diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c -index 2194960d5855..391b1878c24b 100644 ---- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c -+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c -@@ -1042,6 +1042,32 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) - return false; - } - -+/** -+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring -+ * @rx_ring: rx descriptor ring to store buffers on -+ * @old_buff: donor buffer to have page reused -+ * -+ * Synchronizes page for reuse by the adapter -+ **/ -+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, -+ struct i40e_rx_buffer *old_buff) -+{ -+ struct i40e_rx_buffer *new_buff; -+ u16 nta = rx_ring->next_to_alloc; -+ -+ new_buff = &rx_ring->rx_bi[nta]; -+ -+ /* update, and store next to alloc */ -+ nta++; -+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; -+ -+ /* transfer page from old buffer to new buffer */ -+ new_buff->dma = old_buff->dma; -+ new_buff->page = old_buff->page; -+ new_buff->page_offset = old_buff->page_offset; -+ new_buff->pagecnt_bias = old_buff->pagecnt_bias; -+} -+ - /** - * i40e_rx_is_programming_status - check for programming status descriptor - * @qw: qword representing status_error_len in CPU ordering -@@ -1076,15 +1102,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, - u64 qw) - { -- u32 ntc = rx_ring->next_to_clean + 1; -+ struct i40e_rx_buffer *rx_buffer; -+ u32 ntc = rx_ring->next_to_clean; - u8 id; - - /* fetch, update, and store next to clean */ -+ rx_buffer = &rx_ring->rx_bi[ntc++]; - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(I40E_RX_DESC(rx_ring, ntc)); - -+ /* place unused page back on the ring */ -+ i40e_reuse_rx_page(rx_ring, rx_buffer); -+ rx_ring->rx_stats.page_reuse_count++; -+ -+ /* clear contents of buffer_info */ -+ rx_buffer->page = NULL; -+ - id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> - I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; - -@@ -1643,32 +1678,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, - return false; - } - --/** -- * i40e_reuse_rx_page - page flip buffer and store it back on the ring -- * @rx_ring: rx descriptor ring to store buffers on -- * @old_buff: donor buffer to have page reused -- * -- * Synchronizes page for reuse by the adapter -- **/ --static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, -- struct i40e_rx_buffer *old_buff) --{ -- struct i40e_rx_buffer *new_buff; -- u16 nta = rx_ring->next_to_alloc; -- -- new_buff = &rx_ring->rx_bi[nta]; -- -- /* update, and store next to alloc */ -- nta++; -- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; -- -- /* transfer page from old buffer to new buffer */ -- new_buff->dma = old_buff->dma; -- new_buff->page = old_buff->page; -- new_buff->page_offset = old_buff->page_offset; -- new_buff->pagecnt_bias = old_buff->pagecnt_bias; --} -- - /** - * i40e_page_is_reusable - check if any reuse is possible - * @page: page struct to check --- -2.14.2 - diff --git a/patches/kernel/0018-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch b/patches/kernel/0018-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch deleted file mode 100644 index 8fc4603..0000000 --- a/patches/kernel/0018-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andrew Honig -Date: Wed, 10 Jan 2018 10:12:03 -0800 -Subject: [PATCH] KVM: x86: Add memory barrier on vmcs field lookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream. - -This adds a memory barrier when performing a lookup into -the vmcs_field_to_offset_table. This is related to -CVE-2017-5753. - -Signed-off-by: Andrew Honig -Reviewed-by: Jim Mattson -Signed-off-by: Paolo Bonzini -Signed-off-by: Greg Kroah-Hartman -Signed-off-by: Fabian Grünbichler ---- - arch/x86/kvm/vmx.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 146caacd8fdd..80732f87cac0 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -883,8 +883,16 @@ static inline short vmcs_field_to_offset(unsigned long field) - { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); - -- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || -- vmcs_field_to_offset_table[field] == 0) -+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) -+ return -ENOENT; -+ -+ /* -+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a -+ * generic mechanism. -+ */ -+ asm("lfence"); -+ -+ if (vmcs_field_to_offset_table[field] == 0) - return -ENOENT; - - return vmcs_field_to_offset_table[field]; --- -2.14.2 - diff --git a/patches/kernel/0019-EDAC-sb_edac-Don-t-create-a-second-memory-controller.patch b/patches/kernel/0019-EDAC-sb_edac-Don-t-create-a-second-memory-controller.patch deleted file mode 100644 index 4272010..0000000 --- a/patches/kernel/0019-EDAC-sb_edac-Don-t-create-a-second-memory-controller.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Qiuxu Zhuo -Date: Wed, 13 Sep 2017 18:42:14 +0800 -Subject: [PATCH] EDAC, sb_edac: Don't create a second memory controller if HA1 - is not present -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Yi Zhang reported the following failure on a 2-socket Haswell (E5-2603v3) -server (DELL PowerEdge 730xd): - - EDAC sbridge: Some needed devices are missing - EDAC MC: Removed device 0 for sb_edac.c Haswell SrcID#0_Ha#0: DEV 0000:7f:12.0 - EDAC MC: Removed device 1 for sb_edac.c Haswell SrcID#1_Ha#0: DEV 0000:ff:12.0 - EDAC sbridge: Couldn't find mci handler - EDAC sbridge: Couldn't find mci handler - EDAC sbridge: Failed to register device with error -19. - -The refactored sb_edac driver creates the IMC1 (the 2nd memory -controller) if any IMC1 device is present. In this case only -HA1_TA of IMC1 was present, but the driver expected to find -HA1/HA1_TM/HA1_TAD[0-3] devices too, leading to the above failure. - -The document [1] says the 'E5-2603 v3' CPU has 4 memory channels max. Yi -Zhang inserted one DIMM per channel for each CPU, and did random error -address injection test with this patch: - - 4024 addresses fell in TOLM hole area - 12715 addresses fell in CPU_SrcID#0_Ha#0_Chan#0_DIMM#0 - 12774 addresses fell in CPU_SrcID#0_Ha#0_Chan#1_DIMM#0 - 12798 addresses fell in CPU_SrcID#0_Ha#0_Chan#2_DIMM#0 - 12913 addresses fell in CPU_SrcID#0_Ha#0_Chan#3_DIMM#0 - 12674 addresses fell in CPU_SrcID#1_Ha#0_Chan#0_DIMM#0 - 12686 addresses fell in CPU_SrcID#1_Ha#0_Chan#1_DIMM#0 - 12882 addresses fell in CPU_SrcID#1_Ha#0_Chan#2_DIMM#0 - 12934 addresses fell in CPU_SrcID#1_Ha#0_Chan#3_DIMM#0 - 106400 addresses were injected totally. - -The test result shows that all the 4 channels belong to IMC0 per CPU, so -the server really only has one IMC per CPU. - -In the 1st page of chapter 2 in datasheet [2], it also says 'E5-2600 v3' -implements either one or two IMCs. For CPUs with one IMC, IMC1 is not -used and should be ignored. - -Thus, do not create a second memory controller if the key HA1 is absent. - -[1] http://ark.intel.com/products/83349/Intel-Xeon-Processor-E5-2603-v3-15M-Cache-1_60-GHz -[2] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf - -Reported-and-tested-by: Yi Zhang -Signed-off-by: Qiuxu Zhuo -Cc: Tony Luck -Cc: linux-edac -Fixes: e2f747b1f42a ("EDAC, sb_edac: Assign EDAC memory controller per h/w controller") -Link: http://lkml.kernel.org/r/20170913104214.7325-1-qiuxu.zhuo@intel.com -[ Massage commit message. ] -Signed-off-by: Borislav Petkov -(cherry picked from commit 15cc3ae001873845b5d842e212478a6570c7d938) -Signed-off-by: Fabian Grünbichler ---- - drivers/edac/sb_edac.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c -index 80d860cb0746..7a3b201d51df 100644 ---- a/drivers/edac/sb_edac.c -+++ b/drivers/edac/sb_edac.c -@@ -455,6 +455,7 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = { - static const struct pci_id_descr pci_dev_descr_ibridge[] = { - /* Processor Home Agent */ - { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0, IMC0) }, -+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) }, - - /* Memory controller */ - { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0, IMC0) }, -@@ -465,7 +466,6 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = { - { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0, IMC0) }, - - /* Optional, mode 2HA */ -- { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1, IMC1) }, - { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1, IMC1) }, - { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1, IMC1) }, - { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1, IMC1) }, -@@ -2260,6 +2260,13 @@ static int sbridge_get_onedevice(struct pci_dev **prev, - next_imc: - sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev); - if (!sbridge_dev) { -+ /* If the HA1 wasn't found, don't create EDAC second memory controller */ -+ if (dev_descr->dom == IMC1 && devno != 1) { -+ edac_dbg(0, "Skip IMC1: %04x:%04x (since HA1 was absent)\n", -+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id); -+ pci_dev_put(pdev); -+ return 0; -+ } - - if (dev_descr->dom == SOCK) - goto out_imc; --- -2.14.2 - diff --git a/patches/kernel/0020-EDAC-sb_edac-Fix-missing-break-in-switch.patch b/patches/kernel/0020-EDAC-sb_edac-Fix-missing-break-in-switch.patch deleted file mode 100644 index b7ca81a..0000000 --- a/patches/kernel/0020-EDAC-sb_edac-Fix-missing-break-in-switch.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Gustavo A. R. Silva" -Date: Mon, 16 Oct 2017 12:40:29 -0500 -Subject: [PATCH] EDAC, sb_edac: Fix missing break in switch -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add missing break statement in order to prevent the code from falling -through. - -Signed-off-by: Gustavo A. R. Silva -Cc: Qiuxu Zhuo -Cc: linux-edac -Link: http://lkml.kernel.org/r/20171016174029.GA19757@embeddedor.com -Signed-off-by: Borislav Petkov -(cherry picked from commit a8e9b186f153a44690ad0363a56716e7077ad28c) -Signed-off-by: Fabian Grünbichler ---- - drivers/edac/sb_edac.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c -index 7a3b201d51df..fb0264ef83a3 100644 ---- a/drivers/edac/sb_edac.c -+++ b/drivers/edac/sb_edac.c -@@ -2467,6 +2467,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, - case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: - case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA: - pvt->pci_ta = pdev; -+ break; - case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS: - case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS: - pvt->pci_ras = pdev; --- -2.14.2 - diff --git a/patches/kernel/0021-scsi-lpfc-Fix-loop-mode-target-discovery.patch b/patches/kernel/0021-scsi-lpfc-Fix-loop-mode-target-discovery.patch deleted file mode 100644 index 4f7bf71..0000000 --- a/patches/kernel/0021-scsi-lpfc-Fix-loop-mode-target-discovery.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Dick Kennedy -Date: Wed, 23 Aug 2017 16:55:31 -0700 -Subject: [PATCH] scsi: lpfc: Fix loop mode target discovery -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The driver does not discover targets when in loop mode. - -The NLP type is correctly getting set when a fabric connection is -detected but, not for loop. The unknown NLP type means that the driver -does not issue a PRLI when in loop topology. Thus target discovery -fails. - -Fix by checking the topology during discovery. If it is loop, set the -NLP FC4 type to FCP. - -Signed-off-by: Dick Kennedy -Signed-off-by: James Smart -Reviewed-by: Johannes Thumshirn -Signed-off-by: Martin K. Petersen -(cherry picked from commit 2877cbffb79ed121a6bcc5edbe629d3aba36cd29) -Signed-off-by: Fabian Grünbichler ---- - drivers/scsi/lpfc/lpfc_nportdisc.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c -index f74cb0142fd4..95b2b43ac37d 100644 ---- a/drivers/scsi/lpfc/lpfc_nportdisc.c -+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c -@@ -1724,6 +1724,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, - lpfc_nvme_update_localport(vport); - } - -+ } else if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { -+ ndlp->nlp_fc4_type |= NLP_FC4_FCP; -+ - } else if (ndlp->nlp_fc4_type == 0) { - rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID, - 0, ndlp->nlp_DID); --- -2.14.2 - diff --git a/patches/kernel/0022-sched-wait-Fix-add_wait_queue-behavioral-change.patch b/patches/kernel/0022-sched-wait-Fix-add_wait_queue-behavioral-change.patch deleted file mode 100644 index 852ce99..0000000 --- a/patches/kernel/0022-sched-wait-Fix-add_wait_queue-behavioral-change.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Omar Sandoval -Date: Tue, 5 Dec 2017 23:15:31 -0800 -Subject: [PATCH] sched/wait: Fix add_wait_queue() behavioral change -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The following cleanup commit: - - 50816c48997a ("sched/wait: Standardize internal naming of wait-queue entries") - -... unintentionally changed the behavior of add_wait_queue() from -inserting the wait entry at the head of the wait queue to the tail -of the wait queue. - -Beyond a negative performance impact this change in behavior -theoretically also breaks wait queues which mix exclusive and -non-exclusive waiters, as non-exclusive waiters will not be -woken up if they are queued behind enough exclusive waiters. - -Signed-off-by: Omar Sandoval -Reviewed-by: Jens Axboe -Acked-by: Peter Zijlstra -Cc: Linus Torvalds -Cc: Thomas Gleixner -Cc: kernel-team@fb.com -Fixes: ("sched/wait: Standardize internal naming of wait-queue entries") -Link: http://lkml.kernel.org/r/a16c8ccffd39bd08fdaa45a5192294c784b803a7.1512544324.git.osandov@fb.com -Signed-off-by: Ingo Molnar -(cherry picked from commit c6b9d9a33029014446bd9ed84c1688f6d3d4eab9) -Signed-off-by: Fabian Grünbichler ---- - kernel/sched/wait.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c -index d6afed6d0752..c09ebe92a40a 100644 ---- a/kernel/sched/wait.c -+++ b/kernel/sched/wait.c -@@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq - - wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; - spin_lock_irqsave(&wq_head->lock, flags); -- __add_wait_queue_entry_tail(wq_head, wq_entry); -+ __add_wait_queue(wq_head, wq_entry); - spin_unlock_irqrestore(&wq_head->lock, flags); - } - EXPORT_SYMBOL(add_wait_queue); --- -2.14.2 - diff --git a/patches/kernel/0023-module-retpoline-Warn-about-missing-retpoline-in-mod.patch b/patches/kernel/0023-module-retpoline-Warn-about-missing-retpoline-in-mod.patch deleted file mode 100644 index 27fffe4..0000000 --- a/patches/kernel/0023-module-retpoline-Warn-about-missing-retpoline-in-mod.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Andi Kleen -Date: Thu, 25 Jan 2018 15:50:28 -0800 -Subject: [PATCH] module/retpoline: Warn about missing retpoline in module -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -There's a risk that a kernel which has full retpoline mitigations becomes -vulnerable when a module gets loaded that hasn't been compiled with the -right compiler or the right option. - -To enable detection of that mismatch at module load time, add a module info -string "retpoline" at build time when the module was compiled with -retpoline support. This only covers compiled C source, but assembler source -or prebuilt object files are not checked. - -If a retpoline enabled kernel detects a non retpoline protected module at -load time, print a warning and report it in the sysfs vulnerability file. - -[ tglx: Massaged changelog ] - -Signed-off-by: Andi Kleen -Signed-off-by: Thomas Gleixner -Cc: David Woodhouse -Cc: gregkh@linuxfoundation.org -Cc: torvalds@linux-foundation.org -Cc: jeyu@kernel.org -Cc: arjan@linux.intel.com -Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org -(backported from commit caf7501a1b4ec964190f31f9c3f163de252273b8) -Conflicts: - arch/x86/kernel/cpu/bugs.c -context changes -Signed-off-by: Fabian Grünbichler ---- - include/linux/module.h | 9 +++++++++ - arch/x86/kernel/cpu/bugs.c | 19 +++++++++++++++++-- - kernel/module.c | 11 +++++++++++ - scripts/mod/modpost.c | 9 +++++++++ - 4 files changed, 46 insertions(+), 2 deletions(-) - -diff --git a/include/linux/module.h b/include/linux/module.h -index e7bdd549e527..c4fdf7661f82 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, - static inline void module_bug_cleanup(struct module *mod) {} - #endif /* CONFIG_GENERIC_BUG */ - -+#ifdef RETPOLINE -+extern bool retpoline_module_ok(bool has_retpoline); -+#else -+static inline bool retpoline_module_ok(bool has_retpoline) -+{ -+ return true; -+} -+#endif -+ - #ifdef CONFIG_MODULE_SIG - static inline bool module_sig_ok(struct module *module) - { -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index d5bafcdb4891..e623bd731a74 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -11,6 +11,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -93,6 +94,19 @@ static const char *spectre_v2_strings[] = { - #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt - - static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -+static bool spectre_v2_bad_module; -+ -+#ifdef RETPOLINE -+bool retpoline_module_ok(bool has_retpoline) -+{ -+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) -+ return true; -+ -+ pr_err("System may be vunerable to spectre v2\n"); -+ spectre_v2_bad_module = true; -+ return false; -+} -+#endif - - static void __init spec2_print_if_insecure(const char *reason) - { -@@ -299,7 +313,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - -- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], -- ibpb_inuse ? ", IBPB (Intel v4)" : ""); -+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -+ ibpb_inuse ? ", IBPB (Intel v4)" : "", -+ spectre_v2_bad_module ? " - vulnerable module loaded" : ""); - } - #endif -diff --git a/kernel/module.c b/kernel/module.c -index e5b878b26906..de7db074f793 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) - } - #endif /* CONFIG_LIVEPATCH */ - -+static void check_modinfo_retpoline(struct module *mod, struct load_info *info) -+{ -+ if (retpoline_module_ok(get_modinfo(info, "retpoline"))) -+ return; -+ -+ pr_warn("%s: loading module not compiled with retpoline compiler.\n", -+ mod->name); -+} -+ - /* Sets info->hdr and info->len. */ - static int copy_module_from_user(const void __user *umod, unsigned long len, - struct load_info *info) -@@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) - add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); - } - -+ check_modinfo_retpoline(mod, info); -+ - if (get_modinfo(info, "staging")) { - add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); - pr_warn("%s: module is from the staging directory, the quality " -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 48397feb08fb..cc91f81ac33e 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -2147,6 +2147,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) - buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); - } - -+/* Cannot check for assembler */ -+static void add_retpoline(struct buffer *b) -+{ -+ buf_printf(b, "\n#ifdef RETPOLINE\n"); -+ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); -+ buf_printf(b, "#endif\n"); -+} -+ - static void add_staging_flag(struct buffer *b, const char *name) - { - static const char *staging_dir = "drivers/staging"; -@@ -2492,6 +2500,7 @@ int main(int argc, char **argv) - - add_header(&buf, mod); - add_intree_flag(&buf, !external_module); -+ add_retpoline(&buf); - add_staging_flag(&buf, mod->name); - err |= add_versions(&buf, mod); - add_depends(&buf, mod, modules); --- -2.14.2 - diff --git a/patches/kernel/0024-net-tcp-close-sock-if-net-namespace-is-exiting.patch b/patches/kernel/0024-net-tcp-close-sock-if-net-namespace-is-exiting.patch deleted file mode 100644 index d4eb5e6..0000000 --- a/patches/kernel/0024-net-tcp-close-sock-if-net-namespace-is-exiting.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Dan Streetman -Date: Thu, 18 Jan 2018 16:14:26 -0500 -Subject: [PATCH] net: tcp: close sock if net namespace is exiting -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When a tcp socket is closed, if it detects that its net namespace is -exiting, close immediately and do not wait for FIN sequence. - -For normal sockets, a reference is taken to their net namespace, so it will -never exit while the socket is open. However, kernel sockets do not take a -reference to their net namespace, so it may begin exiting while the kernel -socket is still open. In this case if the kernel socket is a tcp socket, -it will stay open trying to complete its close sequence. The sock's dst(s) -hold a reference to their interface, which are all transferred to the -namespace's loopback interface when the real interfaces are taken down. -When the namespace tries to take down its loopback interface, it hangs -waiting for all references to the loopback interface to release, which -results in messages like: - -unregister_netdevice: waiting for lo to become free. Usage count = 1 - -These messages continue until the socket finally times out and closes. -Since the net namespace cleanup holds the net_mutex while calling its -registered pernet callbacks, any new net namespace initialization is -blocked until the current net namespace finishes exiting. - -After this change, the tcp socket notices the exiting net namespace, and -closes immediately, releasing its dst(s) and their reference to the -loopback interface, which lets the net namespace continue exiting. - -Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 -Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 -Signed-off-by: Dan Streetman -Signed-off-by: David S. Miller -Signed-off-by: Fabian Grünbichler ---- - include/net/net_namespace.h | 10 ++++++++++ - net/ipv4/tcp.c | 3 +++ - net/ipv4/tcp_timer.c | 15 +++++++++++++++ - 3 files changed, 28 insertions(+) - -diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h -index 1c401bd4c2e0..a5d023fa78db 100644 ---- a/include/net/net_namespace.h -+++ b/include/net/net_namespace.h -@@ -221,6 +221,11 @@ int net_eq(const struct net *net1, const struct net *net2) - return net1 == net2; - } - -+static inline int check_net(const struct net *net) -+{ -+ return atomic_read(&net->count) != 0; -+} -+ - void net_drop_ns(void *); - - #else -@@ -245,6 +250,11 @@ int net_eq(const struct net *net1, const struct net *net2) - return 1; - } - -+static inline int check_net(const struct net *net) -+{ -+ return 1; -+} -+ - #define net_drop_ns NULL - #endif - -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index a3e91b552edc..fd2a086da910 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -2258,6 +2258,9 @@ void tcp_close(struct sock *sk, long timeout) - tcp_send_active_reset(sk, GFP_ATOMIC); - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPABORTONMEMORY); -+ } else if (!check_net(sock_net(sk))) { -+ /* Not possible to send reset; just close */ -+ tcp_set_state(sk, TCP_CLOSE); - } - } - -diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index e906014890b6..ec1e5de41653 100644 ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *sk) - * to prevent DoS attacks. It is called when a retransmission timeout - * or zero probe timeout occurs on orphaned socket. - * -+ * Also close if our net namespace is exiting; in that case there is no -+ * hope of ever communicating again since all netns interfaces are already -+ * down (or about to be down), and we need to release our dst references, -+ * which have been moved to the netns loopback interface, so the namespace -+ * can finish exiting. This condition is only possible if we are a kernel -+ * socket, as those do not hold references to the namespace. -+ * - * Criteria is still not confirmed experimentally and may change. - * We kill the socket, if: - * 1. If number of orphaned sockets exceeds an administratively configured - * limit. - * 2. If we have strong memory pressure. -+ * 3. If our net namespace is exiting. - */ - static int tcp_out_of_resources(struct sock *sk, bool do_reset) - { -@@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); - return 1; - } -+ -+ if (!check_net(sock_net(sk))) { -+ /* Not possible to send reset; just close */ -+ tcp_done(sk); -+ return 1; -+ } -+ - return 0; - } - --- -2.14.2 - diff --git a/patches/kernel/0027-lockd-lost-rollback-of-set_grace_period-in-lockd_dow.patch b/patches/kernel/0027-lockd-lost-rollback-of-set_grace_period-in-lockd_dow.patch deleted file mode 100644 index d7ba32d..0000000 --- a/patches/kernel/0027-lockd-lost-rollback-of-set_grace_period-in-lockd_dow.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Vasily Averin -Date: Thu, 2 Nov 2017 13:03:42 +0300 -Subject: [PATCH] lockd: lost rollback of set_grace_period() in - lockd_down_net() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Commit efda760fe95ea ("lockd: fix lockd shutdown race") is incorrect, -it removes lockd_manager and disarm grace_period_end for init_net only. - -If nfsd was started from another net namespace lockd_up_net() calls -set_grace_period() that adds lockd_manager into per-netns list -and queues grace_period_end delayed work. - -These action should be reverted in lockd_down_net(). -Otherwise it can lead to double list_add on after restart nfsd in netns, -and to use-after-free if non-disarmed delayed work will be executed after netns destroy. - -Fixes: efda760fe95e ("lockd: fix lockd shutdown race") -Cc: stable@vger.kernel.org -Signed-off-by: Vasily Averin -Signed-off-by: J. Bruce Fields -(cherry picked from commit 3a2b19d1ee5633f76ae8a88da7bc039a5d1732aa) -Signed-off-by: Fabian Grünbichler ---- - fs/lockd/svc.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c -index 726b6cecf430..fa8f6effcf00 100644 ---- a/fs/lockd/svc.c -+++ b/fs/lockd/svc.c -@@ -274,6 +274,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) - if (ln->nlmsvc_users) { - if (--ln->nlmsvc_users == 0) { - nlm_shutdown_hosts_net(net); -+ cancel_delayed_work_sync(&ln->grace_period_end); -+ locks_end_grace(&ln->lockd_manager); - svc_shutdown_net(serv, net); - dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); - } --- -2.14.2 - -- 2.39.2